summaryrefslogtreecommitdiff
path: root/src/video_core/command_classes
diff options
context:
space:
mode:
authorGravatar Levi2021-01-10 22:09:56 -0700
committerGravatar Levi2021-01-10 22:09:56 -0700
commit7a3c884e39fccfbb498b855080bffabc9ce2e7f1 (patch)
tree5056f9406dec188439cb0deb87603498243a9412 /src/video_core/command_classes
parentMore forgetting... duh (diff)
parentMerge pull request #5229 from Morph1984/fullscreen-opt (diff)
downloadyuzu-7a3c884e39fccfbb498b855080bffabc9ce2e7f1.tar.gz
yuzu-7a3c884e39fccfbb498b855080bffabc9ce2e7f1.tar.xz
yuzu-7a3c884e39fccfbb498b855080bffabc9ce2e7f1.zip
Merge remote-tracking branch 'upstream/master' into int-flags
Diffstat (limited to 'src/video_core/command_classes')
-rw-r--r--src/video_core/command_classes/codecs/codec.cpp129
-rw-r--r--src/video_core/command_classes/codecs/codec.h70
-rw-r--r--src/video_core/command_classes/codecs/h264.cpp293
-rw-r--r--src/video_core/command_classes/codecs/h264.h118
-rw-r--r--src/video_core/command_classes/codecs/vp9.cpp989
-rw-r--r--src/video_core/command_classes/codecs/vp9.h197
-rw-r--r--src/video_core/command_classes/codecs/vp9_types.h302
-rw-r--r--src/video_core/command_classes/host1x.cpp30
-rw-r--r--src/video_core/command_classes/host1x.h37
-rw-r--r--src/video_core/command_classes/nvdec.cpp48
-rw-r--r--src/video_core/command_classes/nvdec.h38
-rw-r--r--src/video_core/command_classes/nvdec_common.h48
-rw-r--r--src/video_core/command_classes/sync_manager.cpp60
-rw-r--r--src/video_core/command_classes/sync_manager.h64
-rw-r--r--src/video_core/command_classes/vic.cpp175
-rw-r--r--src/video_core/command_classes/vic.h110
16 files changed, 2708 insertions, 0 deletions
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
new file mode 100644
index 000000000..39bc923a5
--- /dev/null
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -0,0 +1,129 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <fstream>
7#include <vector>
8#include "common/assert.h"
9#include "video_core/command_classes/codecs/codec.h"
10#include "video_core/command_classes/codecs/h264.h"
11#include "video_core/command_classes/codecs/vp9.h"
12#include "video_core/gpu.h"
13#include "video_core/memory_manager.h"
14
15extern "C" {
16#include <libavutil/opt.h>
17}
18
19namespace Tegra {
20
21void AVFrameDeleter(AVFrame* ptr) {
22 av_frame_unref(ptr);
23 av_free(ptr);
24}
25
26Codec::Codec(GPU& gpu_)
27 : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)),
28 vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
29
30Codec::~Codec() {
31 if (!initialized) {
32 return;
33 }
34 // Free libav memory
35 AVFrame* av_frame{nullptr};
36 avcodec_send_packet(av_codec_ctx, nullptr);
37 av_frame = av_frame_alloc();
38 avcodec_receive_frame(av_codec_ctx, av_frame);
39 avcodec_flush_buffers(av_codec_ctx);
40
41 av_frame_unref(av_frame);
42 av_free(av_frame);
43 avcodec_close(av_codec_ctx);
44}
45
46void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
47 LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", codec);
48 current_codec = codec;
49}
50
51void Codec::StateWrite(u32 offset, u64 arguments) {
52 u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64);
53 std::memcpy(state_offset, &arguments, sizeof(u64));
54}
55
56void Codec::Decode() {
57 bool is_first_frame = false;
58
59 if (!initialized) {
60 if (current_codec == NvdecCommon::VideoCodec::H264) {
61 av_codec = avcodec_find_decoder(AV_CODEC_ID_H264);
62 } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
63 av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9);
64 } else {
65 LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec);
66 return;
67 }
68
69 av_codec_ctx = avcodec_alloc_context3(av_codec);
70 av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
71
72 // TODO(ameerj): libavcodec gpu hw acceleration
73
74 const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
75 if (av_error < 0) {
76 LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
77 avcodec_close(av_codec_ctx);
78 return;
79 }
80 initialized = true;
81 is_first_frame = true;
82 }
83 bool vp9_hidden_frame = false;
84
85 AVPacket packet{};
86 av_init_packet(&packet);
87 std::vector<u8> frame_data;
88
89 if (current_codec == NvdecCommon::VideoCodec::H264) {
90 frame_data = h264_decoder->ComposeFrameHeader(state, is_first_frame);
91 } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
92 frame_data = vp9_decoder->ComposeFrameHeader(state);
93 vp9_hidden_frame = vp9_decoder->WasFrameHidden();
94 }
95
96 packet.data = frame_data.data();
97 packet.size = static_cast<int>(frame_data.size());
98
99 avcodec_send_packet(av_codec_ctx, &packet);
100
101 if (!vp9_hidden_frame) {
102 // Only receive/store visible frames
103 AVFramePtr frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
104 avcodec_receive_frame(av_codec_ctx, frame.get());
105 av_frames.push(std::move(frame));
106 // Limit queue to 10 frames. Workaround for ZLA decode and queue spam
107 if (av_frames.size() > 10) {
108 av_frames.pop();
109 }
110 }
111}
112
113AVFramePtr Codec::GetCurrentFrame() {
114 // Sometimes VIC will request more frames than have been decoded.
115 // in this case, return a nullptr and don't overwrite previous frame data
116 if (av_frames.empty()) {
117 return AVFramePtr{nullptr, AVFrameDeleter};
118 }
119
120 AVFramePtr frame = std::move(av_frames.front());
121 av_frames.pop();
122 return frame;
123}
124
125NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
126 return current_codec;
127}
128
129} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
new file mode 100644
index 000000000..8a2a6c360
--- /dev/null
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -0,0 +1,70 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <queue>
9#include "common/common_types.h"
10#include "video_core/command_classes/nvdec_common.h"
11
12extern "C" {
13#if defined(__GNUC__) || defined(__clang__)
14#pragma GCC diagnostic push
15#pragma GCC diagnostic ignored "-Wconversion"
16#endif
17#include <libavcodec/avcodec.h>
18#if defined(__GNUC__) || defined(__clang__)
19#pragma GCC diagnostic pop
20#endif
21}
22
23namespace Tegra {
24class GPU;
25struct VicRegisters;
26
27void AVFrameDeleter(AVFrame* ptr);
28using AVFramePtr = std::unique_ptr<AVFrame, decltype(&AVFrameDeleter)>;
29
30namespace Decoder {
31class H264;
32class VP9;
33} // namespace Decoder
34
35class Codec {
36public:
37 explicit Codec(GPU& gpu);
38 ~Codec();
39
40 /// Sets NVDEC video stream codec
41 void SetTargetCodec(NvdecCommon::VideoCodec codec);
42
43 /// Populate NvdecRegisters state with argument value at the provided offset
44 void StateWrite(u32 offset, u64 arguments);
45
46 /// Call decoders to construct headers, decode AVFrame with ffmpeg
47 void Decode();
48
49 /// Returns next decoded frame
50 [[nodiscard]] AVFramePtr GetCurrentFrame();
51
52 /// Returns the value of current_codec
53 [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
54
55private:
56 bool initialized{};
57 NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None};
58
59 AVCodec* av_codec{nullptr};
60 AVCodecContext* av_codec_ctx{nullptr};
61
62 GPU& gpu;
63 std::unique_ptr<Decoder::H264> h264_decoder;
64 std::unique_ptr<Decoder::VP9> vp9_decoder;
65
66 NvdecCommon::NvdecRegisters state{};
67 std::queue<AVFramePtr> av_frames{};
68};
69
70} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp
new file mode 100644
index 000000000..65bbeac78
--- /dev/null
+++ b/src/video_core/command_classes/codecs/h264.cpp
@@ -0,0 +1,293 @@
1// MIT License
2//
3// Copyright (c) Ryujinx Team and Contributors
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
6// associated documentation files (the "Software"), to deal in the Software without restriction,
7// including without limitation the rights to use, copy, modify, merge, publish, distribute,
8// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
9// furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in all copies or
12// substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
15// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
17// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19//
20
21#include <array>
22#include "common/bit_util.h"
23#include "video_core/command_classes/codecs/h264.h"
24#include "video_core/gpu.h"
25#include "video_core/memory_manager.h"
26
27namespace Tegra::Decoder {
28namespace {
29// ZigZag LUTs from libavcodec.
30constexpr std::array<u8, 64> zig_zag_direct{
31 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48,
32 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23,
33 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
34};
35
36constexpr std::array<u8, 16> zig_zag_scan{
37 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
38 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
39};
40} // Anonymous namespace
41
42H264::H264(GPU& gpu_) : gpu(gpu_) {}
43
44H264::~H264() = default;
45
46const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state,
47 bool is_first_frame) {
48 H264DecoderContext context{};
49 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
50
51 const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff);
52 if (!is_first_frame && frame_number != 0) {
53 frame.resize(context.frame_data_size);
54
55 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
56 } else {
57 /// Encode header
58 H264BitWriter writer{};
59 writer.WriteU(1, 24);
60 writer.WriteU(0, 1);
61 writer.WriteU(3, 2);
62 writer.WriteU(7, 5);
63 writer.WriteU(100, 8);
64 writer.WriteU(0, 8);
65 writer.WriteU(31, 8);
66 writer.WriteUe(0);
67 const auto chroma_format_idc =
68 static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3);
69 writer.WriteUe(chroma_format_idc);
70 if (chroma_format_idc == 3) {
71 writer.WriteBit(false);
72 }
73
74 writer.WriteUe(0);
75 writer.WriteUe(0);
76 writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
77 writer.WriteBit(false); // Scaling matrix present flag
78
79 const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3);
80 writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf));
81 writer.WriteUe(order_cnt_type);
82 if (order_cnt_type == 0) {
83 writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt);
84 } else if (order_cnt_type == 1) {
85 writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
86
87 writer.WriteSe(0);
88 writer.WriteSe(0);
89 writer.WriteUe(0);
90 }
91
92 const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units /
93 (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
94
95 writer.WriteUe(16);
96 writer.WriteBit(false);
97 writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
98 writer.WriteUe(pic_height - 1);
99 writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
100
101 if (!context.h264_parameter_set.frame_mbs_only_flag) {
102 writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0);
103 }
104
105 writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0);
106 writer.WriteBit(false); // Frame cropping flag
107 writer.WriteBit(false); // VUI parameter present flag
108
109 writer.End();
110
111 // H264 PPS
112 writer.WriteU(1, 24);
113 writer.WriteU(0, 1);
114 writer.WriteU(3, 2);
115 writer.WriteU(8, 5);
116
117 writer.WriteUe(0);
118 writer.WriteUe(0);
119
120 writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
121 writer.WriteBit(false);
122 writer.WriteUe(0);
123 writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
124 writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
125 writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0);
126 writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2);
127 s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f);
128 pic_init_qp = (pic_init_qp << 26) >> 26;
129 writer.WriteSe(pic_init_qp);
130 writer.WriteSe(0);
131 s32 chroma_qp_index_offset =
132 static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f);
133 chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27;
134
135 writer.WriteSe(chroma_qp_index_offset);
136 writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0);
137 writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0);
138 writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0);
139 writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
140
141 writer.WriteBit(true);
142
143 for (s32 index = 0; index < 6; index++) {
144 writer.WriteBit(true);
145 const auto matrix_x4 =
146 std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end());
147 writer.WriteScalingList(matrix_x4, index * 16, 16);
148 }
149
150 if (context.h264_parameter_set.transform_8x8_mode_flag) {
151 for (s32 index = 0; index < 2; index++) {
152 writer.WriteBit(true);
153 const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(),
154 context.scaling_matrix_8.end());
155
156 writer.WriteScalingList(matrix_x8, index * 64, 64);
157 }
158 }
159
160 s32 chroma_qp_index_offset2 =
161 static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f);
162 chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27;
163
164 writer.WriteSe(chroma_qp_index_offset2);
165
166 writer.End();
167
168 const auto& encoded_header = writer.GetByteArray();
169 frame.resize(encoded_header.size() + context.frame_data_size);
170 std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
171
172 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
173 frame.data() + encoded_header.size(),
174 context.frame_data_size);
175 }
176
177 return frame;
178}
179
180H264BitWriter::H264BitWriter() = default;
181
182H264BitWriter::~H264BitWriter() = default;
183
184void H264BitWriter::WriteU(s32 value, s32 value_sz) {
185 WriteBits(value, value_sz);
186}
187
188void H264BitWriter::WriteSe(s32 value) {
189 WriteExpGolombCodedInt(value);
190}
191
192void H264BitWriter::WriteUe(u32 value) {
193 WriteExpGolombCodedUInt(value);
194}
195
196void H264BitWriter::End() {
197 WriteBit(true);
198 Flush();
199}
200
201void H264BitWriter::WriteBit(bool state) {
202 WriteBits(state ? 1 : 0, 1);
203}
204
205void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) {
206 std::vector<u8> scan(count);
207 if (count == 16) {
208 std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
209 } else {
210 std::memcpy(scan.data(), zig_zag_direct.data(), scan.size());
211 }
212 u8 last_scale = 8;
213
214 for (s32 index = 0; index < count; index++) {
215 const u8 value = list[start + scan[index]];
216 const s32 delta_scale = static_cast<s32>(value - last_scale);
217
218 WriteSe(delta_scale);
219
220 last_scale = value;
221 }
222}
223
224std::vector<u8>& H264BitWriter::GetByteArray() {
225 return byte_array;
226}
227
228const std::vector<u8>& H264BitWriter::GetByteArray() const {
229 return byte_array;
230}
231
232void H264BitWriter::WriteBits(s32 value, s32 bit_count) {
233 s32 value_pos = 0;
234
235 s32 remaining = bit_count;
236
237 while (remaining > 0) {
238 s32 copy_size = remaining;
239
240 const s32 free_bits = GetFreeBufferBits();
241
242 if (copy_size > free_bits) {
243 copy_size = free_bits;
244 }
245
246 const s32 mask = (1 << copy_size) - 1;
247
248 const s32 src_shift = (bit_count - value_pos) - copy_size;
249 const s32 dst_shift = (buffer_size - buffer_pos) - copy_size;
250
251 buffer |= ((value >> src_shift) & mask) << dst_shift;
252
253 value_pos += copy_size;
254 buffer_pos += copy_size;
255 remaining -= copy_size;
256 }
257}
258
259void H264BitWriter::WriteExpGolombCodedInt(s32 value) {
260 const s32 sign = value <= 0 ? 0 : 1;
261 if (value < 0) {
262 value = -value;
263 }
264 value = (value << 1) - sign;
265 WriteExpGolombCodedUInt(value);
266}
267
268void H264BitWriter::WriteExpGolombCodedUInt(u32 value) {
269 const s32 size = 32 - Common::CountLeadingZeroes32(static_cast<s32>(value + 1));
270 WriteBits(1, size);
271
272 value -= (1U << (size - 1)) - 1;
273 WriteBits(static_cast<s32>(value), size - 1);
274}
275
276s32 H264BitWriter::GetFreeBufferBits() {
277 if (buffer_pos == buffer_size) {
278 Flush();
279 }
280
281 return buffer_size - buffer_pos;
282}
283
284void H264BitWriter::Flush() {
285 if (buffer_pos == 0) {
286 return;
287 }
288 byte_array.push_back(static_cast<u8>(buffer));
289
290 buffer = 0;
291 buffer_pos = 0;
292}
293} // namespace Tegra::Decoder
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h
new file mode 100644
index 000000000..0f3a1d9f3
--- /dev/null
+++ b/src/video_core/command_classes/codecs/h264.h
@@ -0,0 +1,118 @@
1// MIT License
2//
3// Copyright (c) Ryujinx Team and Contributors
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
6// associated documentation files (the "Software"), to deal in the Software without restriction,
7// including without limitation the rights to use, copy, modify, merge, publish, distribute,
8// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
9// furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in all copies or
12// substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
15// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
17// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19//
20
21#pragma once
22
23#include <vector>
24#include "common/common_funcs.h"
25#include "common/common_types.h"
26#include "video_core/command_classes/nvdec_common.h"
27
28namespace Tegra {
29class GPU;
30namespace Decoder {
31
32class H264BitWriter {
33public:
34 H264BitWriter();
35 ~H264BitWriter();
36
37 /// The following Write methods are based on clause 9.1 in the H.264 specification.
38 /// WriteSe and WriteUe write in the Exp-Golomb-coded syntax
39 void WriteU(s32 value, s32 value_sz);
40 void WriteSe(s32 value);
41 void WriteUe(u32 value);
42
43 /// Finalize the bitstream
44 void End();
45
46 /// append a bit to the stream, equivalent value to the state parameter
47 void WriteBit(bool state);
48
49 /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
50 /// Writes the scaling matrices of the sream
51 void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count);
52
53 /// Return the bitstream as a vector.
54 [[nodiscard]] std::vector<u8>& GetByteArray();
55 [[nodiscard]] const std::vector<u8>& GetByteArray() const;
56
57private:
58 void WriteBits(s32 value, s32 bit_count);
59 void WriteExpGolombCodedInt(s32 value);
60 void WriteExpGolombCodedUInt(u32 value);
61 [[nodiscard]] s32 GetFreeBufferBits();
62 void Flush();
63
64 s32 buffer_size{8};
65
66 s32 buffer{};
67 s32 buffer_pos{};
68 std::vector<u8> byte_array;
69};
70
71class H264 {
72public:
73 explicit H264(GPU& gpu);
74 ~H264();
75
76 /// Compose the H264 header of the frame for FFmpeg decoding
77 [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(
78 const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);
79
80private:
81 struct H264ParameterSet {
82 u32 log2_max_pic_order_cnt{};
83 u32 delta_pic_order_always_zero_flag{};
84 u32 frame_mbs_only_flag{};
85 u32 pic_width_in_mbs{};
86 u32 pic_height_in_map_units{};
87 INSERT_PADDING_WORDS(1);
88 u32 entropy_coding_mode_flag{};
89 u32 bottom_field_pic_order_flag{};
90 u32 num_refidx_l0_default_active{};
91 u32 num_refidx_l1_default_active{};
92 u32 deblocking_filter_control_flag{};
93 u32 redundant_pic_count_flag{};
94 u32 transform_8x8_mode_flag{};
95 INSERT_PADDING_WORDS(9);
96 u64 flags{};
97 u32 frame_number{};
98 u32 frame_number2{};
99 };
100 static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size");
101
102 struct H264DecoderContext {
103 INSERT_PADDING_BYTES(0x48);
104 u32 frame_data_size{};
105 INSERT_PADDING_BYTES(0xc);
106 H264ParameterSet h264_parameter_set{};
107 INSERT_PADDING_BYTES(0x100);
108 std::array<u8, 0x60> scaling_matrix_4;
109 std::array<u8, 0x80> scaling_matrix_8;
110 };
111 static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size");
112
113 std::vector<u8> frame;
114 GPU& gpu;
115};
116
117} // namespace Decoder
118} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp
new file mode 100644
index 000000000..59e586695
--- /dev/null
+++ b/src/video_core/command_classes/codecs/vp9.cpp
@@ -0,0 +1,989 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring> // for std::memcpy
6#include <numeric>
7#include "video_core/command_classes/codecs/vp9.h"
8#include "video_core/gpu.h"
9#include "video_core/memory_manager.h"
10
11namespace Tegra::Decoder {
12namespace {
13// Default compressed header probabilities once frame context resets
14constexpr Vp9EntropyProbs default_probs{
15 .y_mode_prob{
16 65, 32, 18, 144, 162, 194, 41, 51, 98, 132, 68, 18, 165, 217, 196, 45, 40, 78,
17 173, 80, 19, 176, 240, 193, 64, 35, 46, 221, 135, 38, 194, 248, 121, 96, 85, 29,
18 },
19 .partition_prob{
20 199, 122, 141, 0, 147, 63, 159, 0, 148, 133, 118, 0, 121, 104, 114, 0,
21 174, 73, 87, 0, 92, 41, 83, 0, 82, 99, 50, 0, 53, 39, 39, 0,
22 177, 58, 59, 0, 68, 26, 63, 0, 52, 79, 25, 0, 17, 14, 12, 0,
23 222, 34, 30, 0, 72, 16, 44, 0, 58, 32, 12, 0, 10, 7, 6, 0,
24 },
25 .coef_probs{
26 195, 29, 183, 84, 49, 136, 8, 42, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0,
27 31, 107, 169, 35, 99, 159, 17, 82, 140, 8, 66, 114, 2, 44, 76, 1, 19, 32,
28 40, 132, 201, 29, 114, 187, 13, 91, 157, 7, 75, 127, 3, 58, 95, 1, 28, 47,
29 69, 142, 221, 42, 122, 201, 15, 91, 159, 6, 67, 121, 1, 42, 77, 1, 17, 31,
30 102, 148, 228, 67, 117, 204, 17, 82, 154, 6, 59, 114, 2, 39, 75, 1, 15, 29,
31 156, 57, 233, 119, 57, 212, 58, 48, 163, 29, 40, 124, 12, 30, 81, 3, 12, 31,
32 191, 107, 226, 124, 117, 204, 25, 99, 155, 0, 0, 0, 0, 0, 0, 0, 0, 0,
33 29, 148, 210, 37, 126, 194, 8, 93, 157, 2, 68, 118, 1, 39, 69, 1, 17, 33,
34 41, 151, 213, 27, 123, 193, 3, 82, 144, 1, 58, 105, 1, 32, 60, 1, 13, 26,
35 59, 159, 220, 23, 126, 198, 4, 88, 151, 1, 66, 114, 1, 38, 71, 1, 18, 34,
36 114, 136, 232, 51, 114, 207, 11, 83, 155, 3, 56, 105, 1, 33, 65, 1, 17, 34,
37 149, 65, 234, 121, 57, 215, 61, 49, 166, 28, 36, 114, 12, 25, 76, 3, 16, 42,
38 214, 49, 220, 132, 63, 188, 42, 65, 137, 0, 0, 0, 0, 0, 0, 0, 0, 0,
39 85, 137, 221, 104, 131, 216, 49, 111, 192, 21, 87, 155, 2, 49, 87, 1, 16, 28,
40 89, 163, 230, 90, 137, 220, 29, 100, 183, 10, 70, 135, 2, 42, 81, 1, 17, 33,
41 108, 167, 237, 55, 133, 222, 15, 97, 179, 4, 72, 135, 1, 45, 85, 1, 19, 38,
42 124, 146, 240, 66, 124, 224, 17, 88, 175, 4, 58, 122, 1, 36, 75, 1, 18, 37,
43 141, 79, 241, 126, 70, 227, 66, 58, 182, 30, 44, 136, 12, 34, 96, 2, 20, 47,
44 229, 99, 249, 143, 111, 235, 46, 109, 192, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45 82, 158, 236, 94, 146, 224, 25, 117, 191, 9, 87, 149, 3, 56, 99, 1, 33, 57,
46 83, 167, 237, 68, 145, 222, 10, 103, 177, 2, 72, 131, 1, 41, 79, 1, 20, 39,
47 99, 167, 239, 47, 141, 224, 10, 104, 178, 2, 73, 133, 1, 44, 85, 1, 22, 47,
48 127, 145, 243, 71, 129, 228, 17, 93, 177, 3, 61, 124, 1, 41, 84, 1, 21, 52,
49 157, 78, 244, 140, 72, 231, 69, 58, 184, 31, 44, 137, 14, 38, 105, 8, 23, 61,
50 125, 34, 187, 52, 41, 133, 6, 31, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51 37, 109, 153, 51, 102, 147, 23, 87, 128, 8, 67, 101, 1, 41, 63, 1, 19, 29,
52 31, 154, 185, 17, 127, 175, 6, 96, 145, 2, 73, 114, 1, 51, 82, 1, 28, 45,
53 23, 163, 200, 10, 131, 185, 2, 93, 148, 1, 67, 111, 1, 41, 69, 1, 14, 24,
54 29, 176, 217, 12, 145, 201, 3, 101, 156, 1, 69, 111, 1, 39, 63, 1, 14, 23,
55 57, 192, 233, 25, 154, 215, 6, 109, 167, 3, 78, 118, 1, 48, 69, 1, 21, 29,
56 202, 105, 245, 108, 106, 216, 18, 90, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0,
57 33, 172, 219, 64, 149, 206, 14, 117, 177, 5, 90, 141, 2, 61, 95, 1, 37, 57,
58 33, 179, 220, 11, 140, 198, 1, 89, 148, 1, 60, 104, 1, 33, 57, 1, 12, 21,
59 30, 181, 221, 8, 141, 198, 1, 87, 145, 1, 58, 100, 1, 31, 55, 1, 12, 20,
60 32, 186, 224, 7, 142, 198, 1, 86, 143, 1, 58, 100, 1, 31, 55, 1, 12, 22,
61 57, 192, 227, 20, 143, 204, 3, 96, 154, 1, 68, 112, 1, 42, 69, 1, 19, 32,
62 212, 35, 215, 113, 47, 169, 29, 48, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0,
63 74, 129, 203, 106, 120, 203, 49, 107, 178, 19, 84, 144, 4, 50, 84, 1, 15, 25,
64 71, 172, 217, 44, 141, 209, 15, 102, 173, 6, 76, 133, 2, 51, 89, 1, 24, 42,
65 64, 185, 231, 31, 148, 216, 8, 103, 175, 3, 74, 131, 1, 46, 81, 1, 18, 30,
66 65, 196, 235, 25, 157, 221, 5, 105, 174, 1, 67, 120, 1, 38, 69, 1, 15, 30,
67 65, 204, 238, 30, 156, 224, 7, 107, 177, 2, 70, 124, 1, 42, 73, 1, 18, 34,
68 225, 86, 251, 144, 104, 235, 42, 99, 181, 0, 0, 0, 0, 0, 0, 0, 0, 0,
69 85, 175, 239, 112, 165, 229, 29, 136, 200, 12, 103, 162, 6, 77, 123, 2, 53, 84,
70 75, 183, 239, 30, 155, 221, 3, 106, 171, 1, 74, 128, 1, 44, 76, 1, 17, 28,
71 73, 185, 240, 27, 159, 222, 2, 107, 172, 1, 75, 127, 1, 42, 73, 1, 17, 29,
72 62, 190, 238, 21, 159, 222, 2, 107, 172, 1, 72, 122, 1, 40, 71, 1, 18, 32,
73 61, 199, 240, 27, 161, 226, 4, 113, 180, 1, 76, 129, 1, 46, 80, 1, 23, 41,
74 7, 27, 153, 5, 30, 95, 1, 16, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0,
75 50, 75, 127, 57, 75, 124, 27, 67, 108, 10, 54, 86, 1, 33, 52, 1, 12, 18,
76 43, 125, 151, 26, 108, 148, 7, 83, 122, 2, 59, 89, 1, 38, 60, 1, 17, 27,
77 23, 144, 163, 13, 112, 154, 2, 75, 117, 1, 50, 81, 1, 31, 51, 1, 14, 23,
78 18, 162, 185, 6, 123, 171, 1, 78, 125, 1, 51, 86, 1, 31, 54, 1, 14, 23,
79 15, 199, 227, 3, 150, 204, 1, 91, 146, 1, 55, 95, 1, 30, 53, 1, 11, 20,
80 19, 55, 240, 19, 59, 196, 3, 52, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0,
81 41, 166, 207, 104, 153, 199, 31, 123, 181, 14, 101, 152, 5, 72, 106, 1, 36, 52,
82 35, 176, 211, 12, 131, 190, 2, 88, 144, 1, 60, 101, 1, 36, 60, 1, 16, 28,
83 28, 183, 213, 8, 134, 191, 1, 86, 142, 1, 56, 96, 1, 30, 53, 1, 12, 20,
84 20, 190, 215, 4, 135, 192, 1, 84, 139, 1, 53, 91, 1, 28, 49, 1, 11, 20,
85 13, 196, 216, 2, 137, 192, 1, 86, 143, 1, 57, 99, 1, 32, 56, 1, 13, 24,
86 211, 29, 217, 96, 47, 156, 22, 43, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0,
87 78, 120, 193, 111, 116, 186, 46, 102, 164, 15, 80, 128, 2, 49, 76, 1, 18, 28,
88 71, 161, 203, 42, 132, 192, 10, 98, 150, 3, 69, 109, 1, 44, 70, 1, 18, 29,
89 57, 186, 211, 30, 140, 196, 4, 93, 146, 1, 62, 102, 1, 38, 65, 1, 16, 27,
90 47, 199, 217, 14, 145, 196, 1, 88, 142, 1, 57, 98, 1, 36, 62, 1, 15, 26,
91 26, 219, 229, 5, 155, 207, 1, 94, 151, 1, 60, 104, 1, 36, 62, 1, 16, 28,
92 233, 29, 248, 146, 47, 220, 43, 52, 140, 0, 0, 0, 0, 0, 0, 0, 0, 0,
93 100, 163, 232, 179, 161, 222, 63, 142, 204, 37, 113, 174, 26, 89, 137, 18, 68, 97,
94 85, 181, 230, 32, 146, 209, 7, 100, 164, 3, 71, 121, 1, 45, 77, 1, 18, 30,
95 65, 187, 230, 20, 148, 207, 2, 97, 159, 1, 68, 116, 1, 40, 70, 1, 14, 29,
96 40, 194, 227, 8, 147, 204, 1, 94, 155, 1, 65, 112, 1, 39, 66, 1, 14, 26,
97 16, 208, 228, 3, 151, 207, 1, 98, 160, 1, 67, 117, 1, 41, 74, 1, 17, 31,
98 17, 38, 140, 7, 34, 80, 1, 17, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0,
99 37, 75, 128, 41, 76, 128, 26, 66, 116, 12, 52, 94, 2, 32, 55, 1, 10, 16,
100 50, 127, 154, 37, 109, 152, 16, 82, 121, 5, 59, 85, 1, 35, 54, 1, 13, 20,
101 40, 142, 167, 17, 110, 157, 2, 71, 112, 1, 44, 72, 1, 27, 45, 1, 11, 17,
102 30, 175, 188, 9, 124, 169, 1, 74, 116, 1, 48, 78, 1, 30, 49, 1, 11, 18,
103 10, 222, 223, 2, 150, 194, 1, 83, 128, 1, 48, 79, 1, 27, 45, 1, 11, 17,
104 36, 41, 235, 29, 36, 193, 10, 27, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0,
105 85, 165, 222, 177, 162, 215, 110, 135, 195, 57, 113, 168, 23, 83, 120, 10, 49, 61,
106 85, 190, 223, 36, 139, 200, 5, 90, 146, 1, 60, 103, 1, 38, 65, 1, 18, 30,
107 72, 202, 223, 23, 141, 199, 2, 86, 140, 1, 56, 97, 1, 36, 61, 1, 16, 27,
108 55, 218, 225, 13, 145, 200, 1, 86, 141, 1, 57, 99, 1, 35, 61, 1, 13, 22,
109 15, 235, 212, 1, 132, 184, 1, 84, 139, 1, 57, 97, 1, 34, 56, 1, 14, 23,
110 181, 21, 201, 61, 37, 123, 10, 38, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0,
111 47, 106, 172, 95, 104, 173, 42, 93, 159, 18, 77, 131, 4, 50, 81, 1, 17, 23,
112 62, 147, 199, 44, 130, 189, 28, 102, 154, 18, 75, 115, 2, 44, 65, 1, 12, 19,
113 55, 153, 210, 24, 130, 194, 3, 93, 146, 1, 61, 97, 1, 31, 50, 1, 10, 16,
114 49, 186, 223, 17, 148, 204, 1, 96, 142, 1, 53, 83, 1, 26, 44, 1, 11, 17,
115 13, 217, 212, 2, 136, 180, 1, 78, 124, 1, 50, 83, 1, 29, 49, 1, 14, 23,
116 197, 13, 247, 82, 17, 222, 25, 17, 162, 0, 0, 0, 0, 0, 0, 0, 0, 0,
117 126, 186, 247, 234, 191, 243, 176, 177, 234, 104, 158, 220, 66, 128, 186, 55, 90, 137,
118 111, 197, 242, 46, 158, 219, 9, 104, 171, 2, 65, 125, 1, 44, 80, 1, 17, 91,
119 104, 208, 245, 39, 168, 224, 3, 109, 162, 1, 79, 124, 1, 50, 102, 1, 43, 102,
120 84, 220, 246, 31, 177, 231, 2, 115, 180, 1, 79, 134, 1, 55, 77, 1, 60, 79,
121 43, 243, 240, 8, 180, 217, 1, 115, 166, 1, 84, 121, 1, 51, 67, 1, 16, 6,
122 },
123 .switchable_interp_prob{235, 162, 36, 255, 34, 3, 149, 144},
124 .inter_mode_prob{
125 2, 173, 34, 0, 7, 145, 85, 0, 7, 166, 63, 0, 7, 94,
126 66, 0, 8, 64, 46, 0, 17, 81, 31, 0, 25, 29, 30, 0,
127 },
128 .intra_inter_prob{9, 102, 187, 225},
129 .comp_inter_prob{9, 102, 187, 225, 0},
130 .single_ref_prob{33, 16, 77, 74, 142, 142, 172, 170, 238, 247},
131 .comp_ref_prob{50, 126, 123, 221, 226},
132 .tx_32x32_prob{3, 136, 37, 5, 52, 13},
133 .tx_16x16_prob{20, 152, 15, 101},
134 .tx_8x8_prob{100, 66},
135 .skip_probs{192, 128, 64},
136 .joints{32, 64, 96},
137 .sign{128, 128},
138 .classes{
139 224, 144, 192, 168, 192, 176, 192, 198, 198, 245,
140 216, 128, 176, 160, 176, 176, 192, 198, 198, 208,
141 },
142 .class_0{216, 208},
143 .prob_bits{
144 136, 140, 148, 160, 176, 192, 224, 234, 234, 240,
145 136, 140, 148, 160, 176, 192, 224, 234, 234, 240,
146 },
147 .class_0_fr{128, 128, 64, 96, 112, 64, 128, 128, 64, 96, 112, 64},
148 .fr{64, 96, 64, 64, 96, 64},
149 .class_0_hp{160, 160},
150 .high_precision{128, 128},
151};
152
// 256-entry lookup table indexed by an 8-bit value.
// For indices 1..255 the entry is the number of leading zero bits of the index when viewed as
// an 8-bit quantity (1 -> 7, 2..3 -> 6, 4..7 -> 5, ..., 128..255 -> 0); index 0 maps to 0.
// NOTE(review): not referenced in the visible part of this file - presumably consumed by the
// range encoder when renormalizing; confirm against its implementation.
153constexpr std::array<s32, 256> norm_lut{
154 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
155 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
158 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
159 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
160 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
161 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
162};
163
// 254-entry remapping table read by RemapProbability() to turn a recentered probability
// difference into the value that gets written to the bitstream.
// Layout: the values 0..19 sit at indices 6, 19, 32, ... (every 13th slot, the last one at
// index 253); all remaining slots hold 20..253 in ascending order. Valid indices are 0..253.
164constexpr std::array<s32, 254> map_lut{
165 20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
166 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50, 51, 52, 53, 54,
167 55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
168 73, 4, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88, 89,
169 90, 91, 92, 93, 94, 95, 96, 97, 6, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
170 108, 109, 7, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, 123, 124,
171 125, 126, 127, 128, 129, 130, 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142,
172 143, 144, 145, 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11, 158, 159,
173 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171, 172, 173, 174, 175, 176, 177,
174 178, 179, 180, 181, 13, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 14, 194,
175 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211, 212,
176 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 17,
177 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 18, 242, 243, 244, 245, 246, 247,
178 248, 249, 250, 251, 252, 253, 19,
179};
180
181// 6.2.14 Tile size calculation
182
/// Returns the smallest log2 tile column count for a frame of the given pixel width: the first
/// exponent for which 64 << exponent is no longer below the number of 64-pixel columns.
[[nodiscard]] s32 CalcMinLog2TileCols(s32 frame_width) {
    // Number of 64-pixel wide columns, rounded up
    const s32 column_count = (frame_width + 63) / 64;

    s32 exponent = 0;
    for (; (64 << exponent) < column_count; ++exponent) {
    }
    return exponent;
}
193
/// Returns the largest log2 tile column count for a frame of the given pixel width: the
/// highest exponent that still leaves at least four 64-pixel columns per tile (0 when even a
/// single halving would drop below that).
[[nodiscard]] s32 CalcMaxLog2TileCols(s32 frame_width) {
    // Number of 64-pixel wide columns, rounded up
    const s32 column_count = (frame_width + 63) / 64;

    // Probe exponents starting at 1; the answer is the one before the first that fails
    s32 candidate = 1;
    for (; (column_count >> candidate) >= 4; ++candidate) {
    }
    return candidate - 1;
}
204
// Recenters new_prob around old_prob so that values close to old_prob map to small
// non-negative numbers. Based on section 6.3.6 of VP9 Specification
[[nodiscard]] s32 RecenterNonNeg(s32 new_prob, s32 old_prob) {
    const s32 distance = new_prob - old_prob;

    // Far above the old value: passed through unchanged
    if (new_prob > old_prob * 2) {
        return new_prob;
    }
    // At or above the old value -> even results, below it -> odd results
    return distance >= 0 ? distance * 2 : -distance * 2 - 1;
}
217
218// Adjusts old_prob depending on new_prob. Based on section 6.3.5 of VP9 Specification
219[[nodiscard]] s32 RemapProbability(s32 new_prob, s32 old_prob) {
220 new_prob--;
221 old_prob--;
222
223 std::size_t index{};
224
225 if (old_prob * 2 <= 0xff) {
226 index = static_cast<std::size_t>(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1));
227 } else {
228 index = static_cast<std::size_t>(
229 std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1));
230 }
231
232 return map_lut[index];
233}
234} // Anonymous namespace
235
// Binds the decoder helper to the GPU instance; only the reference is stored, so gpu_ has to
// outlive this object.
236VP9::VP9(GPU& gpu_) : gpu{gpu_} {}
237
// Nothing to release explicitly; members clean up after themselves.
238VP9::~VP9() = default;
239
240void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
241 const bool update = new_prob != old_prob;
242
243 writer.Write(update, diff_update_probability);
244
245 if (update) {
246 WriteProbabilityDelta(writer, new_prob, old_prob);
247 }
248}
249template <typename T, std::size_t N>
250void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
251 const std::array<T, N>& old_prob) {
252 for (std::size_t offset = 0; offset < new_prob.size(); ++offset) {
253 WriteProbabilityUpdate(writer, new_prob[offset], old_prob[offset]);
254 }
255}
256
257template <typename T, std::size_t N>
258void VP9::WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
259 const std::array<T, N>& old_prob) {
260 for (std::size_t offset = 0; offset < new_prob.size(); offset += 4) {
261 WriteProbabilityUpdate(writer, new_prob[offset + 0], old_prob[offset + 0]);
262 WriteProbabilityUpdate(writer, new_prob[offset + 1], old_prob[offset + 1]);
263 WriteProbabilityUpdate(writer, new_prob[offset + 2], old_prob[offset + 2]);
264 }
265}
266
267void VP9::WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
268 const int delta = RemapProbability(new_prob, old_prob);
269
270 EncodeTermSubExp(writer, delta);
271}
272
273void VP9::EncodeTermSubExp(VpxRangeEncoder& writer, s32 value) {
274 if (WriteLessThan(writer, value, 16)) {
275 writer.Write(value, 4);
276 } else if (WriteLessThan(writer, value, 32)) {
277 writer.Write(value - 16, 4);
278 } else if (WriteLessThan(writer, value, 64)) {
279 writer.Write(value - 32, 5);
280 } else {
281 value -= 64;
282
283 constexpr s32 size = 8;
284
285 const s32 mask = (1 << size) - 191;
286
287 const s32 delta = value - mask;
288
289 if (delta < 0) {
290 writer.Write(value, size - 1);
291 } else {
292 writer.Write(delta / 2 + mask, size - 1);
293 writer.Write(delta & 1, 1);
294 }
295 }
296}
297
298bool VP9::WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test) {
299 const bool is_lt = value < test;
300 writer.Write(!is_lt);
301 return is_lt;
302}
303
304void VP9::WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode,
305 const std::array<u8, 1728>& new_prob,
306 const std::array<u8, 1728>& old_prob) {
307 constexpr u32 block_bytes = 2 * 2 * 6 * 6 * 3;
308
309 const auto needs_update = [&](u32 base_index) {
310 return !std::equal(new_prob.begin() + base_index,
311 new_prob.begin() + base_index + block_bytes,
312 old_prob.begin() + base_index);
313 };
314
315 for (u32 block_index = 0; block_index < 4; block_index++) {
316 const u32 base_index = block_index * block_bytes;
317 const bool update = needs_update(base_index);
318 writer.Write(update);
319
320 if (update) {
321 u32 index = base_index;
322 for (s32 i = 0; i < 2; i++) {
323 for (s32 j = 0; j < 2; j++) {
324 for (s32 k = 0; k < 6; k++) {
325 for (s32 l = 0; l < 6; l++) {
326 if (k != 0 || l < 3) {
327 WriteProbabilityUpdate(writer, new_prob[index + 0],
328 old_prob[index + 0]);
329 WriteProbabilityUpdate(writer, new_prob[index + 1],
330 old_prob[index + 1]);
331 WriteProbabilityUpdate(writer, new_prob[index + 2],
332 old_prob[index + 2]);
333 }
334 index += 3;
335 }
336 }
337 }
338 }
339 }
340 if (block_index == static_cast<u32>(tx_mode)) {
341 break;
342 }
343 }
344}
345
346void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
347 const bool update = new_prob != old_prob;
348 writer.Write(update, diff_update_probability);
349
350 if (update) {
351 writer.Write(new_prob >> 1, 7);
352 }
353}
354
355Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) {
356 PictureInfo picture_info{};
357 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
358 Vp9PictureInfo vp9_info = picture_info.Convert();
359
360 InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy);
361
362 // surface_luma_offset[0:3] contains the address of the reference frame offsets in the following
363 // order: last, golden, altref, current. It may be worthwhile to track the updates done here
364 // to avoid buffering frame data needed for reference frame updating in the header composition.
365 std::memcpy(vp9_info.frame_offsets.data(), state.surface_luma_offset.data(), 4 * sizeof(u64));
366
367 return vp9_info;
368}
369
370void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
371 EntropyProbs entropy{};
372 gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
373 entropy.Convert(dst);
374}
375
// Reads the frame described by state from guest memory and pushes it through a two slot
// delay line (new frame -> next_next_frame -> next_frame -> returned frame).
// Before a buffered frame moves on, its show_frame flag is overwritten with the
// last_frame_shown value of the frame that displaces it.
// While the delay line is still filling up (neither or only one slot occupied) the returned
// container has had its bit_stream moved from.
376Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) {
377 Vp9FrameContainer current_frame{};
378 {
379 gpu.SyncGuestHost();
380 current_frame.info = GetVp9PictureInfo(state);
381 current_frame.bit_stream.resize(current_frame.info.bitstream_size);
382 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(),
383 current_frame.info.bitstream_size);
384 }
385 // Buffer two frames, saving the last show frame info
386 if (!next_next_frame.bit_stream.empty()) {
// Swap the freshly read frame with the one waiting in next_next_frame
387 Vp9FrameContainer temp{
388 .info = current_frame.info,
389 .bit_stream = std::move(current_frame.bit_stream),
390 };
391 next_next_frame.info.show_frame = current_frame.info.last_frame_shown;
392 current_frame.info = next_next_frame.info;
393 current_frame.bit_stream = std::move(next_next_frame.bit_stream);
394 next_next_frame = std::move(temp);
395
396 if (!next_frame.bit_stream.empty()) {
// Second stage: swap with next_frame, whose previous content becomes the result
397 Vp9FrameContainer temp2{
398 .info = current_frame.info,
399 .bit_stream = std::move(current_frame.bit_stream),
400 };
401 next_frame.info.show_frame = current_frame.info.last_frame_shown;
402 current_frame.info = next_frame.info;
403 current_frame.bit_stream = std::move(next_frame.bit_stream);
404 next_frame = std::move(temp2);
405 } else {
// next_frame is still empty: park the frame there
406 next_frame.info = current_frame.info;
407 next_frame.bit_stream = std::move(current_frame.bit_stream);
408 }
409 } else {
// Nothing buffered yet: the new frame becomes next_next_frame
410 next_next_frame.info = current_frame.info;
411 next_next_frame.bit_stream = std::move(current_frame.bit_stream);
412 }
413 return current_frame;
414}
415
// Builds the compressed header for current_frame_info with a VpxRangeEncoder and returns the
// encoder's buffer.
// Every probability is written as an update relative to prev_frame_probs. prev_frame_probs is
// only overwritten with the probabilities of the current frame when update_probs is set, i.e.
// when the frame is shown and is not a key frame.
// The order of the writes below defines the layout of the header and must not be changed.
416std::vector<u8> VP9::ComposeCompressedHeader() {
417 VpxRangeEncoder writer{};
418 const bool update_probs = current_frame_info.show_frame && !current_frame_info.is_key_frame;
// The transform mode is only written for non-lossless frames: values 0..2 take two bits,
// values 3 and 4 are written as 3 followed by one bit telling them apart.
419 if (!current_frame_info.lossless) {
420 if (static_cast<u32>(current_frame_info.transform_mode) >= 3) {
421 writer.Write(3, 2);
422 writer.Write(current_frame_info.transform_mode == 4);
423 } else {
424 writer.Write(current_frame_info.transform_mode, 2);
425 }
426 }
427
428 if (current_frame_info.transform_mode == 4) {
429 // tx_mode_probs() in the spec
430 WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_8x8_prob,
431 prev_frame_probs.tx_8x8_prob);
432 WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_16x16_prob,
433 prev_frame_probs.tx_16x16_prob);
434 WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_32x32_prob,
435 prev_frame_probs.tx_32x32_prob);
436 if (update_probs) {
437 prev_frame_probs.tx_8x8_prob = current_frame_info.entropy.tx_8x8_prob;
438 prev_frame_probs.tx_16x16_prob = current_frame_info.entropy.tx_16x16_prob;
439 prev_frame_probs.tx_32x32_prob = current_frame_info.entropy.tx_32x32_prob;
440 }
441 }
442 // read_coef_probs() in the spec
443 WriteCoefProbabilityUpdate(writer, current_frame_info.transform_mode,
444 current_frame_info.entropy.coef_probs, prev_frame_probs.coef_probs);
445 // read_skip_probs() in the spec
446 WriteProbabilityUpdate(writer, current_frame_info.entropy.skip_probs,
447 prev_frame_probs.skip_probs);
448
449 if (update_probs) {
450 prev_frame_probs.coef_probs = current_frame_info.entropy.coef_probs;
451 prev_frame_probs.skip_probs = current_frame_info.entropy.skip_probs;
452 }
453
// Everything below is only written for frames that are not intra only
454 if (!current_frame_info.intra_only) {
455 // read_inter_probs() in the spec
456 WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.inter_mode_prob,
457 prev_frame_probs.inter_mode_prob);
458
459 if (current_frame_info.interp_filter == 4) {
460 // read_interp_filter_probs() in the spec
461 WriteProbabilityUpdate(writer, current_frame_info.entropy.switchable_interp_prob,
462 prev_frame_probs.switchable_interp_prob);
463 if (update_probs) {
464 prev_frame_probs.switchable_interp_prob =
465 current_frame_info.entropy.switchable_interp_prob;
466 }
467 }
468
469 // read_is_inter_probs() in the spec
470 WriteProbabilityUpdate(writer, current_frame_info.entropy.intra_inter_prob,
471 prev_frame_probs.intra_inter_prob);
472
473 // frame_reference_mode() in the spec
// The reference mode is only written when the sign bias of reference 1 differs from that
// of reference 2 or reference 3.
474 if ((current_frame_info.ref_frame_sign_bias[1] & 1) !=
475 (current_frame_info.ref_frame_sign_bias[2] & 1) ||
476 (current_frame_info.ref_frame_sign_bias[1] & 1) !=
477 (current_frame_info.ref_frame_sign_bias[3] & 1)) {
478 if (current_frame_info.reference_mode >= 1) {
479 writer.Write(1, 1);
480 writer.Write(current_frame_info.reference_mode == 2);
481 } else {
482 writer.Write(0, 1);
483 }
484 }
485
486 // frame_reference_mode_probs() in the spec
487 if (current_frame_info.reference_mode == 2) {
488 WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_inter_prob,
489 prev_frame_probs.comp_inter_prob);
490 if (update_probs) {
491 prev_frame_probs.comp_inter_prob = current_frame_info.entropy.comp_inter_prob;
492 }
493 }
494
495 if (current_frame_info.reference_mode != 1) {
496 WriteProbabilityUpdate(writer, current_frame_info.entropy.single_ref_prob,
497 prev_frame_probs.single_ref_prob);
498 if (update_probs) {
499 prev_frame_probs.single_ref_prob = current_frame_info.entropy.single_ref_prob;
500 }
501 }
502
503 if (current_frame_info.reference_mode != 0) {
504 WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_ref_prob,
505 prev_frame_probs.comp_ref_prob);
506 if (update_probs) {
507 prev_frame_probs.comp_ref_prob = current_frame_info.entropy.comp_ref_prob;
508 }
509 }
510
511 // read_y_mode_probs
512 for (std::size_t index = 0; index < current_frame_info.entropy.y_mode_prob.size();
513 ++index) {
514 WriteProbabilityUpdate(writer, current_frame_info.entropy.y_mode_prob[index],
515 prev_frame_probs.y_mode_prob[index]);
516 }
517
518 // read_partition_probs
519 WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.partition_prob,
520 prev_frame_probs.partition_prob);
521
522 // mv_probs
523 for (s32 i = 0; i < 3; i++) {
524 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.joints[i],
525 prev_frame_probs.joints[i]);
526 }
// Save the probabilities written so far in this branch
527 if (update_probs) {
528 prev_frame_probs.inter_mode_prob = current_frame_info.entropy.inter_mode_prob;
529 prev_frame_probs.intra_inter_prob = current_frame_info.entropy.intra_inter_prob;
530 prev_frame_probs.y_mode_prob = current_frame_info.entropy.y_mode_prob;
531 prev_frame_probs.partition_prob = current_frame_info.entropy.partition_prob;
532 prev_frame_probs.joints = current_frame_info.entropy.joints;
533 }
534
// Two passes (i = 0, 1), each covering one sign, ten classes, one class_0 and ten prob_bits
// entries
535 for (s32 i = 0; i < 2; i++) {
536 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.sign[i],
537 prev_frame_probs.sign[i]);
538 for (s32 j = 0; j < 10; j++) {
539 const int index = i * 10 + j;
540 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.classes[index],
541 prev_frame_probs.classes[index]);
542 }
543 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0[i],
544 prev_frame_probs.class_0[i]);
545
546 for (s32 j = 0; j < 10; j++) {
547 const int index = i * 10 + j;
548 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.prob_bits[index],
549 prev_frame_probs.prob_bits[index]);
550 }
551 }
552
// Two passes again: six class_0_fr entries followed by three fr entries each
553 for (s32 i = 0; i < 2; i++) {
554 for (s32 j = 0; j < 2; j++) {
555 for (s32 k = 0; k < 3; k++) {
556 const int index = i * 2 * 3 + j * 3 + k;
557 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_fr[index],
558 prev_frame_probs.class_0_fr[index]);
559 }
560 }
561
562 for (s32 j = 0; j < 3; j++) {
563 const int index = i * 3 + j;
564 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.fr[index],
565 prev_frame_probs.fr[index]);
566 }
567 }
568
569 if (current_frame_info.allow_high_precision_mv) {
570 for (s32 index = 0; index < 2; index++) {
571 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_hp[index],
572 prev_frame_probs.class_0_hp[index]);
573 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.high_precision[index],
574 prev_frame_probs.high_precision[index]);
575 }
576 }
577
578 // save previous probs
579 if (update_probs) {
580 prev_frame_probs.sign = current_frame_info.entropy.sign;
581 prev_frame_probs.classes = current_frame_info.entropy.classes;
582 prev_frame_probs.class_0 = current_frame_info.entropy.class_0;
583 prev_frame_probs.prob_bits = current_frame_info.entropy.prob_bits;
584 prev_frame_probs.class_0_fr = current_frame_info.entropy.class_0_fr;
585 prev_frame_probs.fr = current_frame_info.entropy.fr;
586 prev_frame_probs.class_0_hp = current_frame_info.entropy.class_0_hp;
587 prev_frame_probs.high_precision = current_frame_info.entropy.high_precision;
588 }
589 }
// Finish the range encoder before handing out its buffer
590 writer.End();
591 return writer.GetBuffer();
592}
593
594VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
595 VpxBitStreamWriter uncomp_writer{};
596
597 uncomp_writer.WriteU(2, 2); // Frame marker.
598 uncomp_writer.WriteU(0, 2); // Profile.
599 uncomp_writer.WriteBit(false); // Show existing frame.
600 uncomp_writer.WriteBit(!current_frame_info.is_key_frame); // is key frame?
601 uncomp_writer.WriteBit(current_frame_info.show_frame); // show frame?
602 uncomp_writer.WriteBit(current_frame_info.error_resilient_mode); // error reslience
603
604 if (current_frame_info.is_key_frame) {
605 uncomp_writer.WriteU(frame_sync_code, 24);
606 uncomp_writer.WriteU(0, 3); // Color space.
607 uncomp_writer.WriteU(0, 1); // Color range.
608 uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16);
609 uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16);
610 uncomp_writer.WriteBit(false); // Render and frame size different.
611
612 // Reset context
613 prev_frame_probs = default_probs;
614 swap_next_golden = false;
615 loop_filter_ref_deltas.fill(0);
616 loop_filter_mode_deltas.fill(0);
617
618 // allow frames offsets to stabilize before checking for golden frames
619 grace_period = 4;
620
621 // On key frames, all frame slots are set to the current frame,
622 // so the value of the selected slot doesn't really matter.
623 frame_ctxs.fill({current_frame_number, false, default_probs});
624
625 // intra only, meaning the frame can be recreated with no other references
626 current_frame_info.intra_only = true;
627
628 } else {
629
630 if (!current_frame_info.show_frame) {
631 uncomp_writer.WriteBit(current_frame_info.intra_only);
632 if (!current_frame_info.last_frame_was_key) {
633 swap_next_golden = !swap_next_golden;
634 }
635 } else {
636 current_frame_info.intra_only = false;
637 }
638 if (!current_frame_info.error_resilient_mode) {
639 uncomp_writer.WriteU(0, 2); // Reset frame context.
640 }
641
642 // Last, Golden, Altref frames
643 std::array<s32, 3> ref_frame_index{0, 1, 2};
644
645 // Set when next frame is hidden
646 // altref and golden references are swapped
647 if (swap_next_golden) {
648 ref_frame_index = std::array<s32, 3>{0, 2, 1};
649 }
650
651 // update Last Frame
652 u64 refresh_frame_flags = 1;
653
654 // golden frame may refresh, determined if the next golden frame offset is changed
655 bool golden_refresh = false;
656 if (grace_period <= 0) {
657 for (s32 index = 1; index < 3; ++index) {
658 if (current_frame_info.frame_offsets[index] !=
659 next_frame.info.frame_offsets[index]) {
660 current_frame_info.refresh_frame[index] = true;
661 golden_refresh = true;
662 grace_period = 3;
663 }
664 }
665 }
666
667 if (current_frame_info.show_frame &&
668 (!next_frame.info.show_frame || next_frame.info.is_key_frame)) {
669 // Update golden frame
670 refresh_frame_flags = swap_next_golden ? 2 : 4;
671 }
672
673 if (!current_frame_info.show_frame) {
674 // Update altref
675 refresh_frame_flags = swap_next_golden ? 2 : 4;
676 } else if (golden_refresh) {
677 refresh_frame_flags = 3;
678 }
679
680 if (current_frame_info.intra_only) {
681 uncomp_writer.WriteU(frame_sync_code, 24);
682 uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8);
683 uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16);
684 uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16);
685 uncomp_writer.WriteBit(false); // Render and frame size different.
686 } else {
687 uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8);
688
689 for (s32 index = 1; index < 4; index++) {
690 uncomp_writer.WriteU(ref_frame_index[index - 1], 3);
691 uncomp_writer.WriteU(current_frame_info.ref_frame_sign_bias[index], 1);
692 }
693
694 uncomp_writer.WriteBit(true); // Frame size with refs.
695 uncomp_writer.WriteBit(false); // Render and frame size different.
696 uncomp_writer.WriteBit(current_frame_info.allow_high_precision_mv);
697 uncomp_writer.WriteBit(current_frame_info.interp_filter == 4);
698
699 if (current_frame_info.interp_filter != 4) {
700 uncomp_writer.WriteU(current_frame_info.interp_filter, 2);
701 }
702 }
703 }
704
705 if (!current_frame_info.error_resilient_mode) {
706 uncomp_writer.WriteBit(true); // Refresh frame context. where do i get this info from?
707 uncomp_writer.WriteBit(true); // Frame parallel decoding mode.
708 }
709
710 int frame_ctx_idx = 0;
711 if (!current_frame_info.show_frame) {
712 frame_ctx_idx = 1;
713 }
714
715 uncomp_writer.WriteU(frame_ctx_idx, 2); // Frame context index.
716 prev_frame_probs =
717 frame_ctxs[frame_ctx_idx].probs; // reference probabilities for compressed header
718 frame_ctxs[frame_ctx_idx] = {current_frame_number, false, current_frame_info.entropy};
719
720 uncomp_writer.WriteU(current_frame_info.first_level, 6);
721 uncomp_writer.WriteU(current_frame_info.sharpness_level, 3);
722 uncomp_writer.WriteBit(current_frame_info.mode_ref_delta_enabled);
723
724 if (current_frame_info.mode_ref_delta_enabled) {
725 // check if ref deltas are different, update accordingly
726 std::array<bool, 4> update_loop_filter_ref_deltas;
727 std::array<bool, 2> update_loop_filter_mode_deltas;
728
729 bool loop_filter_delta_update = false;
730
731 for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) {
732 const s8 old_deltas = loop_filter_ref_deltas[index];
733 const s8 new_deltas = current_frame_info.ref_deltas[index];
734 const bool differing_delta = old_deltas != new_deltas;
735
736 update_loop_filter_ref_deltas[index] = differing_delta;
737 loop_filter_delta_update |= differing_delta;
738 }
739
740 for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) {
741 const s8 old_deltas = loop_filter_mode_deltas[index];
742 const s8 new_deltas = current_frame_info.mode_deltas[index];
743 const bool differing_delta = old_deltas != new_deltas;
744
745 update_loop_filter_mode_deltas[index] = differing_delta;
746 loop_filter_delta_update |= differing_delta;
747 }
748
749 uncomp_writer.WriteBit(loop_filter_delta_update);
750
751 if (loop_filter_delta_update) {
752 for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) {
753 uncomp_writer.WriteBit(update_loop_filter_ref_deltas[index]);
754
755 if (update_loop_filter_ref_deltas[index]) {
756 uncomp_writer.WriteS(current_frame_info.ref_deltas[index], 6);
757 }
758 }
759
760 for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) {
761 uncomp_writer.WriteBit(update_loop_filter_mode_deltas[index]);
762
763 if (update_loop_filter_mode_deltas[index]) {
764 uncomp_writer.WriteS(current_frame_info.mode_deltas[index], 6);
765 }
766 }
767 // save new deltas
768 loop_filter_ref_deltas = current_frame_info.ref_deltas;
769 loop_filter_mode_deltas = current_frame_info.mode_deltas;
770 }
771 }
772
773 uncomp_writer.WriteU(current_frame_info.base_q_index, 8);
774
775 uncomp_writer.WriteDeltaQ(current_frame_info.y_dc_delta_q);
776 uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q);
777 uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q);
778
779 uncomp_writer.WriteBit(false); // Segmentation enabled (TODO).
780
781 const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width);
782 const s32 max_tile_cols_log2 = CalcMaxLog2TileCols(current_frame_info.frame_size.width);
783
784 const s32 tile_cols_log2_diff = current_frame_info.log2_tile_cols - min_tile_cols_log2;
785 const s32 tile_cols_log2_inc_mask = (1 << tile_cols_log2_diff) - 1;
786
787 // If it's less than the maximum, we need to add an extra 0 on the bitstream
788 // to indicate that it should stop reading.
789 if (current_frame_info.log2_tile_cols < max_tile_cols_log2) {
790 uncomp_writer.WriteU(tile_cols_log2_inc_mask << 1, tile_cols_log2_diff + 1);
791 } else {
792 uncomp_writer.WriteU(tile_cols_log2_inc_mask, tile_cols_log2_diff);
793 }
794
795 const bool tile_rows_log2_is_nonzero = current_frame_info.log2_tile_rows != 0;
796
797 uncomp_writer.WriteBit(tile_rows_log2_is_nonzero);
798
799 if (tile_rows_log2_is_nonzero) {
800 uncomp_writer.WriteBit(current_frame_info.log2_tile_rows > 1);
801 }
802
803 return uncomp_writer;
804}
805
806const std::vector<u8>& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state) {
807 std::vector<u8> bitstream;
808 {
809 Vp9FrameContainer curr_frame = GetCurrentFrame(state);
810 current_frame_info = curr_frame.info;
811 bitstream = std::move(curr_frame.bit_stream);
812 }
813
814 // The uncompressed header routine sets PrevProb parameters needed for the compressed header
815 auto uncomp_writer = ComposeUncompressedHeader();
816 std::vector<u8> compressed_header = ComposeCompressedHeader();
817
818 uncomp_writer.WriteU(static_cast<s32>(compressed_header.size()), 16);
819 uncomp_writer.Flush();
820 std::vector<u8> uncompressed_header = uncomp_writer.GetByteArray();
821
822 // Write headers and frame to buffer
823 frame.resize(uncompressed_header.size() + compressed_header.size() + bitstream.size());
824 std::memcpy(frame.data(), uncompressed_header.data(), uncompressed_header.size());
825 std::memcpy(frame.data() + uncompressed_header.size(), compressed_header.data(),
826 compressed_header.size());
827 std::memcpy(frame.data() + uncompressed_header.size() + compressed_header.size(),
828 bitstream.data(), bitstream.size());
829
830 // keep track of frame number
831 current_frame_number++;
832 grace_period--;
833
834 // don't display hidden frames
835 hidden = !current_frame_info.show_frame;
836 return frame;
837}
838
// Constructs the range encoder and immediately encodes a single `false` bit
// (at half probability) as the first symbol of the stream.
839 VpxRangeEncoder::VpxRangeEncoder() {
840 Write(false);
841 }
842
// Defaulted destructor; the members clean up after themselves.
843 VpxRangeEncoder::~VpxRangeEncoder() = default;
844
845void VpxRangeEncoder::Write(s32 value, s32 value_size) {
846 for (s32 bit = value_size - 1; bit >= 0; bit--) {
847 Write(((value >> bit) & 1) != 0);
848 }
849}
850
851void VpxRangeEncoder::Write(bool bit) {
852 Write(bit, half_probability);
853}
854
// Encodes one bit into base_stream using the given probability.
//
// bit:         the symbol to encode.
// probability: scaled by 1/256 when splitting the range; a `false` bit keeps the lower
//              `split` part of the range, a `true` bit keeps the remaining upper part.
//
// Statement order matters here: low_value, range and count together form the coder state.
855 void VpxRangeEncoder::Write(bool bit, s32 probability) {
856 u32 local_range = range;
// Split point of the current range, always at least 1.
857 const u32 split = 1 + (((local_range - 1) * static_cast<u32>(probability)) >> 8);
858 local_range = split;
859
860 if (bit) {
// A `true` bit selects the upper sub-range: advance the low bound past the split.
861 low_value += split;
862 local_range = range - split;
863 }
864
// norm_lut is defined outside this block; it is used here as the number of left shifts
// applied to renormalize the range (NOTE(review): confirm table contents at its definition).
865 s32 shift = norm_lut[local_range];
866 local_range <<= shift;
867 count += shift;
868
// Once count becomes non-negative, a byte is written to the stream.
869 if (count >= 0) {
870 const s32 offset = shift - count;
871
// Carry check: if the bit about to be shifted out of the top of low_value is set, the carry
// is added to the previously written byte(s).
872 if (((low_value << (offset - 1)) >> 31) != 0) {
873 const s32 current_pos = static_cast<s32>(base_stream.GetPosition());
874 base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos);
// Every trailing 0xff byte overflows to 0 and passes the carry one byte further back.
875 while (PeekByte() == 0xff) {
876 base_stream.WriteByte(0);
877
// WriteByte advanced the cursor by one, so step back two to reach the previous byte.
878 base_stream.Seek(-2, Common::SeekOrigin::FromCurrentPos);
879 }
// First byte that is not 0xff absorbs the carry; then restore the write position.
880 base_stream.WriteByte(static_cast<u8>((PeekByte() + 1)));
881 base_stream.Seek(current_pos, Common::SeekOrigin::SetOrigin);
882 }
883 base_stream.WriteByte(static_cast<u8>((low_value >> (24 - offset))));
884
// Drop the emitted byte from low_value and keep only the low 24 bits.
885 low_value <<= offset;
886 shift = count;
887 low_value &= 0xffffff;
888 count -= 8;
889 }
890
// Apply the remaining shift and commit the new range.
891 low_value <<= shift;
892 range = local_range;
893 }
894
895void VpxRangeEncoder::End() {
896 for (std::size_t index = 0; index < 32; ++index) {
897 Write(false);
898 }
899}
900
901u8 VpxRangeEncoder::PeekByte() {
902 const u8 value = base_stream.ReadByte();
903 base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos);
904
905 return value;
906}
907
// Defaulted constructor; members use their in-class initializers.
908 VpxBitStreamWriter::VpxBitStreamWriter() = default;
909
// Defaulted destructor.
910 VpxBitStreamWriter::~VpxBitStreamWriter() = default;
911
// Writes the lowest value_size bits of value, most significant bit first.
// Thin wrapper around WriteBits().
912 void VpxBitStreamWriter::WriteU(u32 value, u32 value_size) {
913 WriteBits(value, value_size);
914 }
915
916void VpxBitStreamWriter::WriteS(s32 value, u32 value_size) {
917 const bool sign = value < 0;
918 if (sign) {
919 value = -value;
920 }
921
922 WriteBits(static_cast<u32>(value << 1) | (sign ? 1 : 0), value_size + 1);
923}
924
// Writes a delta-coded quantizer value: one "delta_coded" flag bit, followed by the low
// 4 bits of value when value is non-zero.
//
// NOTE(review): the VP9 specification codes delta_q as su(4), i.e. 4 magnitude bits plus a
// trailing sign bit. This function emits only the 4 bits and no sign bit, so a non-zero
// delta may desynchronize the header. Confirm the encoding of the incoming value (it is
// received as u32) before changing this.
925 void VpxBitStreamWriter::WriteDeltaQ(u32 value) {
926 const bool delta_coded = value != 0;
927 WriteBit(delta_coded);
928
929 if (delta_coded) {
930 WriteBits(value, 4);
931 }
932 }
933
934void VpxBitStreamWriter::WriteBits(u32 value, u32 bit_count) {
935 s32 value_pos = 0;
936 s32 remaining = bit_count;
937
938 while (remaining > 0) {
939 s32 copy_size = remaining;
940
941 const s32 free = GetFreeBufferBits();
942
943 if (copy_size > free) {
944 copy_size = free;
945 }
946
947 const s32 mask = (1 << copy_size) - 1;
948
949 const s32 src_shift = (bit_count - value_pos) - copy_size;
950 const s32 dst_shift = (buffer_size - buffer_pos) - copy_size;
951
952 buffer |= ((value >> src_shift) & mask) << dst_shift;
953
954 value_pos += copy_size;
955 buffer_pos += copy_size;
956 remaining -= copy_size;
957 }
958}
959
// Writes a single bit: 1 when state is true, 0 otherwise.
960 void VpxBitStreamWriter::WriteBit(bool state) {
961 WriteBits(state ? 1 : 0, 1);
962 }
963
964s32 VpxBitStreamWriter::GetFreeBufferBits() {
965 if (buffer_pos == buffer_size) {
966 Flush();
967 }
968
969 return buffer_size - buffer_pos;
970}
971
972void VpxBitStreamWriter::Flush() {
973 if (buffer_pos == 0) {
974 return;
975 }
976 byte_array.push_back(static_cast<u8>(buffer));
977 buffer = 0;
978 buffer_pos = 0;
979}
980
// Returns a mutable reference to the bytes flushed so far. Bits still pending in the
// buffer are not included until Flush() is called.
981 std::vector<u8>& VpxBitStreamWriter::GetByteArray() {
982 return byte_array;
983 }
984
// Const overload of the accessor above.
985 const std::vector<u8>& VpxBitStreamWriter::GetByteArray() const {
986 return byte_array;
987 }
988
989} // namespace Tegra::Decoder
diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h
new file mode 100644
index 000000000..8396c8105
--- /dev/null
+++ b/src/video_core/command_classes/codecs/vp9.h
@@ -0,0 +1,197 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <vector>
9
10#include "common/common_types.h"
11#include "common/stream.h"
12#include "video_core/command_classes/codecs/vp9_types.h"
13#include "video_core/command_classes/nvdec_common.h"
14
15namespace Tegra {
16class GPU;
/// Frame type identifiers: key frame (0) or inter frame (1).
17 enum class FrameType { KeyFrame = 0, InterFrame = 1 };
18namespace Decoder {
19
20/// The VpxRangeEncoder, and VpxBitStreamWriter classes are used to compose the
21/// VP9 header bitstreams.
22
/// Range (arithmetic) encoder that writes probability-coded bits into an internal byte
/// stream. Move-only: copying is disabled.
23 class VpxRangeEncoder {
24 public:
/// Constructs the encoder and writes an initial `false` bit.
25 VpxRangeEncoder();
26 ~VpxRangeEncoder();
27
28 VpxRangeEncoder(const VpxRangeEncoder&) = delete;
29 VpxRangeEncoder& operator=(const VpxRangeEncoder&) = delete;
30
31 VpxRangeEncoder(VpxRangeEncoder&&) = default;
32 VpxRangeEncoder& operator=(VpxRangeEncoder&&) = default;
33
34 /// Writes the rightmost value_size bits from value into the stream, most significant first
35 void Write(s32 value, s32 value_size);
36
37 /// Writes a single bit with half probability
38 void Write(bool bit);
39
40 /// Writes a bit to the base_stream encoded with probability
41 void Write(bool bit, s32 probability);
42
43 /// Signal the end of the bitstream (writes 32 padding bits)
44 void End();
45
/// Returns the underlying byte buffer of the stream.
46 [[nodiscard]] std::vector<u8>& GetBuffer() {
47 return base_stream.GetBuffer();
48 }
49
/// Returns the underlying byte buffer of the stream (const overload).
50 [[nodiscard]] const std::vector<u8>& GetBuffer() const {
51 return base_stream.GetBuffer();
52 }
53
54 private:
/// Reads the byte at the current stream position without advancing the position.
55 u8 PeekByte();
/// Destination stream for the encoded bytes.
56 Common::Stream base_stream{};
/// Low bound of the current coding interval.
57 u32 low_value{};
/// Width of the current coding interval.
58 u32 range{0xff};
/// Shift counter; a byte is emitted whenever it becomes non-negative.
59 s32 count{-24};
/// Probability used by Write(bool).
60 s32 half_probability{128};
61 };
62
/// Plain (non-arithmetic) bit writer that packs bits MSB-first into bytes.
/// Move-only: copying is disabled.
63 class VpxBitStreamWriter {
64 public:
65 VpxBitStreamWriter();
66 ~VpxBitStreamWriter();
67
68 VpxBitStreamWriter(const VpxBitStreamWriter&) = delete;
69 VpxBitStreamWriter& operator=(const VpxBitStreamWriter&) = delete;
70
71 VpxBitStreamWriter(VpxBitStreamWriter&&) = default;
72 VpxBitStreamWriter& operator=(VpxBitStreamWriter&&) = default;
73
74 /// Write an unsigned integer value using value_size bits
75 void WriteU(u32 value, u32 value_size);
76
77 /// Write a signed integer value as value_size magnitude bits followed by a sign bit
78 void WriteS(s32 value, u32 value_size);
79
80 /// Based on 6.2.10 of VP9 Spec, writes a delta coded value
81 void WriteDeltaQ(u32 value);
82
83 /// Write a single bit.
84 void WriteBit(bool state);
85
86 /// Pushes current buffer into byte_array, resets buffer
87 void Flush();
88
89 /// Returns byte_array
90 [[nodiscard]] std::vector<u8>& GetByteArray();
91
92 /// Returns const byte_array
93 [[nodiscard]] const std::vector<u8>& GetByteArray() const;
94
95 private:
96 /// Write bit_count bits from value into buffer
97 void WriteBits(u32 value, u32 bit_count);
98
99 /// Gets the number of free bits left in buffer, invokes Flush() if buffer is full
100 s32 GetFreeBufferBits();
101
/// Capacity of buffer in bits (one byte).
102 s32 buffer_size{8};
103
/// Bits accumulated since the last flush.
104 s32 buffer{};
/// Number of bits currently held in buffer.
105 s32 buffer_pos{};
/// Completed bytes of the bitstream.
106 std::vector<u8> byte_array;
107 };
108
/// Composes VP9 frame headers from NVDEC-provided state so that the resulting frame
/// (headers + bitstream) can be handed to a decoder. Not copyable.
109 class VP9 {
110 public:
111 explicit VP9(GPU& gpu_);
112 ~VP9();
113
114 VP9(const VP9&) = delete;
115 VP9& operator=(const VP9&) = delete;
116
117 VP9(VP9&&) = default;
118 VP9& operator=(VP9&&) = delete;
119
120 /// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec
121 /// documentation. The returned reference points at an internal buffer that is overwritten
/// by the next call.
122 [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(
123 const NvdecCommon::NvdecRegisters& state);
124
125 /// Returns true if the most recent frame was a hidden frame.
126 [[nodiscard]] bool WasFrameHidden() const {
127 return hidden;
128 }
129
130 private:
131 /// Generates compressed header probability updates in the bitstream writer
132 template <typename T, std::size_t N>
133 void WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
134 const std::array<T, N>& old_prob);
135
136 /// Generates compressed header probability updates in the bitstream writer
137 /// If probs are not equal, WriteProbabilityDelta is invoked
138 void WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
139
140 /// Generates compressed header probability deltas in the bitstream writer
141 void WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
142
143 /// Inverse of 6.3.4 Decode term subexp
144 void EncodeTermSubExp(VpxRangeEncoder& writer, s32 value);
145
146 /// Writes if the value is less than the test value
147 bool WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test);
148
149 /// Writes probability updates for the Coef probabilities
150 void WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode,
151 const std::array<u8, 1728>& new_prob,
152 const std::array<u8, 1728>& old_prob);
153
154 /// Write probabilities for 4-byte aligned structures
155 template <typename T, std::size_t N>
156 void WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
157 const std::array<T, N>& old_prob);
158
159 /// Write motion vector probability updates. 6.3.17 in the spec
160 void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
161
162 /// Returns VP9 information from NVDEC provided offset and size
163 [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state);
164
165 /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct
166 void InsertEntropy(u64 offset, Vp9EntropyProbs& dst);
167
168 /// Returns frame to be decoded after buffering
169 [[nodiscard]] Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state);
170
171 /// Use NVDEC provided information to compose the headers for the current frame.
/// ComposeUncompressedHeader() must run first: it sets prev_frame_probs, which the
/// compressed header uses as its reference probabilities.
172 [[nodiscard]] std::vector<u8> ComposeCompressedHeader();
173 [[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader();
174
175 GPU& gpu;
/// Output buffer returned by ComposeFrameHeader().
176 std::vector<u8> frame;
177
/// Loop filter deltas last written to a header; used to detect changes between frames.
178 std::array<s8, 4> loop_filter_ref_deltas{};
179 std::array<s8, 2> loop_filter_mode_deltas{};
180
/// True when the most recently composed frame had show_frame unset.
181 bool hidden = false;
182 s64 current_frame_number = -2; // since we buffer 2 frames
183 s32 grace_period = 6; // frame offsets need to stabilize
/// Saved probability contexts, indexed by the frame context index written to the header.
184 std::array<FrameContexts, 4> frame_ctxs{};
185 Vp9FrameContainer next_frame{};
186 Vp9FrameContainer next_next_frame{};
187 bool swap_next_golden{};
188
/// Picture info of the frame currently being composed.
189 Vp9PictureInfo current_frame_info{};
/// Reference probabilities for the compressed header.
190 Vp9EntropyProbs prev_frame_probs{};
191
192 s32 diff_update_probability = 252;
193 s32 frame_sync_code = 0x498342;
194 };
195
196} // namespace Decoder
197} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h
new file mode 100644
index 000000000..139501a1c
--- /dev/null
+++ b/src/video_core/command_classes/codecs/vp9_types.h
@@ -0,0 +1,302 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstring>
9#include <vector>
10#include "common/common_funcs.h"
11#include "common/common_types.h"
12
13namespace Tegra {
14class GPU;
15
16namespace Decoder {
/// Dimensions and pitches of a VP9 frame. The layout is fixed at 8 bytes (see the
/// static_assert below) because it is embedded in PictureInfo.
17 struct Vp9FrameDimensions {
18 s16 width{};
19 s16 height{};
20 s16 luma_pitch{};
21 s16 chroma_pitch{};
22 };
23 static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");
24
/// Bit flags stored in PictureInfo::vp9_flags. Each enumerator is a single bit, so values
/// are meant to be tested with a bitwise AND.
25 enum FrameFlags : u32 {
26 IsKeyFrame = 1 << 0,
27 LastFrameIsKeyFrame = 1 << 1,
28 FrameSizeChanged = 1 << 2,
29 ErrorResilientMode = 1 << 3,
30 LastShowFrame = 1 << 4,
31 IntraOnly = 1 << 5,
32 };
33
/// Transform block sizes. TxSizes is the number of sizes, not a size itself.
34 enum class TxSize {
35 Tx4x4 = 0, // 4x4 transform
36 Tx8x8 = 1, // 8x8 transform
37 Tx16x16 = 2, // 16x16 transform
38 Tx32x32 = 3, // 32x32 transform
39 TxSizes = 4
40 };
41
/// Frame-level transform modes. TxModes is the number of modes, not a mode itself.
42 enum class TxMode {
43 Only4X4 = 0, // Only 4x4 transform used
44 Allow8X8 = 1, // Allow block transform size up to 8x8
45 Allow16X16 = 2, // Allow block transform size up to 16x16
46 Allow32X32 = 3, // Allow block transform size up to 32x32
47 TxModeSelect = 4, // Transform specified for each block
48 TxModes = 5
49 };
50
/// Segmentation parameters as laid out inside PictureInfo (0x64 bytes, see static_assert).
/// PictureInfo::Convert copies these into the segment_* fields of Vp9PictureInfo.
51 struct Segmentation {
52 u8 enabled{};
53 u8 update_map{};
54 u8 temporal_update{};
55 u8 abs_delta{};
// One entry per segment (8 segments).
56 std::array<u32, 8> feature_mask{};
// Four feature values per segment.
57 std::array<std::array<s16, 4>, 8> feature_data{};
58 };
59 static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");
60
/// Loop filter delta parameters as laid out inside PictureInfo (7 bytes, see static_assert).
61 struct LoopFilter {
62 u8 mode_ref_delta_enabled{};
63 std::array<s8, 4> ref_deltas{};
64 std::array<s8, 2> mode_deltas{};
65 };
66 static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size");
67
/// Tightly packed VP9 probability tables (0x7B4 bytes, see static_assert). Filled from the
/// padded EntropyProbs layout by EntropyProbs::Convert.
68 struct Vp9EntropyProbs {
69 std::array<u8, 36> y_mode_prob{};
70 std::array<u8, 64> partition_prob{};
// 1728 = the 2304-entry source table with every 4th entry dropped (see EntropyProbs::Convert).
71 std::array<u8, 1728> coef_probs{};
72 std::array<u8, 8> switchable_interp_prob{};
73 std::array<u8, 28> inter_mode_prob{};
74 std::array<u8, 4> intra_inter_prob{};
75 std::array<u8, 5> comp_inter_prob{};
76 std::array<u8, 10> single_ref_prob{};
77 std::array<u8, 5> comp_ref_prob{};
78 std::array<u8, 6> tx_32x32_prob{};
79 std::array<u8, 4> tx_16x16_prob{};
80 std::array<u8, 2> tx_8x8_prob{};
81 std::array<u8, 3> skip_probs{};
82 std::array<u8, 3> joints{};
83 std::array<u8, 2> sign{};
84 std::array<u8, 20> classes{};
85 std::array<u8, 2> class_0{};
86 std::array<u8, 20> prob_bits{};
87 std::array<u8, 12> class_0_fr{};
88 std::array<u8, 6> fr{};
89 std::array<u8, 2> class_0_hp{};
90 std::array<u8, 2> high_precision{};
91 };
92 static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size");
93
/// Per-frame VP9 parameters in a decoder-friendly form. Most fields are produced by
/// PictureInfo::Convert; show_frame, entropy, frame_offsets and refresh_frame are not set
/// there and keep their defaults until assigned elsewhere.
94 struct Vp9PictureInfo {
95 bool is_key_frame{};
96 bool intra_only{};
97 bool last_frame_was_key{};
98 bool frame_size_changed{};
99 bool error_resilient_mode{};
100 bool last_frame_shown{};
// Not set by PictureInfo::Convert.
101 bool show_frame{};
102 std::array<s8, 4> ref_frame_sign_bias{};
103 s32 base_q_index{};
104 s32 y_dc_delta_q{};
105 s32 uv_dc_delta_q{};
106 s32 uv_ac_delta_q{};
107 bool lossless{};
108 s32 transform_mode{};
109 bool allow_high_precision_mv{};
110 s32 interp_filter{};
111 s32 reference_mode{};
112 s8 comp_fixed_ref{};
113 std::array<s8, 2> comp_var_ref{};
114 s32 log2_tile_cols{};
115 s32 log2_tile_rows{};
116 bool segment_enabled{};
117 bool segment_map_update{};
118 bool segment_map_temporal_update{};
119 s32 segment_abs_delta{};
120 std::array<u32, 8> segment_feature_enable{};
121 std::array<std::array<s16, 4>, 8> segment_feature_data{};
122 bool mode_ref_delta_enabled{};
123 bool use_prev_in_find_mv_refs{};
124 std::array<s8, 4> ref_deltas{};
125 std::array<s8, 2> mode_deltas{};
// Not set by PictureInfo::Convert.
126 Vp9EntropyProbs entropy{};
127 Vp9FrameDimensions frame_size{};
128 u8 first_level{};
129 u8 sharpness_level{};
130 u32 bitstream_size{};
// Not set by PictureInfo::Convert.
131 std::array<u64, 4> frame_offsets{};
// Not set by PictureInfo::Convert.
132 std::array<bool, 4> refresh_frame{};
133 };
134
/// A frame's picture info together with its raw bitstream bytes.
135 struct Vp9FrameContainer {
136 Vp9PictureInfo info{};
137 std::vector<u8> bit_stream;
138 };
139
140struct PictureInfo {
141 INSERT_PADDING_WORDS(12);
142 u32 bitstream_size{};
143 INSERT_PADDING_WORDS(5);
144 Vp9FrameDimensions last_frame_size{};
145 Vp9FrameDimensions golden_frame_size{};
146 Vp9FrameDimensions alt_frame_size{};
147 Vp9FrameDimensions current_frame_size{};
148 u32 vp9_flags{};
149 std::array<s8, 4> ref_frame_sign_bias{};
150 u8 first_level{};
151 u8 sharpness_level{};
152 u8 base_q_index{};
153 u8 y_dc_delta_q{};
154 u8 uv_ac_delta_q{};
155 u8 uv_dc_delta_q{};
156 u8 lossless{};
157 u8 tx_mode{};
158 u8 allow_high_precision_mv{};
159 u8 interp_filter{};
160 u8 reference_mode{};
161 s8 comp_fixed_ref{};
162 std::array<s8, 2> comp_var_ref{};
163 u8 log2_tile_cols{};
164 u8 log2_tile_rows{};
165 Segmentation segmentation{};
166 LoopFilter loop_filter{};
167 INSERT_PADDING_BYTES(5);
168 u32 surface_params{};
169 INSERT_PADDING_WORDS(3);
170
171 [[nodiscard]] Vp9PictureInfo Convert() const {
172 return {
173 .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0,
174 .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0,
175 .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0,
176 .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,
177 .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,
178 .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0,
179 .ref_frame_sign_bias = ref_frame_sign_bias,
180 .base_q_index = base_q_index,
181 .y_dc_delta_q = y_dc_delta_q,
182 .uv_dc_delta_q = uv_dc_delta_q,
183 .uv_ac_delta_q = uv_ac_delta_q,
184 .lossless = lossless != 0,
185 .transform_mode = tx_mode,
186 .allow_high_precision_mv = allow_high_precision_mv != 0,
187 .interp_filter = interp_filter,
188 .reference_mode = reference_mode,
189 .comp_fixed_ref = comp_fixed_ref,
190 .comp_var_ref = comp_var_ref,
191 .log2_tile_cols = log2_tile_cols,
192 .log2_tile_rows = log2_tile_rows,
193 .segment_enabled = segmentation.enabled != 0,
194 .segment_map_update = segmentation.update_map != 0,
195 .segment_map_temporal_update = segmentation.temporal_update != 0,
196 .segment_abs_delta = segmentation.abs_delta,
197 .segment_feature_enable = segmentation.feature_mask,
198 .segment_feature_data = segmentation.feature_data,
199 .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0,
200 .use_prev_in_find_mv_refs = !(vp9_flags == (FrameFlags::ErrorResilientMode)) &&
201 !(vp9_flags == (FrameFlags::FrameSizeChanged)) &&
202 !(vp9_flags == (FrameFlags::IntraOnly)) &&
203 (vp9_flags == (FrameFlags::LastShowFrame)) &&
204 !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)),
205 .ref_deltas = loop_filter.ref_deltas,
206 .mode_deltas = loop_filter.mode_deltas,
207 .frame_size = current_frame_size,
208 .first_level = first_level,
209 .sharpness_level = sharpness_level,
210 .bitstream_size = bitstream_size,
211 };
212 }
213};
214static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size");
215
216struct EntropyProbs {
217 INSERT_PADDING_BYTES(1024);
218 std::array<u8, 28> inter_mode_prob{};
219 std::array<u8, 4> intra_inter_prob{};
220 INSERT_PADDING_BYTES(80);
221 std::array<u8, 2> tx_8x8_prob{};
222 std::array<u8, 4> tx_16x16_prob{};
223 std::array<u8, 6> tx_32x32_prob{};
224 std::array<u8, 4> y_mode_prob_e8{};
225 std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{};
226 INSERT_PADDING_BYTES(64);
227 std::array<u8, 64> partition_prob{};
228 INSERT_PADDING_BYTES(10);
229 std::array<u8, 8> switchable_interp_prob{};
230 std::array<u8, 5> comp_inter_prob{};
231 std::array<u8, 3> skip_probs{};
232 INSERT_PADDING_BYTES(1);
233 std::array<u8, 3> joints{};
234 std::array<u8, 2> sign{};
235 std::array<u8, 2> class_0{};
236 std::array<u8, 6> fr{};
237 std::array<u8, 2> class_0_hp{};
238 std::array<u8, 2> high_precision{};
239 std::array<u8, 20> classes{};
240 std::array<u8, 12> class_0_fr{};
241 std::array<u8, 20> pred_bits{};
242 std::array<u8, 10> single_ref_prob{};
243 std::array<u8, 5> comp_ref_prob{};
244 INSERT_PADDING_BYTES(17);
245 std::array<u8, 2304> coef_probs{};
246
247 void Convert(Vp9EntropyProbs& fc) {
248 fc.inter_mode_prob = inter_mode_prob;
249 fc.intra_inter_prob = intra_inter_prob;
250 fc.tx_8x8_prob = tx_8x8_prob;
251 fc.tx_16x16_prob = tx_16x16_prob;
252 fc.tx_32x32_prob = tx_32x32_prob;
253
254 for (std::size_t i = 0; i < 4; i++) {
255 for (std::size_t j = 0; j < 9; j++) {
256 fc.y_mode_prob[j + 9 * i] = j < 8 ? y_mode_prob_e0e7[i][j] : y_mode_prob_e8[i];
257 }
258 }
259
260 fc.partition_prob = partition_prob;
261 fc.switchable_interp_prob = switchable_interp_prob;
262 fc.comp_inter_prob = comp_inter_prob;
263 fc.skip_probs = skip_probs;
264 fc.joints = joints;
265 fc.sign = sign;
266 fc.class_0 = class_0;
267 fc.fr = fr;
268 fc.class_0_hp = class_0_hp;
269 fc.high_precision = high_precision;
270 fc.classes = classes;
271 fc.class_0_fr = class_0_fr;
272 fc.prob_bits = pred_bits;
273 fc.single_ref_prob = single_ref_prob;
274 fc.comp_ref_prob = comp_ref_prob;
275
276 // Skip the 4th element as it goes unused
277 for (std::size_t i = 0; i < coef_probs.size(); i += 4) {
278 const std::size_t j = i - i / 4;
279 fc.coef_probs[j] = coef_probs[i];
280 fc.coef_probs[j + 1] = coef_probs[i + 1];
281 fc.coef_probs[j + 2] = coef_probs[i + 2];
282 }
283 }
284};
285static_assert(sizeof(EntropyProbs) == 0xEA0, "EntropyProbs is an invalid size");
286
/// Reference frame slots.
287 enum class Ref { Last, Golden, AltRef };
288
/// Entry pairing a frame number with a reference slot and a refresh flag.
289 struct RefPoolElement {
290 s64 frame{};
291 Ref ref{};
292 bool refresh{};
293 };
294
/// Saved probability context: the frame number it was taken from, an "adapted" flag and the
/// probabilities themselves.
295 struct FrameContexts {
296 s64 from{};
297 bool adapted{};
298 Vp9EntropyProbs probs{};
299 };
300
301}; // namespace Decoder
302}; // namespace Tegra
diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp
new file mode 100644
index 000000000..b12494528
--- /dev/null
+++ b/src/video_core/command_classes/host1x.cpp
@@ -0,0 +1,30 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "video_core/command_classes/host1x.h"
7#include "video_core/gpu.h"
8
// Stores a reference to the GPU; the GPU must outlive this object.
9 Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {}
10
// Defaulted destructor.
11 Tegra::Host1x::~Host1x() = default;
12
13void Tegra::Host1x::ProcessMethod(Method method, u32 argument) {
14 switch (method) {
15 case Method::LoadSyncptPayload32:
16 syncpoint_value = argument;
17 break;
18 case Method::WaitSyncpt:
19 case Method::WaitSyncpt32:
20 Execute(argument);
21 break;
22 default:
23 UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast<u32>(method));
24 break;
25 }
26}
27
// Waits on the GPU fence identified by data, using the payload value most recently stored
// by LoadSyncptPayload32.
28 void Tegra::Host1x::Execute(u32 data) {
29 gpu.WaitFence(data, syncpoint_value);
30 }
diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/command_classes/host1x.h
new file mode 100644
index 000000000..7e94799dd
--- /dev/null
+++ b/src/video_core/command_classes/host1x.h
@@ -0,0 +1,37 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8#include "common/common_funcs.h"
9#include "common/common_types.h"
10
11namespace Tegra {
12class GPU;
13class Nvdec;
14
/// Handles Host1x syncpoint methods: stores a payload value and waits on GPU fences.
15 class Host1x {
16 public:
/// Method identifiers understood by ProcessMethod().
17 enum class Method : u32 {
18 WaitSyncpt = 0x8,
19 LoadSyncptPayload32 = 0x4e,
20 WaitSyncpt32 = 0x50,
21 };
22
23 explicit Host1x(GPU& gpu);
24 ~Host1x();
25
26 /// Writes the method into the state, Invoke Execute() if encountered
27 void ProcessMethod(Method method, u32 argument);
28
29 private:
30 /// For Host1x, execute is waiting on a syncpoint previously written into the state
31 void Execute(u32 data);
32
/// Payload stored by LoadSyncptPayload32 and passed to the next fence wait.
33 u32 syncpoint_value{};
34 GPU& gpu;
35 };
36
37} // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp
new file mode 100644
index 000000000..79e1f4e13
--- /dev/null
+++ b/src/video_core/command_classes/nvdec.cpp
@@ -0,0 +1,48 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "video_core/command_classes/nvdec.h"
7#include "video_core/gpu.h"
8
9namespace Tegra {
10
// Stores a reference to the GPU and creates the Codec instance that does the decoding.
11 Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {}
12
// Defaulted destructor; the owned Codec is released automatically.
13 Nvdec::~Nvdec() = default;
14
15void Nvdec::ProcessMethod(Method method, const std::vector<u32>& arguments) {
16 if (method == Method::SetVideoCodec) {
17 codec->StateWrite(static_cast<u32>(method), arguments[0]);
18 } else {
19 codec->StateWrite(static_cast<u32>(method), static_cast<u64>(arguments[0]) << 8);
20 }
21
22 switch (method) {
23 case Method::SetVideoCodec:
24 codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(arguments[0]));
25 break;
26 case Method::Execute:
27 Execute();
28 break;
29 }
30}
31
// Returns the codec's current frame.
32 AVFramePtr Nvdec::GetFrame() {
33 return codec->GetCurrentFrame();
34 }
35
36void Nvdec::Execute() {
37 switch (codec->GetCurrentCodec()) {
38 case NvdecCommon::VideoCodec::H264:
39 case NvdecCommon::VideoCodec::Vp9:
40 codec->Decode();
41 break;
42 default:
43 UNIMPLEMENTED_MSG("Unknown codec {}", static_cast<u32>(codec->GetCurrentCodec()));
44 break;
45 }
46}
47
48} // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h
new file mode 100644
index 000000000..e4877c533
--- /dev/null
+++ b/src/video_core/command_classes/nvdec.h
@@ -0,0 +1,38 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <vector>
9#include "common/common_types.h"
10#include "video_core/command_classes/codecs/codec.h"
11
12namespace Tegra {
13class GPU;
14
/// Front end for the NVDEC video decoder: forwards method writes to a Codec and triggers
/// decoding.
15 class Nvdec {
16 public:
/// Method identifiers with dedicated handling in ProcessMethod().
17 enum class Method : u32 {
18 SetVideoCodec = 0x80,
19 Execute = 0xc0,
20 };
21
22 explicit Nvdec(GPU& gpu);
23 ~Nvdec();
24
25 /// Writes the method into the state, Invoke Execute() if encountered.
/// Only arguments[0] is used.
26 void ProcessMethod(Method method, const std::vector<u32>& arguments);
27
28 /// Return most recently decoded frame
29 [[nodiscard]] AVFramePtr GetFrame();
30
31 private:
32 /// Invoke codec to decode a frame
33 void Execute();
34
35 GPU& gpu;
/// Decoder backend; created in the constructor.
36 std::unique_ptr<Codec> codec;
37 };
38} // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h
new file mode 100644
index 000000000..01b5e086d
--- /dev/null
+++ b/src/video_core/command_classes/nvdec_common.h
@@ -0,0 +1,48 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_funcs.h"
8#include "common/common_types.h"
9
10namespace Tegra::NvdecCommon {
11
/// NVDEC register state with a fixed size of 0xBC0 bytes (see static_assert below). Padding
/// members cover registers this code does not name.
/// NOTE(review): fields appear to be 64-bit slots addressed by method offset — confirm
/// against Codec::StateWrite.
12 struct NvdecRegisters {
13 INSERT_PADDING_WORDS(256);
14 u64 set_codec_id{};
15 INSERT_PADDING_WORDS(254);
16 u64 set_platform_id{};
17 u64 picture_info_offset{};
18 u64 frame_bitstream_offset{};
19 u64 frame_number{};
20 u64 h264_slice_data_offsets{};
21 u64 h264_mv_dump_offset{};
22 INSERT_PADDING_WORDS(6);
23 u64 frame_stats_offset{};
24 u64 h264_last_surface_luma_offset{};
25 u64 h264_last_surface_chroma_offset{};
26 std::array<u64, 17> surface_luma_offset{};
27 std::array<u64, 17> surface_chroma_offset{};
28 INSERT_PADDING_WORDS(132);
29 u64 vp9_entropy_probs_offset{};
30 u64 vp9_backward_updates_offset{};
31 u64 vp9_last_frame_segmap_offset{};
32 u64 vp9_curr_frame_segmap_offset{};
33 INSERT_PADDING_WORDS(2);
34 u64 vp9_last_frame_mvs_offset{};
35 u64 vp9_curr_frame_mvs_offset{};
36 INSERT_PADDING_WORDS(2);
37 };
38 static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");
39
/// Codec identifiers; the argument of the SetVideoCodec method is cast to this type.
/// Nvdec::Execute decodes only H264 and Vp9.
40 enum class VideoCodec : u32 {
41 None = 0x0,
42 H264 = 0x3,
43 Vp8 = 0x5,
44 H265 = 0x7,
45 Vp9 = 0x9,
46 };
47
48} // namespace Tegra::NvdecCommon
diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/command_classes/sync_manager.cpp
new file mode 100644
index 000000000..19dc9e0ab
--- /dev/null
+++ b/src/video_core/command_classes/sync_manager.cpp
@@ -0,0 +1,60 @@
1// MIT License
2//
3// Copyright (c) Ryujinx Team and Contributors
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
6// associated documentation files (the "Software"), to deal in the Software without restriction,
7// including without limitation the rights to use, copy, modify, merge, publish, distribute,
8// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
9// furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in all copies or
12// substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
15// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
17// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19//
20
21#include <algorithm>
22#include "sync_manager.h"
23#include "video_core/gpu.h"
24
25namespace Tegra {
// Stores a reference to the GPU whose syncpoints are incremented.
26 SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {}
// Defaulted destructor.
27 SyncptIncrManager::~SyncptIncrManager() = default;
28
// Queues an already-completed increment for syncpoint `id` (handle and class id are 0),
// then processes every completed increment at the front of the queue.
29 void SyncptIncrManager::Increment(u32 id) {
30 increments.emplace_back(0, 0, id, true);
31 IncrementAllDone();
32 }
33
// Queues a pending (not yet complete) increment for syncpoint `id` and returns the handle
// that must later be passed to SignalDone(). Handles are taken from current_id, which is
// advanced on every call.
34 u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) {
35 const u32 handle = current_id++;
36 increments.emplace_back(handle, class_id, id);
37 return handle;
38 }
39
40void SyncptIncrManager::SignalDone(u32 handle) {
41 const auto done_incr =
42 std::find_if(increments.begin(), increments.end(),
43 [handle](const SyncptIncr& incr) { return incr.id == handle; });
44 if (done_incr != increments.cend()) {
45 done_incr->complete = true;
46 }
47 IncrementAllDone();
48}
49
50void SyncptIncrManager::IncrementAllDone() {
51 std::size_t done_count = 0;
52 for (; done_count < increments.size(); ++done_count) {
53 if (!increments[done_count].complete) {
54 break;
55 }
56 gpu.IncrementSyncPoint(increments[done_count].syncpt_id);
57 }
58 increments.erase(increments.begin(), increments.begin() + done_count);
59}
60} // namespace Tegra
diff --git a/src/video_core/command_classes/sync_manager.h b/src/video_core/command_classes/sync_manager.h
new file mode 100644
index 000000000..2c321ec58
--- /dev/null
+++ b/src/video_core/command_classes/sync_manager.h
@@ -0,0 +1,64 @@
1// MIT License
2//
3// Copyright (c) Ryujinx Team and Contributors
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
6// associated documentation files (the "Software"), to deal in the Software without restriction,
7// including without limitation the rights to use, copy, modify, merge, publish, distribute,
8// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
9// furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in all copies or
12// substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
15// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
17// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19//
20
21#pragma once
22
23#include <mutex>
24#include <vector>
25#include "common/common_types.h"
26
27namespace Tegra {
28class GPU;
29struct SyncptIncr {
30 u32 id;
31 u32 class_id;
32 u32 syncpt_id;
33 bool complete;
34
35 SyncptIncr(u32 id_, u32 class_id_, u32 syncpt_id_, bool done = false)
36 : id(id_), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {}
37};
38
39class SyncptIncrManager {
40public:
41 explicit SyncptIncrManager(GPU& gpu);
42 ~SyncptIncrManager();
43
44 /// Add syncpoint id and increment all
45 void Increment(u32 id);
46
47 /// Returns a handle to increment later
48 u32 IncrementWhenDone(u32 class_id, u32 id);
49
50 /// IncrememntAllDone, including handle
51 void SignalDone(u32 handle);
52
53 /// Increment all sequential pending increments that are already done.
54 void IncrementAllDone();
55
56private:
57 std::vector<SyncptIncr> increments;
58 std::mutex increment_lock;
59 u32 current_id{};
60
61 GPU& gpu;
62};
63
64} // namespace Tegra
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
new file mode 100644
index 000000000..55e632346
--- /dev/null
+++ b/src/video_core/command_classes/vic.cpp
@@ -0,0 +1,175 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include "common/assert.h"
7#include "video_core/command_classes/nvdec.h"
8#include "video_core/command_classes/vic.h"
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/gpu.h"
11#include "video_core/memory_manager.h"
12#include "video_core/textures/decoders.h"
13
14extern "C" {
15#include <libswscale/swscale.h>
16}
17
18namespace Tegra {
19
20Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
21 : gpu(gpu_), nvdec_processor(std::move(nvdec_processor_)) {}
22Vic::~Vic() = default;
23
24void Vic::VicStateWrite(u32 offset, u32 arguments) {
25 u8* const state_offset = reinterpret_cast<u8*>(&vic_state) + offset * sizeof(u32);
26 std::memcpy(state_offset, &arguments, sizeof(u32));
27}
28
29void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) {
30 LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", method);
31 VicStateWrite(static_cast<u32>(method), arguments[0]);
32 const u64 arg = static_cast<u64>(arguments[0]) << 8;
33 switch (method) {
34 case Method::Execute:
35 Execute();
36 break;
37 case Method::SetConfigStructOffset:
38 config_struct_address = arg;
39 break;
40 case Method::SetOutputSurfaceLumaOffset:
41 output_surface_luma_address = arg;
42 break;
43 case Method::SetOutputSurfaceChromaUOffset:
44 output_surface_chroma_u_address = arg;
45 break;
46 case Method::SetOutputSurfaceChromaVOffset:
47 output_surface_chroma_v_address = arg;
48 break;
49 default:
50 break;
51 }
52}
53
54void Vic::Execute() {
55 if (output_surface_luma_address == 0) {
56 LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Received 0x{:X}",
57 vic_state.output_surface.luma_offset);
58 return;
59 }
60 const VicConfig config{gpu.MemoryManager().Read<u64>(config_struct_address + 0x20)};
61 const AVFramePtr frame_ptr = nvdec_processor->GetFrame();
62 const auto* frame = frame_ptr.get();
63 if (!frame || frame->width == 0 || frame->height == 0) {
64 return;
65 }
66 const VideoPixelFormat pixel_format =
67 static_cast<VideoPixelFormat>(config.pixel_format.Value());
68 switch (pixel_format) {
69 case VideoPixelFormat::BGRA8:
70 case VideoPixelFormat::RGBA8: {
71 LOG_TRACE(Service_NVDRV, "Writing RGB Frame");
72
73 if (scaler_ctx == nullptr || frame->width != scaler_width ||
74 frame->height != scaler_height) {
75 const AVPixelFormat target_format =
76 (pixel_format == VideoPixelFormat::RGBA8) ? AV_PIX_FMT_RGBA : AV_PIX_FMT_BGRA;
77
78 sws_freeContext(scaler_ctx);
79 scaler_ctx = nullptr;
80
81 // FFmpeg returns all frames in YUV420, convert it into expected format
82 scaler_ctx =
83 sws_getContext(frame->width, frame->height, AV_PIX_FMT_YUV420P, frame->width,
84 frame->height, target_format, 0, nullptr, nullptr, nullptr);
85
86 scaler_width = frame->width;
87 scaler_height = frame->height;
88 }
89 // Get Converted frame
90 const std::size_t linear_size = frame->width * frame->height * 4;
91
92 using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>;
93 AVMallocPtr converted_frame_buffer{static_cast<u8*>(av_malloc(linear_size)), av_free};
94
95 const int converted_stride{frame->width * 4};
96 u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
97
98 sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height,
99 &converted_frame_buf_addr, &converted_stride);
100
101 const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
102 if (blk_kind != 0) {
103 // swizzle pitch linear to block linear
104 const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
105 const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1,
106 block_height, 0);
107 std::vector<u8> swizzled_data(size);
108 Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4,
109 frame->width, 4, swizzled_data.data(),
110 converted_frame_buffer.get(), block_height, 0, 0);
111
112 gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
113 gpu.Maxwell3D().OnMemoryWrite();
114 } else {
115 // send pitch linear frame
116 gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
117 linear_size);
118 gpu.Maxwell3D().OnMemoryWrite();
119 }
120 break;
121 }
122 case VideoPixelFormat::Yuv420: {
123 LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame");
124
125 const std::size_t surface_width = config.surface_width_minus1 + 1;
126 const std::size_t surface_height = config.surface_height_minus1 + 1;
127 const std::size_t half_width = surface_width / 2;
128 const std::size_t half_height = config.surface_height_minus1 / 2;
129 const std::size_t aligned_width = (surface_width + 0xff) & ~0xff;
130
131 const auto* luma_ptr = frame->data[0];
132 const auto* chroma_b_ptr = frame->data[1];
133 const auto* chroma_r_ptr = frame->data[2];
134 const auto stride = frame->linesize[0];
135 const auto half_stride = frame->linesize[1];
136
137 std::vector<u8> luma_buffer(aligned_width * surface_height);
138 std::vector<u8> chroma_buffer(aligned_width * half_height);
139
140 // Populate luma buffer
141 for (std::size_t y = 0; y < surface_height - 1; ++y) {
142 std::size_t src = y * stride;
143 std::size_t dst = y * aligned_width;
144
145 std::size_t size = surface_width;
146
147 for (std::size_t offset = 0; offset < size; ++offset) {
148 luma_buffer[dst + offset] = luma_ptr[src + offset];
149 }
150 }
151 gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
152 luma_buffer.size());
153
154 // Populate chroma buffer from both channels with interleaving.
155 for (std::size_t y = 0; y < half_height; ++y) {
156 std::size_t src = y * half_stride;
157 std::size_t dst = y * aligned_width;
158
159 for (std::size_t x = 0; x < half_width; ++x) {
160 chroma_buffer[dst + x * 2] = chroma_b_ptr[src + x];
161 chroma_buffer[dst + x * 2 + 1] = chroma_r_ptr[src + x];
162 }
163 }
164 gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(),
165 chroma_buffer.size());
166 gpu.Maxwell3D().OnMemoryWrite();
167 break;
168 }
169 default:
170 UNIMPLEMENTED_MSG("Unknown video pixel format {}", config.pixel_format.Value());
171 break;
172 }
173}
174
175} // namespace Tegra
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h
new file mode 100644
index 000000000..8c4e284a1
--- /dev/null
+++ b/src/video_core/command_classes/vic.h
@@ -0,0 +1,110 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <vector>
9#include "common/bit_field.h"
10#include "common/common_types.h"
11
12struct SwsContext;
13
14namespace Tegra {
15class GPU;
16class Nvdec;
17
18struct PlaneOffsets {
19 u32 luma_offset{};
20 u32 chroma_u_offset{};
21 u32 chroma_v_offset{};
22};
23
// Register block of the VIC engine, laid out as consecutive 32-bit words.
// Vic::VicStateWrite stores each method argument at word index == method id,
// so the position of a field in this struct is its method id.
//
// The word indices noted below assume INSERT_PADDING_WORDS(n) reserves n 32-bit
// words (the macro is defined elsewhere). The size assertion after the struct is
// consistent with that: 0x7A0 bytes == 488 words.
24struct VicRegisters {
25 INSERT_PADDING_WORDS(64);
 // word 0x40
26 u32 nop{};
27 INSERT_PADDING_WORDS(15);
 // word 0x50
28 u32 pm_trigger{};
29 INSERT_PADDING_WORDS(47);
 // words 0x80 and 0x81
30 u32 set_application_id{};
31 u32 set_watchdog_timer{};
32 INSERT_PADDING_WORDS(17);
 // words 0x93 and 0x94
33 u32 context_save_area{};
34 u32 context_switch{};
35 INSERT_PADDING_WORDS(43);
 // word 0xC0, matches Vic::Method::Execute
36 u32 execute{};
37 INSERT_PADDING_WORDS(63);
 // words 0x100..0x1BF: 8 x 8 sets of plane offsets
38 std::array<std::array<PlaneOffsets, 8>, 8> surfacex_slots{};
 // word 0x1C0
39 u32 picture_index{};
 // word 0x1C1, matches Vic::Method::SetControlParams
40 u32 control_params{};
 // word 0x1C2, matches Vic::Method::SetConfigStructOffset
41 u32 config_struct_offset{};
42 u32 filter_struct_offset{};
43 u32 palette_offset{};
44 u32 hist_offset{};
45 u32 context_id{};
46 u32 fce_ucode_size{};
 // words 0x1C8..0x1CA, match Vic::Method::SetOutputSurface{Luma,ChromaU,ChromaV}Offset
47 PlaneOffsets output_surface{};
48 u32 fce_ucode_offset{};
49 INSERT_PADDING_WORDS(4);
50 std::array<u32, 8> slot_context_id{};
51 INSERT_PADDING_WORDS(16);
52};
// Guards the layout above: any accidental change in field order or padding
// changes the total size and fails the build.
53static_assert(sizeof(VicRegisters) == 0x7A0, "VicRegisters is an invalid size");
54
// Emulates the VIC command class: it records method writes into a register
// block and, on Method::Execute, converts the latest NVDEC frame into the
// configured output surface (see vic.cpp).
55class Vic {
56public:
 // Method ids handled explicitly by ProcessMethod. Each value is also the
 // 32-bit word index of the corresponding field in VicRegisters.
57 enum class Method : u32 {
58 Execute = 0xc0,
59 SetControlParams = 0x1c1,
60 SetConfigStructOffset = 0x1c2,
61 SetOutputSurfaceLumaOffset = 0x1c8,
62 SetOutputSurfaceChromaUOffset = 0x1c9,
63 SetOutputSurfaceChromaVOffset = 0x1ca
64 };
65
 // gpu: used for memory access and write notifications.
 // nvdec_processor: source of decoded frames; ownership is shared.
66 explicit Vic(GPU& gpu, std::shared_ptr<Nvdec> nvdec_processor);
67 ~Vic();
68
69 /// Write to the device state.
 // Only arguments[0] is consumed. Besides updating the register block, the
 // offset-setting methods latch an address and Method::Execute runs Execute().
70 void ProcessMethod(Method method, const std::vector<u32>& arguments);
71
72private:
 // Converts the current NVDEC frame and writes it to the output surface.
73 void Execute();
74
 // Stores `arguments` at 32-bit word index `offset` of vic_state.
75 void VicStateWrite(u32 offset, u32 arguments);
76 VicRegisters vic_state{};
77
 // Values of VicConfig::pixel_format that Execute() knows how to produce.
78 enum class VideoPixelFormat : u64_le {
79 RGBA8 = 0x1f,
80 BGRA8 = 0x20,
81 Yuv420 = 0x44,
82 };
83
 // One 64-bit word of the config struct; Execute() reads it from
 // config_struct_address + 0x20. Each BitField is declared as <first bit, width>.
84 union VicConfig {
85 u64_le raw{};
86 BitField<0, 7, u64_le> pixel_format;
87 BitField<7, 2, u64_le> chroma_loc_horiz;
88 BitField<9, 2, u64_le> chroma_loc_vert;
 // Non-zero selects the block-linear (swizzled) write path for RGB output.
89 BitField<11, 4, u64_le> block_linear_kind;
90 BitField<15, 4, u64_le> block_linear_height_log2;
91 BitField<19, 3, u64_le> reserved0;
92 BitField<22, 10, u64_le> reserved1;
 // Stored minus one; Execute() adds 1 to obtain the surface dimensions.
93 BitField<32, 14, u64_le> surface_width_minus1;
94 BitField<46, 14, u64_le> surface_height_minus1;
95 };
96
97 GPU& gpu;
98 std::shared_ptr<Tegra::Nvdec> nvdec_processor;
99
 // Addresses latched by ProcessMethod as (method argument << 8).
 // A luma address of 0 makes Execute() bail out with an error.
100 GPUVAddr config_struct_address{};
101 GPUVAddr output_surface_luma_address{};
102 GPUVAddr output_surface_chroma_u_address{};
 // NOTE(review): latched by ProcessMethod but not read by Execute() in vic.cpp.
103 GPUVAddr output_surface_chroma_v_address{};
104
 // libswscale context cached across Execute() calls, plus the frame size it
 // was created for; Execute() recreates it when the frame size changes.
 // This is a raw pointer obtained from sws_getContext.
105 SwsContext* scaler_ctx{};
106 s32 scaler_width{};
107 s32 scaler_height{};
108};
109
110} // namespace Tegra