summaryrefslogtreecommitdiff
path: root/src/video_core/command_classes
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/command_classes')
-rw-r--r--src/video_core/command_classes/codecs/codec.cpp114
-rw-r--r--src/video_core/command_classes/codecs/codec.h68
-rw-r--r--src/video_core/command_classes/codecs/h264.cpp276
-rw-r--r--src/video_core/command_classes/codecs/h264.h130
-rw-r--r--src/video_core/command_classes/codecs/vp9.cpp1010
-rw-r--r--src/video_core/command_classes/codecs/vp9.h216
-rw-r--r--src/video_core/command_classes/codecs/vp9_types.h369
-rw-r--r--src/video_core/command_classes/host1x.cpp39
-rw-r--r--src/video_core/command_classes/host1x.h78
-rw-r--r--src/video_core/command_classes/nvdec.cpp56
-rw-r--r--src/video_core/command_classes/nvdec.h39
-rw-r--r--src/video_core/command_classes/nvdec_common.h48
-rw-r--r--src/video_core/command_classes/sync_manager.cpp60
-rw-r--r--src/video_core/command_classes/sync_manager.h64
-rw-r--r--src/video_core/command_classes/vic.cpp180
-rw-r--r--src/video_core/command_classes/vic.h110
16 files changed, 2857 insertions, 0 deletions
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
new file mode 100644
index 000000000..2df410be8
--- /dev/null
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -0,0 +1,114 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <fstream>
7#include "common/assert.h"
8#include "video_core/command_classes/codecs/codec.h"
9#include "video_core/command_classes/codecs/h264.h"
10#include "video_core/command_classes/codecs/vp9.h"
11#include "video_core/gpu.h"
12#include "video_core/memory_manager.h"
13
14extern "C" {
15#include <libavutil/opt.h>
16}
17
18namespace Tegra {
19
20Codec::Codec(GPU& gpu_)
21 : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)),
22 vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
23
24Codec::~Codec() {
25 if (!initialized) {
26 return;
27 }
28 // Free libav memory
29 avcodec_send_packet(av_codec_ctx, nullptr);
30 avcodec_receive_frame(av_codec_ctx, av_frame);
31 avcodec_flush_buffers(av_codec_ctx);
32
33 av_frame_unref(av_frame);
34 av_free(av_frame);
35 avcodec_close(av_codec_ctx);
36}
37
38void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
39 LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec));
40 current_codec = codec;
41}
42
43void Codec::StateWrite(u32 offset, u64 arguments) {
44 u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64);
45 std::memcpy(state_offset, &arguments, sizeof(u64));
46}
47
48void Codec::Decode() {
49 bool is_first_frame = false;
50
51 if (!initialized) {
52 if (current_codec == NvdecCommon::VideoCodec::H264) {
53 av_codec = avcodec_find_decoder(AV_CODEC_ID_H264);
54 } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
55 av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9);
56 } else {
57 LOG_ERROR(Service_NVDRV, "Unknown video codec {}", static_cast<u32>(current_codec));
58 return;
59 }
60
61 av_codec_ctx = avcodec_alloc_context3(av_codec);
62 av_frame = av_frame_alloc();
63 av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
64
65 // TODO(ameerj): libavcodec gpu hw acceleration
66
67 const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
68 if (av_error < 0) {
69 LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
70 av_frame_unref(av_frame);
71 av_free(av_frame);
72 avcodec_close(av_codec_ctx);
73 return;
74 }
75 initialized = true;
76 is_first_frame = true;
77 }
78 bool vp9_hidden_frame = false;
79
80 AVPacket packet{};
81 av_init_packet(&packet);
82 std::vector<u8> frame_data;
83
84 if (current_codec == NvdecCommon::VideoCodec::H264) {
85 frame_data = h264_decoder->ComposeFrameHeader(state, is_first_frame);
86 } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
87 frame_data = vp9_decoder->ComposeFrameHeader(state);
88 vp9_hidden_frame = vp9_decoder->WasFrameHidden();
89 }
90
91 packet.data = frame_data.data();
92 packet.size = static_cast<int>(frame_data.size());
93
94 avcodec_send_packet(av_codec_ctx, &packet);
95
96 if (!vp9_hidden_frame) {
97 // Only receive/store visible frames
98 avcodec_receive_frame(av_codec_ctx, av_frame);
99 }
100}
101
102AVFrame* Codec::GetCurrentFrame() {
103 return av_frame;
104}
105
106const AVFrame* Codec::GetCurrentFrame() const {
107 return av_frame;
108}
109
110NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
111 return current_codec;
112}
113
114} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
new file mode 100644
index 000000000..2e56daf29
--- /dev/null
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -0,0 +1,68 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <vector>
9#include "common/common_funcs.h"
10#include "common/common_types.h"
11#include "video_core/command_classes/nvdec_common.h"
12
13extern "C" {
14#if defined(__GNUC__) || defined(__clang__)
15#pragma GCC diagnostic ignored "-Wconversion"
16#endif
17#include <libavcodec/avcodec.h>
18#if defined(__GNUC__) || defined(__clang__)
19#pragma GCC diagnostic pop
20#endif
21}
22
23namespace Tegra {
24class GPU;
25struct VicRegisters;
26
27namespace Decoder {
28class H264;
29class VP9;
30} // namespace Decoder
31
32class Codec {
33public:
34 explicit Codec(GPU& gpu);
35 ~Codec();
36
37 /// Sets NVDEC video stream codec
38 void SetTargetCodec(NvdecCommon::VideoCodec codec);
39
40 /// Populate NvdecRegisters state with argument value at the provided offset
41 void StateWrite(u32 offset, u64 arguments);
42
43 /// Call decoders to construct headers, decode AVFrame with ffmpeg
44 void Decode();
45
46 /// Returns most recently decoded frame
47 AVFrame* GetCurrentFrame();
48 const AVFrame* GetCurrentFrame() const;
49
50 /// Returns the value of current_codec
51 NvdecCommon::VideoCodec GetCurrentCodec() const;
52
53private:
54 bool initialized{};
55 NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None};
56
57 AVCodec* av_codec{nullptr};
58 AVCodecContext* av_codec_ctx{nullptr};
59 AVFrame* av_frame{nullptr};
60
61 GPU& gpu;
62 std::unique_ptr<Decoder::H264> h264_decoder;
63 std::unique_ptr<Decoder::VP9> vp9_decoder;
64
65 NvdecCommon::NvdecRegisters state{};
66};
67
68} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp
new file mode 100644
index 000000000..1a39f7b23
--- /dev/null
+++ b/src/video_core/command_classes/codecs/h264.cpp
@@ -0,0 +1,276 @@
1// MIT License
2//
3// Copyright (c) Ryujinx Team and Contributors
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
6// associated documentation files (the "Software"), to deal in the Software without restriction,
7// including without limitation the rights to use, copy, modify, merge, publish, distribute,
8// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
9// furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in all copies or
12// substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
15// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
17// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19//
20
21#include "common/bit_util.h"
22#include "video_core/command_classes/codecs/h264.h"
23#include "video_core/gpu.h"
24#include "video_core/memory_manager.h"
25
26namespace Tegra::Decoder {
27H264::H264(GPU& gpu_) : gpu(gpu_) {}
28
29H264::~H264() = default;
30
31std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bool is_first_frame) {
32 H264DecoderContext context{};
33 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
34
35 const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff);
36 if (!is_first_frame && frame_number != 0) {
37 frame.resize(context.frame_data_size);
38
39 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
40 } else {
41 /// Encode header
42 H264BitWriter writer{};
43 writer.WriteU(1, 24);
44 writer.WriteU(0, 1);
45 writer.WriteU(3, 2);
46 writer.WriteU(7, 5);
47 writer.WriteU(100, 8);
48 writer.WriteU(0, 8);
49 writer.WriteU(31, 8);
50 writer.WriteUe(0);
51 const s32 chroma_format_idc = (context.h264_parameter_set.flags >> 12) & 0x3;
52 writer.WriteUe(chroma_format_idc);
53 if (chroma_format_idc == 3) {
54 writer.WriteBit(false);
55 }
56
57 writer.WriteUe(0);
58 writer.WriteUe(0);
59 writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
60 writer.WriteBit(false); // Scaling matrix present flag
61
62 const s32 order_cnt_type = static_cast<s32>((context.h264_parameter_set.flags >> 14) & 3);
63 writer.WriteUe(static_cast<s32>((context.h264_parameter_set.flags >> 8) & 0xf));
64 writer.WriteUe(order_cnt_type);
65 if (order_cnt_type == 0) {
66 writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt);
67 } else if (order_cnt_type == 1) {
68 writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
69
70 writer.WriteSe(0);
71 writer.WriteSe(0);
72 writer.WriteUe(0);
73 }
74
75 const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units /
76 (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
77
78 writer.WriteUe(16);
79 writer.WriteBit(false);
80 writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
81 writer.WriteUe(pic_height - 1);
82 writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
83
84 if (!context.h264_parameter_set.frame_mbs_only_flag) {
85 writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0);
86 }
87
88 writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0);
89 writer.WriteBit(false); // Frame cropping flag
90 writer.WriteBit(false); // VUI parameter present flag
91
92 writer.End();
93
94 // H264 PPS
95 writer.WriteU(1, 24);
96 writer.WriteU(0, 1);
97 writer.WriteU(3, 2);
98 writer.WriteU(8, 5);
99
100 writer.WriteUe(0);
101 writer.WriteUe(0);
102
103 writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag);
104 writer.WriteBit(false);
105 writer.WriteUe(0);
106 writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
107 writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
108 writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0);
109 writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2);
110 s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f);
111 pic_init_qp = (pic_init_qp << 26) >> 26;
112 writer.WriteSe(pic_init_qp);
113 writer.WriteSe(0);
114 s32 chroma_qp_index_offset =
115 static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f);
116 chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27;
117
118 writer.WriteSe(chroma_qp_index_offset);
119 writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0);
120 writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0);
121 writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0);
122 writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
123
124 writer.WriteBit(true);
125
126 for (s32 index = 0; index < 6; index++) {
127 writer.WriteBit(true);
128 const auto matrix_x4 =
129 std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end());
130 writer.WriteScalingList(matrix_x4, index * 16, 16);
131 }
132
133 if (context.h264_parameter_set.transform_8x8_mode_flag) {
134 for (s32 index = 0; index < 2; index++) {
135 writer.WriteBit(true);
136 const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(),
137 context.scaling_matrix_8.end());
138
139 writer.WriteScalingList(matrix_x8, index * 64, 64);
140 }
141 }
142
143 s32 chroma_qp_index_offset2 =
144 static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f);
145 chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27;
146
147 writer.WriteSe(chroma_qp_index_offset2);
148
149 writer.End();
150
151 const auto& encoded_header = writer.GetByteArray();
152 frame.resize(encoded_header.size() + context.frame_data_size);
153 std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
154
155 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
156 frame.data() + encoded_header.size(),
157 context.frame_data_size);
158 }
159
160 return frame;
161}
162
163H264BitWriter::H264BitWriter() = default;
164
165H264BitWriter::~H264BitWriter() = default;
166
167void H264BitWriter::WriteU(s32 value, s32 value_sz) {
168 WriteBits(value, value_sz);
169}
170
171void H264BitWriter::WriteSe(s32 value) {
172 WriteExpGolombCodedInt(value);
173}
174
175void H264BitWriter::WriteUe(s32 value) {
176 WriteExpGolombCodedUInt((u32)value);
177}
178
179void H264BitWriter::End() {
180 WriteBit(true);
181 Flush();
182}
183
184void H264BitWriter::WriteBit(bool state) {
185 WriteBits(state ? 1 : 0, 1);
186}
187
188void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) {
189 std::vector<u8> scan(count);
190 if (count == 16) {
191 std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
192 } else {
193 std::memcpy(scan.data(), zig_zag_direct.data(), scan.size());
194 }
195 u8 last_scale = 8;
196
197 for (s32 index = 0; index < count; index++) {
198 const u8 value = list[start + scan[index]];
199 const s32 delta_scale = static_cast<s32>(value - last_scale);
200
201 WriteSe(delta_scale);
202
203 last_scale = value;
204 }
205}
206
207std::vector<u8>& H264BitWriter::GetByteArray() {
208 return byte_array;
209}
210
211const std::vector<u8>& H264BitWriter::GetByteArray() const {
212 return byte_array;
213}
214
215void H264BitWriter::WriteBits(s32 value, s32 bit_count) {
216 s32 value_pos = 0;
217
218 s32 remaining = bit_count;
219
220 while (remaining > 0) {
221 s32 copy_size = remaining;
222
223 const s32 free_bits = GetFreeBufferBits();
224
225 if (copy_size > free_bits) {
226 copy_size = free_bits;
227 }
228
229 const s32 mask = (1 << copy_size) - 1;
230
231 const s32 src_shift = (bit_count - value_pos) - copy_size;
232 const s32 dst_shift = (buffer_size - buffer_pos) - copy_size;
233
234 buffer |= ((value >> src_shift) & mask) << dst_shift;
235
236 value_pos += copy_size;
237 buffer_pos += copy_size;
238 remaining -= copy_size;
239 }
240}
241
242void H264BitWriter::WriteExpGolombCodedInt(s32 value) {
243 const s32 sign = value <= 0 ? 0 : 1;
244 if (value < 0) {
245 value = -value;
246 }
247 value = (value << 1) - sign;
248 WriteExpGolombCodedUInt(value);
249}
250
251void H264BitWriter::WriteExpGolombCodedUInt(u32 value) {
252 const s32 size = 32 - Common::CountLeadingZeroes32(static_cast<s32>(value + 1));
253 WriteBits(1, size);
254
255 value -= (1U << (size - 1)) - 1;
256 WriteBits(static_cast<s32>(value), size - 1);
257}
258
259s32 H264BitWriter::GetFreeBufferBits() {
260 if (buffer_pos == buffer_size) {
261 Flush();
262 }
263
264 return buffer_size - buffer_pos;
265}
266
267void H264BitWriter::Flush() {
268 if (buffer_pos == 0) {
269 return;
270 }
271 byte_array.push_back(static_cast<u8>(buffer));
272
273 buffer = 0;
274 buffer_pos = 0;
275}
276} // namespace Tegra::Decoder
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h
new file mode 100644
index 000000000..21752dd90
--- /dev/null
+++ b/src/video_core/command_classes/codecs/h264.h
@@ -0,0 +1,130 @@
1// MIT License
2//
3// Copyright (c) Ryujinx Team and Contributors
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
6// associated documentation files (the "Software"), to deal in the Software without restriction,
7// including without limitation the rights to use, copy, modify, merge, publish, distribute,
8// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
9// furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in all copies or
12// substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
15// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
17// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19//
20
21#pragma once
22
23#include <vector>
24#include "common/common_funcs.h"
25#include "common/common_types.h"
26#include "video_core/command_classes/nvdec_common.h"
27
28namespace Tegra {
29class GPU;
30namespace Decoder {
31
32class H264BitWriter {
33public:
34 H264BitWriter();
35 ~H264BitWriter();
36
37 /// The following Write methods are based on clause 9.1 in the H.264 specification.
38 /// WriteSe and WriteUe write in the Exp-Golomb-coded syntax
39 void WriteU(s32 value, s32 value_sz);
40 void WriteSe(s32 value);
41 void WriteUe(s32 value);
42
43 /// Finalize the bitstream
44 void End();
45
46 /// append a bit to the stream, equivalent value to the state parameter
47 void WriteBit(bool state);
48
49 /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
50 /// Writes the scaling matrices of the sream
51 void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count);
52
53 /// Return the bitstream as a vector.
54 std::vector<u8>& GetByteArray();
55 const std::vector<u8>& GetByteArray() const;
56
57private:
58 // ZigZag LUTs from libavcodec.
59 static constexpr std::array<u8, 64> zig_zag_direct{
60 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48,
61 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23,
62 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
63 };
64
65 static constexpr std::array<u8, 16> zig_zag_scan{
66 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
67 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
68 };
69
70 void WriteBits(s32 value, s32 bit_count);
71 void WriteExpGolombCodedInt(s32 value);
72 void WriteExpGolombCodedUInt(u32 value);
73 s32 GetFreeBufferBits();
74 void Flush();
75
76 s32 buffer_size{8};
77
78 s32 buffer{};
79 s32 buffer_pos{};
80 std::vector<u8> byte_array;
81};
82
83class H264 {
84public:
85 explicit H264(GPU& gpu);
86 ~H264();
87
88 /// Compose the H264 header of the frame for FFmpeg decoding
89 std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
90 bool is_first_frame = false);
91
92private:
93 struct H264ParameterSet {
94 u32 log2_max_pic_order_cnt{};
95 u32 delta_pic_order_always_zero_flag{};
96 u32 frame_mbs_only_flag{};
97 u32 pic_width_in_mbs{};
98 u32 pic_height_in_map_units{};
99 INSERT_PADDING_WORDS(1);
100 u32 entropy_coding_mode_flag{};
101 u32 bottom_field_pic_order_flag{};
102 u32 num_refidx_l0_default_active{};
103 u32 num_refidx_l1_default_active{};
104 u32 deblocking_filter_control_flag{};
105 u32 redundant_pic_count_flag{};
106 u32 transform_8x8_mode_flag{};
107 INSERT_PADDING_WORDS(9);
108 u64 flags{};
109 u32 frame_number{};
110 u32 frame_number2{};
111 };
112 static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size");
113
114 struct H264DecoderContext {
115 INSERT_PADDING_BYTES(0x48);
116 u32 frame_data_size{};
117 INSERT_PADDING_BYTES(0xc);
118 H264ParameterSet h264_parameter_set{};
119 INSERT_PADDING_BYTES(0x100);
120 std::array<u8, 0x60> scaling_matrix_4;
121 std::array<u8, 0x80> scaling_matrix_8;
122 };
123 static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size");
124
125 std::vector<u8> frame;
126 GPU& gpu;
127};
128
129} // namespace Decoder
130} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp
new file mode 100644
index 000000000..3bae0bb5d
--- /dev/null
+++ b/src/video_core/command_classes/codecs/vp9.cpp
@@ -0,0 +1,1010 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring> // for std::memcpy
6#include <numeric>
7#include "video_core/command_classes/codecs/vp9.h"
8#include "video_core/gpu.h"
9#include "video_core/memory_manager.h"
10
11namespace Tegra::Decoder {
12
13// Default compressed header probabilities once frame context resets
14constexpr Vp9EntropyProbs default_probs{
15 .y_mode_prob{
16 65, 32, 18, 144, 162, 194, 41, 51, 98, 132, 68, 18, 165, 217, 196, 45, 40, 78,
17 173, 80, 19, 176, 240, 193, 64, 35, 46, 221, 135, 38, 194, 248, 121, 96, 85, 29,
18 },
19 .partition_prob{
20 199, 122, 141, 0, 147, 63, 159, 0, 148, 133, 118, 0, 121, 104, 114, 0,
21 174, 73, 87, 0, 92, 41, 83, 0, 82, 99, 50, 0, 53, 39, 39, 0,
22 177, 58, 59, 0, 68, 26, 63, 0, 52, 79, 25, 0, 17, 14, 12, 0,
23 222, 34, 30, 0, 72, 16, 44, 0, 58, 32, 12, 0, 10, 7, 6, 0,
24 },
25 .coef_probs{
26 195, 29, 183, 0, 84, 49, 136, 0, 8, 42, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0,
27 0, 0, 0, 0, 31, 107, 169, 0, 35, 99, 159, 0, 17, 82, 140, 0, 8, 66, 114, 0,
28 2, 44, 76, 0, 1, 19, 32, 0, 40, 132, 201, 0, 29, 114, 187, 0, 13, 91, 157, 0,
29 7, 75, 127, 0, 3, 58, 95, 0, 1, 28, 47, 0, 69, 142, 221, 0, 42, 122, 201, 0,
30 15, 91, 159, 0, 6, 67, 121, 0, 1, 42, 77, 0, 1, 17, 31, 0, 102, 148, 228, 0,
31 67, 117, 204, 0, 17, 82, 154, 0, 6, 59, 114, 0, 2, 39, 75, 0, 1, 15, 29, 0,
32 156, 57, 233, 0, 119, 57, 212, 0, 58, 48, 163, 0, 29, 40, 124, 0, 12, 30, 81, 0,
33 3, 12, 31, 0, 191, 107, 226, 0, 124, 117, 204, 0, 25, 99, 155, 0, 0, 0, 0, 0,
34 0, 0, 0, 0, 0, 0, 0, 0, 29, 148, 210, 0, 37, 126, 194, 0, 8, 93, 157, 0,
35 2, 68, 118, 0, 1, 39, 69, 0, 1, 17, 33, 0, 41, 151, 213, 0, 27, 123, 193, 0,
36 3, 82, 144, 0, 1, 58, 105, 0, 1, 32, 60, 0, 1, 13, 26, 0, 59, 159, 220, 0,
37 23, 126, 198, 0, 4, 88, 151, 0, 1, 66, 114, 0, 1, 38, 71, 0, 1, 18, 34, 0,
38 114, 136, 232, 0, 51, 114, 207, 0, 11, 83, 155, 0, 3, 56, 105, 0, 1, 33, 65, 0,
39 1, 17, 34, 0, 149, 65, 234, 0, 121, 57, 215, 0, 61, 49, 166, 0, 28, 36, 114, 0,
40 12, 25, 76, 0, 3, 16, 42, 0, 214, 49, 220, 0, 132, 63, 188, 0, 42, 65, 137, 0,
41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 137, 221, 0, 104, 131, 216, 0,
42 49, 111, 192, 0, 21, 87, 155, 0, 2, 49, 87, 0, 1, 16, 28, 0, 89, 163, 230, 0,
43 90, 137, 220, 0, 29, 100, 183, 0, 10, 70, 135, 0, 2, 42, 81, 0, 1, 17, 33, 0,
44 108, 167, 237, 0, 55, 133, 222, 0, 15, 97, 179, 0, 4, 72, 135, 0, 1, 45, 85, 0,
45 1, 19, 38, 0, 124, 146, 240, 0, 66, 124, 224, 0, 17, 88, 175, 0, 4, 58, 122, 0,
46 1, 36, 75, 0, 1, 18, 37, 0, 141, 79, 241, 0, 126, 70, 227, 0, 66, 58, 182, 0,
47 30, 44, 136, 0, 12, 34, 96, 0, 2, 20, 47, 0, 229, 99, 249, 0, 143, 111, 235, 0,
48 46, 109, 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 158, 236, 0,
49 94, 146, 224, 0, 25, 117, 191, 0, 9, 87, 149, 0, 3, 56, 99, 0, 1, 33, 57, 0,
50 83, 167, 237, 0, 68, 145, 222, 0, 10, 103, 177, 0, 2, 72, 131, 0, 1, 41, 79, 0,
51 1, 20, 39, 0, 99, 167, 239, 0, 47, 141, 224, 0, 10, 104, 178, 0, 2, 73, 133, 0,
52 1, 44, 85, 0, 1, 22, 47, 0, 127, 145, 243, 0, 71, 129, 228, 0, 17, 93, 177, 0,
53 3, 61, 124, 0, 1, 41, 84, 0, 1, 21, 52, 0, 157, 78, 244, 0, 140, 72, 231, 0,
54 69, 58, 184, 0, 31, 44, 137, 0, 14, 38, 105, 0, 8, 23, 61, 0, 125, 34, 187, 0,
55 52, 41, 133, 0, 6, 31, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
56 37, 109, 153, 0, 51, 102, 147, 0, 23, 87, 128, 0, 8, 67, 101, 0, 1, 41, 63, 0,
57 1, 19, 29, 0, 31, 154, 185, 0, 17, 127, 175, 0, 6, 96, 145, 0, 2, 73, 114, 0,
58 1, 51, 82, 0, 1, 28, 45, 0, 23, 163, 200, 0, 10, 131, 185, 0, 2, 93, 148, 0,
59 1, 67, 111, 0, 1, 41, 69, 0, 1, 14, 24, 0, 29, 176, 217, 0, 12, 145, 201, 0,
60 3, 101, 156, 0, 1, 69, 111, 0, 1, 39, 63, 0, 1, 14, 23, 0, 57, 192, 233, 0,
61 25, 154, 215, 0, 6, 109, 167, 0, 3, 78, 118, 0, 1, 48, 69, 0, 1, 21, 29, 0,
62 202, 105, 245, 0, 108, 106, 216, 0, 18, 90, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0,
63 0, 0, 0, 0, 33, 172, 219, 0, 64, 149, 206, 0, 14, 117, 177, 0, 5, 90, 141, 0,
64 2, 61, 95, 0, 1, 37, 57, 0, 33, 179, 220, 0, 11, 140, 198, 0, 1, 89, 148, 0,
65 1, 60, 104, 0, 1, 33, 57, 0, 1, 12, 21, 0, 30, 181, 221, 0, 8, 141, 198, 0,
66 1, 87, 145, 0, 1, 58, 100, 0, 1, 31, 55, 0, 1, 12, 20, 0, 32, 186, 224, 0,
67 7, 142, 198, 0, 1, 86, 143, 0, 1, 58, 100, 0, 1, 31, 55, 0, 1, 12, 22, 0,
68 57, 192, 227, 0, 20, 143, 204, 0, 3, 96, 154, 0, 1, 68, 112, 0, 1, 42, 69, 0,
69 1, 19, 32, 0, 212, 35, 215, 0, 113, 47, 169, 0, 29, 48, 105, 0, 0, 0, 0, 0,
70 0, 0, 0, 0, 0, 0, 0, 0, 74, 129, 203, 0, 106, 120, 203, 0, 49, 107, 178, 0,
71 19, 84, 144, 0, 4, 50, 84, 0, 1, 15, 25, 0, 71, 172, 217, 0, 44, 141, 209, 0,
72 15, 102, 173, 0, 6, 76, 133, 0, 2, 51, 89, 0, 1, 24, 42, 0, 64, 185, 231, 0,
73 31, 148, 216, 0, 8, 103, 175, 0, 3, 74, 131, 0, 1, 46, 81, 0, 1, 18, 30, 0,
74 65, 196, 235, 0, 25, 157, 221, 0, 5, 105, 174, 0, 1, 67, 120, 0, 1, 38, 69, 0,
75 1, 15, 30, 0, 65, 204, 238, 0, 30, 156, 224, 0, 7, 107, 177, 0, 2, 70, 124, 0,
76 1, 42, 73, 0, 1, 18, 34, 0, 225, 86, 251, 0, 144, 104, 235, 0, 42, 99, 181, 0,
77 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 175, 239, 0, 112, 165, 229, 0,
78 29, 136, 200, 0, 12, 103, 162, 0, 6, 77, 123, 0, 2, 53, 84, 0, 75, 183, 239, 0,
79 30, 155, 221, 0, 3, 106, 171, 0, 1, 74, 128, 0, 1, 44, 76, 0, 1, 17, 28, 0,
80 73, 185, 240, 0, 27, 159, 222, 0, 2, 107, 172, 0, 1, 75, 127, 0, 1, 42, 73, 0,
81 1, 17, 29, 0, 62, 190, 238, 0, 21, 159, 222, 0, 2, 107, 172, 0, 1, 72, 122, 0,
82 1, 40, 71, 0, 1, 18, 32, 0, 61, 199, 240, 0, 27, 161, 226, 0, 4, 113, 180, 0,
83 1, 76, 129, 0, 1, 46, 80, 0, 1, 23, 41, 0, 7, 27, 153, 0, 5, 30, 95, 0,
84 1, 16, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 75, 127, 0,
85 57, 75, 124, 0, 27, 67, 108, 0, 10, 54, 86, 0, 1, 33, 52, 0, 1, 12, 18, 0,
86 43, 125, 151, 0, 26, 108, 148, 0, 7, 83, 122, 0, 2, 59, 89, 0, 1, 38, 60, 0,
87 1, 17, 27, 0, 23, 144, 163, 0, 13, 112, 154, 0, 2, 75, 117, 0, 1, 50, 81, 0,
88 1, 31, 51, 0, 1, 14, 23, 0, 18, 162, 185, 0, 6, 123, 171, 0, 1, 78, 125, 0,
89 1, 51, 86, 0, 1, 31, 54, 0, 1, 14, 23, 0, 15, 199, 227, 0, 3, 150, 204, 0,
90 1, 91, 146, 0, 1, 55, 95, 0, 1, 30, 53, 0, 1, 11, 20, 0, 19, 55, 240, 0,
91 19, 59, 196, 0, 3, 52, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
92 41, 166, 207, 0, 104, 153, 199, 0, 31, 123, 181, 0, 14, 101, 152, 0, 5, 72, 106, 0,
93 1, 36, 52, 0, 35, 176, 211, 0, 12, 131, 190, 0, 2, 88, 144, 0, 1, 60, 101, 0,
94 1, 36, 60, 0, 1, 16, 28, 0, 28, 183, 213, 0, 8, 134, 191, 0, 1, 86, 142, 0,
95 1, 56, 96, 0, 1, 30, 53, 0, 1, 12, 20, 0, 20, 190, 215, 0, 4, 135, 192, 0,
96 1, 84, 139, 0, 1, 53, 91, 0, 1, 28, 49, 0, 1, 11, 20, 0, 13, 196, 216, 0,
97 2, 137, 192, 0, 1, 86, 143, 0, 1, 57, 99, 0, 1, 32, 56, 0, 1, 13, 24, 0,
98 211, 29, 217, 0, 96, 47, 156, 0, 22, 43, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0,
99 0, 0, 0, 0, 78, 120, 193, 0, 111, 116, 186, 0, 46, 102, 164, 0, 15, 80, 128, 0,
100 2, 49, 76, 0, 1, 18, 28, 0, 71, 161, 203, 0, 42, 132, 192, 0, 10, 98, 150, 0,
101 3, 69, 109, 0, 1, 44, 70, 0, 1, 18, 29, 0, 57, 186, 211, 0, 30, 140, 196, 0,
102 4, 93, 146, 0, 1, 62, 102, 0, 1, 38, 65, 0, 1, 16, 27, 0, 47, 199, 217, 0,
103 14, 145, 196, 0, 1, 88, 142, 0, 1, 57, 98, 0, 1, 36, 62, 0, 1, 15, 26, 0,
104 26, 219, 229, 0, 5, 155, 207, 0, 1, 94, 151, 0, 1, 60, 104, 0, 1, 36, 62, 0,
105 1, 16, 28, 0, 233, 29, 248, 0, 146, 47, 220, 0, 43, 52, 140, 0, 0, 0, 0, 0,
106 0, 0, 0, 0, 0, 0, 0, 0, 100, 163, 232, 0, 179, 161, 222, 0, 63, 142, 204, 0,
107 37, 113, 174, 0, 26, 89, 137, 0, 18, 68, 97, 0, 85, 181, 230, 0, 32, 146, 209, 0,
108 7, 100, 164, 0, 3, 71, 121, 0, 1, 45, 77, 0, 1, 18, 30, 0, 65, 187, 230, 0,
109 20, 148, 207, 0, 2, 97, 159, 0, 1, 68, 116, 0, 1, 40, 70, 0, 1, 14, 29, 0,
110 40, 194, 227, 0, 8, 147, 204, 0, 1, 94, 155, 0, 1, 65, 112, 0, 1, 39, 66, 0,
111 1, 14, 26, 0, 16, 208, 228, 0, 3, 151, 207, 0, 1, 98, 160, 0, 1, 67, 117, 0,
112 1, 41, 74, 0, 1, 17, 31, 0, 17, 38, 140, 0, 7, 34, 80, 0, 1, 17, 29, 0,
113 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37, 75, 128, 0, 41, 76, 128, 0,
114 26, 66, 116, 0, 12, 52, 94, 0, 2, 32, 55, 0, 1, 10, 16, 0, 50, 127, 154, 0,
115 37, 109, 152, 0, 16, 82, 121, 0, 5, 59, 85, 0, 1, 35, 54, 0, 1, 13, 20, 0,
116 40, 142, 167, 0, 17, 110, 157, 0, 2, 71, 112, 0, 1, 44, 72, 0, 1, 27, 45, 0,
117 1, 11, 17, 0, 30, 175, 188, 0, 9, 124, 169, 0, 1, 74, 116, 0, 1, 48, 78, 0,
118 1, 30, 49, 0, 1, 11, 18, 0, 10, 222, 223, 0, 2, 150, 194, 0, 1, 83, 128, 0,
119 1, 48, 79, 0, 1, 27, 45, 0, 1, 11, 17, 0, 36, 41, 235, 0, 29, 36, 193, 0,
120 10, 27, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 165, 222, 0,
121 177, 162, 215, 0, 110, 135, 195, 0, 57, 113, 168, 0, 23, 83, 120, 0, 10, 49, 61, 0,
122 85, 190, 223, 0, 36, 139, 200, 0, 5, 90, 146, 0, 1, 60, 103, 0, 1, 38, 65, 0,
123 1, 18, 30, 0, 72, 202, 223, 0, 23, 141, 199, 0, 2, 86, 140, 0, 1, 56, 97, 0,
124 1, 36, 61, 0, 1, 16, 27, 0, 55, 218, 225, 0, 13, 145, 200, 0, 1, 86, 141, 0,
125 1, 57, 99, 0, 1, 35, 61, 0, 1, 13, 22, 0, 15, 235, 212, 0, 1, 132, 184, 0,
126 1, 84, 139, 0, 1, 57, 97, 0, 1, 34, 56, 0, 1, 14, 23, 0, 181, 21, 201, 0,
127 61, 37, 123, 0, 10, 38, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
128 47, 106, 172, 0, 95, 104, 173, 0, 42, 93, 159, 0, 18, 77, 131, 0, 4, 50, 81, 0,
129 1, 17, 23, 0, 62, 147, 199, 0, 44, 130, 189, 0, 28, 102, 154, 0, 18, 75, 115, 0,
130 2, 44, 65, 0, 1, 12, 19, 0, 55, 153, 210, 0, 24, 130, 194, 0, 3, 93, 146, 0,
131 1, 61, 97, 0, 1, 31, 50, 0, 1, 10, 16, 0, 49, 186, 223, 0, 17, 148, 204, 0,
132 1, 96, 142, 0, 1, 53, 83, 0, 1, 26, 44, 0, 1, 11, 17, 0, 13, 217, 212, 0,
133 2, 136, 180, 0, 1, 78, 124, 0, 1, 50, 83, 0, 1, 29, 49, 0, 1, 14, 23, 0,
134 197, 13, 247, 0, 82, 17, 222, 0, 25, 17, 162, 0, 0, 0, 0, 0, 0, 0, 0, 0,
135 0, 0, 0, 0, 126, 186, 247, 0, 234, 191, 243, 0, 176, 177, 234, 0, 104, 158, 220, 0,
136 66, 128, 186, 0, 55, 90, 137, 0, 111, 197, 242, 0, 46, 158, 219, 0, 9, 104, 171, 0,
137 2, 65, 125, 0, 1, 44, 80, 0, 1, 17, 91, 0, 104, 208, 245, 0, 39, 168, 224, 0,
138 3, 109, 162, 0, 1, 79, 124, 0, 1, 50, 102, 0, 1, 43, 102, 0, 84, 220, 246, 0,
139 31, 177, 231, 0, 2, 115, 180, 0, 1, 79, 134, 0, 1, 55, 77, 0, 1, 60, 79, 0,
140 43, 243, 240, 0, 8, 180, 217, 0, 1, 115, 166, 0, 1, 84, 121, 0, 1, 51, 67, 0,
141 1, 16, 6, 0,
142 },
143 .switchable_interp_prob{235, 162, 36, 255, 34, 3, 149, 144},
144 .inter_mode_prob{
145 2, 173, 34, 0, 7, 145, 85, 0, 7, 166, 63, 0, 7, 94,
146 66, 0, 8, 64, 46, 0, 17, 81, 31, 0, 25, 29, 30, 0,
147 },
148 .intra_inter_prob{9, 102, 187, 225},
149 .comp_inter_prob{9, 102, 187, 225, 0},
150 .single_ref_prob{33, 16, 77, 74, 142, 142, 172, 170, 238, 247},
151 .comp_ref_prob{50, 126, 123, 221, 226},
152 .tx_32x32_prob{3, 136, 37, 5, 52, 13},
153 .tx_16x16_prob{20, 152, 15, 101},
154 .tx_8x8_prob{100, 66},
155 .skip_probs{192, 128, 64},
156 .joints{32, 64, 96},
157 .sign{128, 128},
158 .classes{
159 224, 144, 192, 168, 192, 176, 192, 198, 198, 245,
160 216, 128, 176, 160, 176, 176, 192, 198, 198, 208,
161 },
162 .class_0{216, 208},
163 .prob_bits{
164 136, 140, 148, 160, 176, 192, 224, 234, 234, 240,
165 136, 140, 148, 160, 176, 192, 224, 234, 234, 240,
166 },
167 .class_0_fr{128, 128, 64, 96, 112, 64, 128, 128, 64, 96, 112, 64},
168 .fr{64, 96, 64, 64, 96, 64},
169 .class_0_hp{160, 160},
170 .high_precision{128, 128},
171};
172
173VP9::VP9(GPU& gpu) : gpu(gpu) {}
174
175VP9::~VP9() = default;
176
177void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
178 const bool update = new_prob != old_prob;
179
180 writer.Write(update, diff_update_probability);
181
182 if (update) {
183 WriteProbabilityDelta(writer, new_prob, old_prob);
184 }
185}
186template <typename T, std::size_t N>
187void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
188 const std::array<T, N>& old_prob) {
189 for (std::size_t offset = 0; offset < new_prob.size(); ++offset) {
190 WriteProbabilityUpdate(writer, new_prob[offset], old_prob[offset]);
191 }
192}
193
194template <typename T, std::size_t N>
195void VP9::WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
196 const std::array<T, N>& old_prob) {
197 for (std::size_t offset = 0; offset < new_prob.size(); offset += 4) {
198 WriteProbabilityUpdate(writer, new_prob[offset + 0], old_prob[offset + 0]);
199 WriteProbabilityUpdate(writer, new_prob[offset + 1], old_prob[offset + 1]);
200 WriteProbabilityUpdate(writer, new_prob[offset + 2], old_prob[offset + 2]);
201 }
202}
203
204void VP9::WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
205 const int delta = RemapProbability(new_prob, old_prob);
206
207 EncodeTermSubExp(writer, delta);
208}
209
210s32 VP9::RemapProbability(s32 new_prob, s32 old_prob) {
211 new_prob--;
212 old_prob--;
213
214 std::size_t index{};
215
216 if (old_prob * 2 <= 0xff) {
217 index = static_cast<std::size_t>(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1));
218 } else {
219 index = static_cast<std::size_t>(
220 std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1));
221 }
222
223 return map_lut[index];
224}
225
226s32 VP9::RecenterNonNeg(s32 new_prob, s32 old_prob) {
227 if (new_prob > old_prob * 2) {
228 return new_prob;
229 } else if (new_prob >= old_prob) {
230 return (new_prob - old_prob) * 2;
231 } else {
232 return (old_prob - new_prob) * 2 - 1;
233 }
234}
235
236void VP9::EncodeTermSubExp(VpxRangeEncoder& writer, s32 value) {
237 if (WriteLessThan(writer, value, 16)) {
238 writer.Write(value, 4);
239 } else if (WriteLessThan(writer, value, 32)) {
240 writer.Write(value - 16, 4);
241 } else if (WriteLessThan(writer, value, 64)) {
242 writer.Write(value - 32, 5);
243 } else {
244 value -= 64;
245
246 constexpr s32 size = 8;
247
248 const s32 mask = (1 << size) - 191;
249
250 const s32 delta = value - mask;
251
252 if (delta < 0) {
253 writer.Write(value, size - 1);
254 } else {
255 writer.Write(delta / 2 + mask, size - 1);
256 writer.Write(delta & 1, 1);
257 }
258 }
259}
260
261bool VP9::WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test) {
262 const bool is_lt = value < test;
263 writer.Write(!is_lt);
264 return is_lt;
265}
266
267void VP9::WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode,
268 const std::array<u8, 2304>& new_prob,
269 const std::array<u8, 2304>& old_prob) {
270 // Note: There's 1 byte added on each packet for alignment,
271 // this byte is ignored when doing updates.
272 constexpr s32 block_bytes = 2 * 2 * 6 * 6 * 4;
273
274 const auto needs_update = [&](s32 base_index) -> bool {
275 s32 index = base_index;
276 for (s32 i = 0; i < 2; i++) {
277 for (s32 j = 0; j < 2; j++) {
278 for (s32 k = 0; k < 6; k++) {
279 for (s32 l = 0; l < 6; l++) {
280 if (new_prob[index + 0] != old_prob[index + 0] ||
281 new_prob[index + 1] != old_prob[index + 1] ||
282 new_prob[index + 2] != old_prob[index + 2]) {
283 return true;
284 }
285
286 index += 4;
287 }
288 }
289 }
290 }
291 return false;
292 };
293
294 for (s32 block_index = 0; block_index < 4; block_index++) {
295 const s32 base_index = block_index * block_bytes;
296 const bool update = needs_update(base_index);
297 writer.Write(update);
298
299 if (update) {
300 s32 index = base_index;
301 for (s32 i = 0; i < 2; i++) {
302 for (s32 j = 0; j < 2; j++) {
303 for (s32 k = 0; k < 6; k++) {
304 for (s32 l = 0; l < 6; l++) {
305 if (k != 0 || l < 3) {
306 WriteProbabilityUpdate(writer, new_prob[index + 0],
307 old_prob[index + 0]);
308 WriteProbabilityUpdate(writer, new_prob[index + 1],
309 old_prob[index + 1]);
310 WriteProbabilityUpdate(writer, new_prob[index + 2],
311 old_prob[index + 2]);
312 }
313 index += 4;
314 }
315 }
316 }
317 }
318 }
319
320 if (block_index == tx_mode) {
321 break;
322 }
323 }
324}
325
326void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
327 const bool update = new_prob != old_prob;
328 writer.Write(update, diff_update_probability);
329
330 if (update) {
331 writer.Write(new_prob >> 1, 7);
332 }
333}
334
335s32 VP9::CalcMinLog2TileCols(s32 frame_width) {
336 const s32 sb64_cols = (frame_width + 63) / 64;
337 s32 min_log2 = 0;
338
339 while ((64 << min_log2) < sb64_cols) {
340 min_log2++;
341 }
342
343 return min_log2;
344}
345
346s32 VP9::CalcMaxLog2TileCols(s32 frameWidth) {
347 const s32 sb64_cols = (frameWidth + 63) / 64;
348 s32 max_log2 = 1;
349
350 while ((sb64_cols >> max_log2) >= 4) {
351 max_log2++;
352 }
353
354 return max_log2 - 1;
355}
356
357Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) {
358 PictureInfo picture_info{};
359 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
360 Vp9PictureInfo vp9_info = picture_info.Convert();
361
362 InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy);
363
364 // surface_luma_offset[0:3] contains the address of the reference frame offsets in the following
365 // order: last, golden, altref, current. It may be worthwhile to track the updates done here
366 // to avoid buffering frame data needed for reference frame updating in the header composition.
367 std::memcpy(vp9_info.frame_offsets.data(), state.surface_luma_offset.data(), 4 * sizeof(u64));
368
369 return std::move(vp9_info);
370}
371
372void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
373 EntropyProbs entropy{};
374 gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
375 entropy.Convert(dst);
376}
377
378Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) {
379 Vp9FrameContainer frame{};
380 {
381 gpu.SyncGuestHost();
382 frame.info = std::move(GetVp9PictureInfo(state));
383
384 frame.bit_stream.resize(frame.info.bitstream_size);
385 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.bit_stream.data(),
386 frame.info.bitstream_size);
387 }
388 // Buffer two frames, saving the last show frame info
389 if (next_next_frame.bit_stream.size() != 0) {
390 Vp9FrameContainer temp{
391 .info = frame.info,
392 .bit_stream = frame.bit_stream,
393 };
394 next_next_frame.info.show_frame = frame.info.last_frame_shown;
395 frame.info = next_next_frame.info;
396 frame.bit_stream = next_next_frame.bit_stream;
397 next_next_frame = std::move(temp);
398
399 if (next_frame.bit_stream.size() != 0) {
400 Vp9FrameContainer temp{
401 .info = frame.info,
402 .bit_stream = frame.bit_stream,
403 };
404 next_frame.info.show_frame = frame.info.last_frame_shown;
405 frame.info = next_frame.info;
406 frame.bit_stream = next_frame.bit_stream;
407 next_frame = std::move(temp);
408 } else {
409 next_frame.info = frame.info;
410 next_frame.bit_stream = frame.bit_stream;
411 }
412 } else {
413 next_next_frame.info = frame.info;
414 next_next_frame.bit_stream = frame.bit_stream;
415 }
416 return frame;
417}
418
419std::vector<u8> VP9::ComposeCompressedHeader() {
420 VpxRangeEncoder writer{};
421
422 if (!current_frame_info.lossless) {
423 if (static_cast<u32>(current_frame_info.transform_mode) >= 3) {
424 writer.Write(3, 2);
425 writer.Write(current_frame_info.transform_mode == 4);
426 } else {
427 writer.Write(current_frame_info.transform_mode, 2);
428 }
429 }
430
431 if (current_frame_info.transform_mode == 4) {
432 // tx_mode_probs() in the spec
433 WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_8x8_prob,
434 prev_frame_probs.tx_8x8_prob);
435 WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_16x16_prob,
436 prev_frame_probs.tx_16x16_prob);
437 WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_32x32_prob,
438 prev_frame_probs.tx_32x32_prob);
439 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
440 prev_frame_probs.tx_8x8_prob = current_frame_info.entropy.tx_8x8_prob;
441 prev_frame_probs.tx_16x16_prob = current_frame_info.entropy.tx_16x16_prob;
442 prev_frame_probs.tx_32x32_prob = current_frame_info.entropy.tx_32x32_prob;
443 }
444 }
445 // read_coef_probs() in the spec
446 WriteCoefProbabilityUpdate(writer, current_frame_info.transform_mode,
447 current_frame_info.entropy.coef_probs, prev_frame_probs.coef_probs);
448 // read_skip_probs() in the spec
449 WriteProbabilityUpdate(writer, current_frame_info.entropy.skip_probs,
450 prev_frame_probs.skip_probs);
451
452 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
453 prev_frame_probs.coef_probs = current_frame_info.entropy.coef_probs;
454 prev_frame_probs.skip_probs = current_frame_info.entropy.skip_probs;
455 }
456
457 if (!current_frame_info.intra_only) {
458 // read_inter_probs() in the spec
459 WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.inter_mode_prob,
460 prev_frame_probs.inter_mode_prob);
461 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
462 prev_frame_probs.inter_mode_prob = current_frame_info.entropy.inter_mode_prob;
463 }
464
465 if (current_frame_info.interp_filter == 4) {
466 // read_interp_filter_probs() in the spec
467 WriteProbabilityUpdate(writer, current_frame_info.entropy.switchable_interp_prob,
468 prev_frame_probs.switchable_interp_prob);
469 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
470 prev_frame_probs.switchable_interp_prob =
471 current_frame_info.entropy.switchable_interp_prob;
472 }
473 }
474
475 // read_is_inter_probs() in the spec
476 WriteProbabilityUpdate(writer, current_frame_info.entropy.intra_inter_prob,
477 prev_frame_probs.intra_inter_prob);
478 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
479 prev_frame_probs.intra_inter_prob = current_frame_info.entropy.intra_inter_prob;
480 }
481 // frame_reference_mode() in the spec
482 if ((current_frame_info.ref_frame_sign_bias[1] & 1) !=
483 (current_frame_info.ref_frame_sign_bias[2] & 1) ||
484 (current_frame_info.ref_frame_sign_bias[1] & 1) !=
485 (current_frame_info.ref_frame_sign_bias[3] & 1)) {
486 if (current_frame_info.reference_mode >= 1) {
487 writer.Write(1, 1);
488 writer.Write(current_frame_info.reference_mode == 2);
489 } else {
490 writer.Write(0, 1);
491 }
492 }
493
494 // frame_reference_mode_probs() in the spec
495 if (current_frame_info.reference_mode == 2) {
496 WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_inter_prob,
497 prev_frame_probs.comp_inter_prob);
498 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
499 prev_frame_probs.comp_inter_prob = current_frame_info.entropy.comp_inter_prob;
500 }
501 }
502
503 if (current_frame_info.reference_mode != 1) {
504 WriteProbabilityUpdate(writer, current_frame_info.entropy.single_ref_prob,
505 prev_frame_probs.single_ref_prob);
506 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
507 prev_frame_probs.single_ref_prob = current_frame_info.entropy.single_ref_prob;
508 }
509 }
510
511 if (current_frame_info.reference_mode != 0) {
512 WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_ref_prob,
513 prev_frame_probs.comp_ref_prob);
514 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
515 prev_frame_probs.comp_ref_prob = current_frame_info.entropy.comp_ref_prob;
516 }
517 }
518
519 // read_y_mode_probs
520 for (std::size_t index = 0; index < current_frame_info.entropy.y_mode_prob.size();
521 ++index) {
522 WriteProbabilityUpdate(writer, current_frame_info.entropy.y_mode_prob[index],
523 prev_frame_probs.y_mode_prob[index]);
524 }
525 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
526 prev_frame_probs.y_mode_prob = current_frame_info.entropy.y_mode_prob;
527 }
528 // read_partition_probs
529 WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.partition_prob,
530 prev_frame_probs.partition_prob);
531 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
532 prev_frame_probs.partition_prob = current_frame_info.entropy.partition_prob;
533 }
534
535 // mv_probs
536 for (s32 i = 0; i < 3; i++) {
537 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.joints[i],
538 prev_frame_probs.joints[i]);
539 }
540 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
541 prev_frame_probs.joints = current_frame_info.entropy.joints;
542 }
543
544 for (s32 i = 0; i < 2; i++) {
545 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.sign[i],
546 prev_frame_probs.sign[i]);
547
548 for (s32 j = 0; j < 10; j++) {
549 const int index = i * 10 + j;
550
551 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.classes[index],
552 prev_frame_probs.classes[index]);
553 }
554
555 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0[i],
556 prev_frame_probs.class_0[i]);
557
558 for (s32 j = 0; j < 10; j++) {
559 const int index = i * 10 + j;
560
561 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.prob_bits[index],
562 prev_frame_probs.prob_bits[index]);
563 }
564 }
565
566 for (s32 i = 0; i < 2; i++) {
567 for (s32 j = 0; j < 2; j++) {
568 for (s32 k = 0; k < 3; k++) {
569 const int index = i * 2 * 3 + j * 3 + k;
570
571 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_fr[index],
572 prev_frame_probs.class_0_fr[index]);
573 }
574 }
575
576 for (s32 j = 0; j < 3; j++) {
577 const int index = i * 3 + j;
578
579 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.fr[index],
580 prev_frame_probs.fr[index]);
581 }
582 }
583
584 if (current_frame_info.allow_high_precision_mv) {
585 for (s32 index = 0; index < 2; index++) {
586 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_hp[index],
587 prev_frame_probs.class_0_hp[index]);
588 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.high_precision[index],
589 prev_frame_probs.high_precision[index]);
590 }
591 }
592
593 // save previous probs
594 if (current_frame_info.show_frame && !current_frame_info.is_key_frame) {
595 prev_frame_probs.sign = current_frame_info.entropy.sign;
596 prev_frame_probs.classes = current_frame_info.entropy.classes;
597 prev_frame_probs.class_0 = current_frame_info.entropy.class_0;
598 prev_frame_probs.prob_bits = current_frame_info.entropy.prob_bits;
599 prev_frame_probs.class_0_fr = current_frame_info.entropy.class_0_fr;
600 prev_frame_probs.fr = current_frame_info.entropy.fr;
601 prev_frame_probs.class_0_hp = current_frame_info.entropy.class_0_hp;
602 prev_frame_probs.high_precision = current_frame_info.entropy.high_precision;
603 }
604 }
605
606 writer.End();
607 return writer.GetBuffer();
608
609 const auto writer_bytearray = writer.GetBuffer();
610
611 std::vector<u8> compressed_header(writer_bytearray.size());
612 std::memcpy(compressed_header.data(), writer_bytearray.data(), writer_bytearray.size());
613 return compressed_header;
614}
615
616VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
617 VpxBitStreamWriter uncomp_writer{};
618
619 uncomp_writer.WriteU(2, 2); // Frame marker.
620 uncomp_writer.WriteU(0, 2); // Profile.
621 uncomp_writer.WriteBit(false); // Show existing frame.
622 uncomp_writer.WriteBit(!current_frame_info.is_key_frame); // is key frame?
623 uncomp_writer.WriteBit(current_frame_info.show_frame); // show frame?
624 uncomp_writer.WriteBit(current_frame_info.error_resilient_mode); // error reslience
625
626 if (current_frame_info.is_key_frame) {
627 uncomp_writer.WriteU(frame_sync_code, 24);
628 uncomp_writer.WriteU(0, 3); // Color space.
629 uncomp_writer.WriteU(0, 1); // Color range.
630 uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16);
631 uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16);
632 uncomp_writer.WriteBit(false); // Render and frame size different.
633
634 // Reset context
635 prev_frame_probs = default_probs;
636 swap_next_golden = false;
637 loop_filter_ref_deltas.fill(0);
638 loop_filter_mode_deltas.fill(0);
639
640 // allow frames offsets to stabilize before checking for golden frames
641 grace_period = 4;
642
643 // On key frames, all frame slots are set to the current frame,
644 // so the value of the selected slot doesn't really matter.
645 frame_ctxs.fill({current_frame_number, false, default_probs});
646
647 // intra only, meaning the frame can be recreated with no other references
648 current_frame_info.intra_only = true;
649
650 } else {
651 std::array<s32, 3> ref_frame_index;
652
653 if (!current_frame_info.show_frame) {
654 uncomp_writer.WriteBit(current_frame_info.intra_only);
655 if (!current_frame_info.last_frame_was_key) {
656 swap_next_golden = !swap_next_golden;
657 }
658 } else {
659 current_frame_info.intra_only = false;
660 }
661 if (!current_frame_info.error_resilient_mode) {
662 uncomp_writer.WriteU(0, 2); // Reset frame context.
663 }
664
665 // Last, Golden, Altref frames
666 ref_frame_index = std::array<s32, 3>{0, 1, 2};
667
668 // set when next frame is hidden
669 // altref and golden references are swapped
670 if (swap_next_golden) {
671 ref_frame_index = std::array<s32, 3>{0, 2, 1};
672 }
673
674 // update Last Frame
675 u64 refresh_frame_flags = 1;
676
677 // golden frame may refresh, determined if the next golden frame offset is changed
678 bool golden_refresh = false;
679 if (grace_period <= 0) {
680 for (s32 index = 1; index < 3; ++index) {
681 if (current_frame_info.frame_offsets[index] !=
682 next_frame.info.frame_offsets[index]) {
683 current_frame_info.refresh_frame[index] = true;
684 golden_refresh = true;
685 grace_period = 3;
686 }
687 }
688 }
689
690 if (current_frame_info.show_frame &&
691 (!next_frame.info.show_frame || next_frame.info.is_key_frame)) {
692 // Update golden frame
693 refresh_frame_flags = swap_next_golden ? 2 : 4;
694 }
695
696 if (!current_frame_info.show_frame) {
697 // Update altref
698 refresh_frame_flags = swap_next_golden ? 2 : 4;
699 } else if (golden_refresh) {
700 refresh_frame_flags = 3;
701 }
702
703 if (current_frame_info.intra_only) {
704 uncomp_writer.WriteU(frame_sync_code, 24);
705 uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8);
706 uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16);
707 uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16);
708 uncomp_writer.WriteBit(false); // Render and frame size different.
709 } else {
710 uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8);
711
712 for (s32 index = 1; index < 4; index++) {
713 uncomp_writer.WriteU(ref_frame_index[index - 1], 3);
714 uncomp_writer.WriteU(current_frame_info.ref_frame_sign_bias[index], 1);
715 }
716
717 uncomp_writer.WriteBit(true); // Frame size with refs.
718 uncomp_writer.WriteBit(false); // Render and frame size different.
719 uncomp_writer.WriteBit(current_frame_info.allow_high_precision_mv);
720 uncomp_writer.WriteBit(current_frame_info.interp_filter == 4);
721
722 if (current_frame_info.interp_filter != 4) {
723 uncomp_writer.WriteU(current_frame_info.interp_filter, 2);
724 }
725 }
726 }
727
728 if (!current_frame_info.error_resilient_mode) {
729 uncomp_writer.WriteBit(true); // Refresh frame context. where do i get this info from?
730 uncomp_writer.WriteBit(true); // Frame parallel decoding mode.
731 }
732
733 int frame_ctx_idx = 0;
734 if (!current_frame_info.show_frame) {
735 frame_ctx_idx = 1;
736 }
737
738 uncomp_writer.WriteU(frame_ctx_idx, 2); // Frame context index.
739 prev_frame_probs =
740 frame_ctxs[frame_ctx_idx].probs; // reference probabilities for compressed header
741 frame_ctxs[frame_ctx_idx] = {current_frame_number, false, current_frame_info.entropy};
742
743 uncomp_writer.WriteU(current_frame_info.first_level, 6);
744 uncomp_writer.WriteU(current_frame_info.sharpness_level, 3);
745 uncomp_writer.WriteBit(current_frame_info.mode_ref_delta_enabled);
746
747 if (current_frame_info.mode_ref_delta_enabled) {
748 // check if ref deltas are different, update accordingly
749 std::array<bool, 4> update_loop_filter_ref_deltas;
750 std::array<bool, 2> update_loop_filter_mode_deltas;
751
752 bool loop_filter_delta_update = false;
753
754 for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) {
755 const s8 old_deltas = loop_filter_ref_deltas[index];
756 const s8 new_deltas = current_frame_info.ref_deltas[index];
757
758 loop_filter_delta_update |=
759 (update_loop_filter_ref_deltas[index] = old_deltas != new_deltas);
760 }
761
762 for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) {
763 const s8 old_deltas = loop_filter_mode_deltas[index];
764 const s8 new_deltas = current_frame_info.mode_deltas[index];
765
766 loop_filter_delta_update |=
767 (update_loop_filter_mode_deltas[index] = old_deltas != new_deltas);
768 }
769
770 uncomp_writer.WriteBit(loop_filter_delta_update);
771
772 if (loop_filter_delta_update) {
773 for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) {
774 uncomp_writer.WriteBit(update_loop_filter_ref_deltas[index]);
775
776 if (update_loop_filter_ref_deltas[index]) {
777 uncomp_writer.WriteS(current_frame_info.ref_deltas[index], 6);
778 }
779 }
780
781 for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) {
782 uncomp_writer.WriteBit(update_loop_filter_mode_deltas[index]);
783
784 if (update_loop_filter_mode_deltas[index]) {
785 uncomp_writer.WriteS(current_frame_info.mode_deltas[index], 6);
786 }
787 }
788 // save new deltas
789 loop_filter_ref_deltas = current_frame_info.ref_deltas;
790 loop_filter_mode_deltas = current_frame_info.mode_deltas;
791 }
792 }
793
794 uncomp_writer.WriteU(current_frame_info.base_q_index, 8);
795
796 uncomp_writer.WriteDeltaQ(current_frame_info.y_dc_delta_q);
797 uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q);
798 uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q);
799
800 uncomp_writer.WriteBit(false); // Segmentation enabled (TODO).
801
802 const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width);
803 const s32 max_tile_cols_log2 = CalcMaxLog2TileCols(current_frame_info.frame_size.width);
804
805 const s32 tile_cols_log2_diff = current_frame_info.log2_tile_cols - min_tile_cols_log2;
806 const s32 tile_cols_log2_inc_mask = (1 << tile_cols_log2_diff) - 1;
807
808 // If it's less than the maximum, we need to add an extra 0 on the bitstream
809 // to indicate that it should stop reading.
810 if (current_frame_info.log2_tile_cols < max_tile_cols_log2) {
811 uncomp_writer.WriteU(tile_cols_log2_inc_mask << 1, tile_cols_log2_diff + 1);
812 } else {
813 uncomp_writer.WriteU(tile_cols_log2_inc_mask, tile_cols_log2_diff);
814 }
815
816 const bool tile_rows_log2_is_nonzero = current_frame_info.log2_tile_rows != 0;
817
818 uncomp_writer.WriteBit(tile_rows_log2_is_nonzero);
819
820 if (tile_rows_log2_is_nonzero) {
821 uncomp_writer.WriteBit(current_frame_info.log2_tile_rows > 1);
822 }
823
824 return uncomp_writer;
825}
826
827std::vector<u8>& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) {
828 std::vector<u8> bitstream;
829 {
830 Vp9FrameContainer curr_frame = GetCurrentFrame(state);
831 current_frame_info = curr_frame.info;
832 bitstream = curr_frame.bit_stream;
833 }
834
835 // The uncompressed header routine sets PrevProb parameters needed for the compressed header
836 auto uncomp_writer = ComposeUncompressedHeader();
837 std::vector<u8> compressed_header = ComposeCompressedHeader();
838
839 uncomp_writer.WriteU(static_cast<s32>(compressed_header.size()), 16);
840 uncomp_writer.Flush();
841 std::vector<u8> uncompressed_header = uncomp_writer.GetByteArray();
842
843 // Write headers and frame to buffer
844 frame.resize(uncompressed_header.size() + compressed_header.size() + bitstream.size());
845 std::memcpy(frame.data(), uncompressed_header.data(), uncompressed_header.size());
846 std::memcpy(frame.data() + uncompressed_header.size(), compressed_header.data(),
847 compressed_header.size());
848 std::memcpy(frame.data() + uncompressed_header.size() + compressed_header.size(),
849 bitstream.data(), bitstream.size());
850
851 // keep track of frame number
852 current_frame_number++;
853 grace_period--;
854
855 // don't display hidden frames
856 hidden = !current_frame_info.show_frame;
857 return frame;
858}
859
860VpxRangeEncoder::VpxRangeEncoder() {
861 Write(false);
862}
863
864VpxRangeEncoder::~VpxRangeEncoder() = default;
865
866void VpxRangeEncoder::Write(s32 value, s32 value_size) {
867 for (s32 bit = value_size - 1; bit >= 0; bit--) {
868 Write(((value >> bit) & 1) != 0);
869 }
870}
871
872void VpxRangeEncoder::Write(bool bit) {
873 Write(bit, half_probability);
874}
875
876void VpxRangeEncoder::Write(bool bit, s32 probability) {
877 u32 local_range = range;
878 const u32 split = 1 + (((local_range - 1) * static_cast<u32>(probability)) >> 8);
879 local_range = split;
880
881 if (bit) {
882 low_value += split;
883 local_range = range - split;
884 }
885
886 s32 shift = norm_lut[local_range];
887 local_range <<= shift;
888 count += shift;
889
890 if (count >= 0) {
891 const s32 offset = shift - count;
892
893 if (((low_value << (offset - 1)) >> 31) != 0) {
894 const s32 current_pos = static_cast<s32>(base_stream.GetPosition());
895 base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos);
896 while (base_stream.GetPosition() >= 0 && PeekByte() == 0xff) {
897 base_stream.WriteByte(0);
898
899 base_stream.Seek(-2, Common::SeekOrigin::FromCurrentPos);
900 }
901 base_stream.WriteByte(static_cast<u8>((PeekByte() + 1)));
902 base_stream.Seek(current_pos, Common::SeekOrigin::SetOrigin);
903 }
904 base_stream.WriteByte(static_cast<u8>((low_value >> (24 - offset))));
905
906 low_value <<= offset;
907 shift = count;
908 low_value &= 0xffffff;
909 count -= 8;
910 }
911
912 low_value <<= shift;
913 range = local_range;
914}
915
916void VpxRangeEncoder::End() {
917 for (std::size_t index = 0; index < 32; ++index) {
918 Write(false);
919 }
920}
921
922u8 VpxRangeEncoder::PeekByte() {
923 const u8 value = base_stream.ReadByte();
924 base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos);
925
926 return value;
927}
928
929VpxBitStreamWriter::VpxBitStreamWriter() = default;
930
931VpxBitStreamWriter::~VpxBitStreamWriter() = default;
932
933void VpxBitStreamWriter::WriteU(u32 value, u32 value_size) {
934 WriteBits(value, value_size);
935}
936
937void VpxBitStreamWriter::WriteS(s32 value, u32 value_size) {
938 const bool sign = value < 0;
939 if (sign) {
940 value = -value;
941 }
942
943 WriteBits(static_cast<u32>(value << 1) | (sign ? 1 : 0), value_size + 1);
944}
945
946void VpxBitStreamWriter::WriteDeltaQ(u32 value) {
947 const bool delta_coded = value != 0;
948 WriteBit(delta_coded);
949
950 if (delta_coded) {
951 WriteBits(value, 4);
952 }
953}
954
955void VpxBitStreamWriter::WriteBits(u32 value, u32 bit_count) {
956 s32 value_pos = 0;
957 s32 remaining = bit_count;
958
959 while (remaining > 0) {
960 s32 copy_size = remaining;
961
962 const s32 free = GetFreeBufferBits();
963
964 if (copy_size > free) {
965 copy_size = free;
966 }
967
968 const s32 mask = (1 << copy_size) - 1;
969
970 const s32 src_shift = (bit_count - value_pos) - copy_size;
971 const s32 dst_shift = (buffer_size - buffer_pos) - copy_size;
972
973 buffer |= ((value >> src_shift) & mask) << dst_shift;
974
975 value_pos += copy_size;
976 buffer_pos += copy_size;
977 remaining -= copy_size;
978 }
979}
980
981void VpxBitStreamWriter::WriteBit(bool state) {
982 WriteBits(state ? 1 : 0, 1);
983}
984
985s32 VpxBitStreamWriter::GetFreeBufferBits() {
986 if (buffer_pos == buffer_size) {
987 Flush();
988 }
989
990 return buffer_size - buffer_pos;
991}
992
993void VpxBitStreamWriter::Flush() {
994 if (buffer_pos == 0) {
995 return;
996 }
997 byte_array.push_back(static_cast<u8>(buffer));
998 buffer = 0;
999 buffer_pos = 0;
1000}
1001
1002std::vector<u8>& VpxBitStreamWriter::GetByteArray() {
1003 return byte_array;
1004}
1005
1006const std::vector<u8>& VpxBitStreamWriter::GetByteArray() const {
1007 return byte_array;
1008}
1009
1010} // namespace Tegra::Decoder
diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h
new file mode 100644
index 000000000..748e11bae
--- /dev/null
+++ b/src/video_core/command_classes/codecs/vp9.h
@@ -0,0 +1,216 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_map>
8#include <vector>
9#include "common/common_funcs.h"
10#include "common/common_types.h"
11#include "common/stream.h"
12#include "video_core/command_classes/codecs/vp9_types.h"
13#include "video_core/command_classes/nvdec_common.h"
14
15namespace Tegra {
16class GPU;
/// Frame kinds distinguished by the decoder.
enum class FrameType {
    KeyFrame = 0,
    InterFrame = 1,
};
18namespace Decoder {
19
20/// The VpxRangeEncoder, and VpxBitStreamWriter classes are used to compose the
21/// VP9 header bitstreams.
22
23class VpxRangeEncoder {
24public:
25 VpxRangeEncoder();
26 ~VpxRangeEncoder();
27
28 /// Writes the rightmost value_size bits from value into the stream
29 void Write(s32 value, s32 value_size);
30
31 /// Writes a single bit with half probability
32 void Write(bool bit);
33
34 /// Writes a bit to the base_stream encoded with probability
35 void Write(bool bit, s32 probability);
36
37 /// Signal the end of the bitstream
38 void End();
39
40 std::vector<u8>& GetBuffer() {
41 return base_stream.GetBuffer();
42 }
43
44 const std::vector<u8>& GetBuffer() const {
45 return base_stream.GetBuffer();
46 }
47
48private:
49 u8 PeekByte();
50 Common::Stream base_stream{};
51 u32 low_value{};
52 u32 range{0xff};
53 s32 count{-24};
54 s32 half_probability{128};
55 static constexpr std::array<s32, 256> norm_lut{
56 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
57 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
58 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
61 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
62 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
63 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
64 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
65 };
66};
67
68class VpxBitStreamWriter {
69public:
70 VpxBitStreamWriter();
71 ~VpxBitStreamWriter();
72
73 /// Write an unsigned integer value
74 void WriteU(u32 value, u32 value_size);
75
76 /// Write a signed integer value
77 void WriteS(s32 value, u32 value_size);
78
79 /// Based on 6.2.10 of VP9 Spec, writes a delta coded value
80 void WriteDeltaQ(u32 value);
81
82 /// Write a single bit.
83 void WriteBit(bool state);
84
85 /// Pushes current buffer into buffer_array, resets buffer
86 void Flush();
87
88 /// Returns byte_array
89 std::vector<u8>& GetByteArray();
90
91 /// Returns const byte_array
92 const std::vector<u8>& GetByteArray() const;
93
94private:
95 /// Write bit_count bits from value into buffer
96 void WriteBits(u32 value, u32 bit_count);
97
98 /// Gets next available position in buffer, invokes Flush() if buffer is full
99 s32 GetFreeBufferBits();
100
101 s32 buffer_size{8};
102
103 s32 buffer{};
104 s32 buffer_pos{};
105 std::vector<u8> byte_array;
106};
107
108class VP9 {
109public:
110 explicit VP9(GPU& gpu);
111 ~VP9();
112
113 /// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec
114 /// documentation
115 std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state);
116
117 /// Returns true if the most recent frame was a hidden frame.
118 bool WasFrameHidden() const {
119 return hidden;
120 }
121
122private:
123 /// Generates compressed header probability updates in the bitstream writer
124 template <typename T, std::size_t N>
125 void WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
126 const std::array<T, N>& old_prob);
127
128 /// Generates compressed header probability updates in the bitstream writer
129 /// If probs are not equal, WriteProbabilityDelta is invoked
130 void WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
131
132 /// Generates compressed header probability deltas in the bitstream writer
133 void WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
134
135 /// Adjusts old_prob depending on new_prob. Based on section 6.3.5 of VP9 Specification
136 s32 RemapProbability(s32 new_prob, s32 old_prob);
137
138 /// Recenters probability. Based on section 6.3.6 of VP9 Specification
139 s32 RecenterNonNeg(s32 new_prob, s32 old_prob);
140
141 /// Inverse of 6.3.4 Decode term subexp
142 void EncodeTermSubExp(VpxRangeEncoder& writer, s32 value);
143
144 /// Writes if the value is less than the test value
145 bool WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test);
146
147 /// Writes probability updates for the Coef probabilities
148 void WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode,
149 const std::array<u8, 2304>& new_prob,
150 const std::array<u8, 2304>& old_prob);
151
152 /// Write probabilities for 4-byte aligned structures
153 template <typename T, std::size_t N>
154 void WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
155 const std::array<T, N>& old_prob);
156
157 /// Write motion vector probability updates. 6.3.17 in the spec
158 void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
159
160 /// 6.2.14 Tile size calculation
161 s32 CalcMinLog2TileCols(s32 frame_width);
162 s32 CalcMaxLog2TileCols(s32 frame_width);
163
164 /// Returns VP9 information from NVDEC provided offset and size
165 Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state);
166
167 /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct
168 void InsertEntropy(u64 offset, Vp9EntropyProbs& dst);
169
170 /// Returns frame to be decoded after buffering
171 Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state);
172
173 /// Use NVDEC providied information to compose the headers for the current frame
174 std::vector<u8> ComposeCompressedHeader();
175 VpxBitStreamWriter ComposeUncompressedHeader();
176
177 GPU& gpu;
178 std::vector<u8> frame;
179
180 std::array<s8, 4> loop_filter_ref_deltas{};
181 std::array<s8, 2> loop_filter_mode_deltas{};
182
183 bool hidden;
184 s64 current_frame_number = -2; // since we buffer 2 frames
185 s32 grace_period = 6; // frame offsets need to stabilize
186 std::array<FrameContexts, 4> frame_ctxs{};
187 Vp9FrameContainer next_frame{};
188 Vp9FrameContainer next_next_frame{};
189 bool swap_next_golden{};
190
191 Vp9PictureInfo current_frame_info{};
192 Vp9EntropyProbs prev_frame_probs{};
193
194 s32 diff_update_probability = 252;
195 s32 frame_sync_code = 0x498342;
196 static constexpr std::array<s32, 254> map_lut = {
197 20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
198 36, 37, 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50,
199 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66,
200 67, 68, 69, 70, 71, 72, 73, 4, 74, 75, 76, 77, 78, 79, 80, 81, 82,
201 83, 84, 85, 5, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 6,
202 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 7, 110, 111, 112, 113,
203 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, 123, 124, 125, 126, 127, 128, 129,
204 130, 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
205 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11, 158, 159, 160,
206 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171, 172, 173, 174, 175, 176,
207 177, 178, 179, 180, 181, 13, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192,
208 193, 14, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207,
209 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223,
210 224, 225, 226, 227, 228, 229, 17, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
211 240, 241, 18, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19,
212 };
213};
214
215} // namespace Decoder
216} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h
new file mode 100644
index 000000000..8688fdac0
--- /dev/null
+++ b/src/video_core/command_classes/codecs/vp9_types.h
@@ -0,0 +1,369 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <list>
9#include <vector>
10#include "common/cityhash.h"
11#include "common/common_funcs.h"
12#include "common/common_types.h"
13#include "video_core/command_classes/nvdec_common.h"
14
15namespace Tegra {
16class GPU;
17
18namespace Decoder {
19struct Vp9FrameDimensions {
20 s16 width{};
21 s16 height{};
22 s16 luma_pitch{};
23 s16 chroma_pitch{};
24};
25static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");
26
27enum FrameFlags : u32 {
28 IsKeyFrame = 1 << 0,
29 LastFrameIsKeyFrame = 1 << 1,
30 FrameSizeChanged = 1 << 2,
31 ErrorResilientMode = 1 << 3,
32 LastShowFrame = 1 << 4,
33 IntraOnly = 1 << 5,
34};
35
/// Motion vector joint types: which components of the vector are non-zero.
enum class MvJointType {
    MvJointZero = 0,   // Zero vector
    MvJointHnzvz = 1,  // Vert zero, hor nonzero
    MvJointHzvnz = 2,  // Hor zero, vert nonzero
    MvJointHnzvnz = 3, // Both components nonzero
};
/// Motion vector magnitude classes; each covers the listed integer-pel interval.
enum class MvClassType {
    MvClass0 = 0,   // (0, 2] integer pel
    MvClass1 = 1,   // (2, 4] integer pel
    MvClass2 = 2,   // (4, 8] integer pel
    MvClass3 = 3,   // (8, 16] integer pel
    MvClass4 = 4,   // (16, 32] integer pel
    MvClass5 = 5,   // (32, 64] integer pel
    MvClass6 = 6,   // (64, 128] integer pel
    MvClass7 = 7,   // (128, 256] integer pel
    MvClass8 = 8,   // (256, 512] integer pel
    MvClass9 = 9,   // (512, 1024] integer pel
    MvClass10 = 10, // (1024,2048] integer pel
};
55
/// Block sizes, ordered from 4x4 up to 64x64.
enum class BlockSize {
    Block4x4 = 0,
    Block4x8 = 1,
    Block8x4 = 2,
    Block8x8 = 3,
    Block8x16 = 4,
    Block16x8 = 5,
    Block16x16 = 6,
    Block16x32 = 7,
    Block32x16 = 8,
    Block32x32 = 9,
    Block32x64 = 10,
    Block64x32 = 11,
    Block64x64 = 12,
    BlockSizes = 13,           // Number of real block sizes
    BlockInvalid = BlockSizes, // Shares its value with BlockSizes
};
73
/// Prediction modes: the ten intra predictors followed by the four motion vector modes.
enum class PredictionMode {
    DcPred = 0,   // Average of above and left pixels
    VPred = 1,    // Vertical
    HPred = 2,    // Horizontal
    D45Pred = 3,  // Directional 45 deg = round(arctan(1 / 1) * 180 / pi)
    D135Pred = 4, // Directional 135 deg = 180 - 45
    D117Pred = 5, // Directional 117 deg = 180 - 63
    D153Pred = 6, // Directional 153 deg = 180 - 27
    D207Pred = 7, // Directional 207 deg = 180 + 27
    D63Pred = 8,  // Directional 63 deg = round(arctan(2 / 1) * 180 / pi)
    TmPred = 9,   // True-motion
    NearestMv = 10,
    NearMv = 11,
    ZeroMv = 12,
    NewMv = 13,
    MbModeCount = 14, // Number of modes
};
91
/// Transform sizes.
enum class TxSize {
    Tx4x4 = 0,   // 4x4 transform
    Tx8x8 = 1,   // 8x8 transform
    Tx16x16 = 2, // 16x16 transform
    Tx32x32 = 3, // 32x32 transform
    TxSizes = 4, // Number of transform sizes
};
99
/// Transform modes: the largest transform allowed, or per-block selection.
enum class TxMode {
    Only4X4 = 0,      // Only 4x4 transform used
    Allow8X8 = 1,     // Allow block transform size up to 8x8
    Allow16X16 = 2,   // Allow block transform size up to 16x16
    Allow32X32 = 3,   // Allow block transform size up to 32x32
    TxModeSelect = 4, // Transform specified for each block
    TxModes = 5,      // Number of transform modes
};
108
/// Reference modes.
enum class reference_mode {
    SingleReference = 0,
    CompoundReference = 1,
    ReferenceModeSelect = 2,
    ReferenceModes = 3, // Number of reference modes
};
115
116struct Segmentation {
117 u8 enabled{};
118 u8 update_map{};
119 u8 temporal_update{};
120 u8 abs_delta{};
121 std::array<u32, 8> feature_mask{};
122 std::array<std::array<s16, 4>, 8> feature_data{};
123};
124static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");
125
126struct LoopFilter {
127 u8 mode_ref_delta_enabled{};
128 std::array<s8, 4> ref_deltas{};
129 std::array<s8, 2> mode_deltas{};
130};
131static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size");
132
133struct Vp9EntropyProbs {
134 std::array<u8, 36> y_mode_prob{};
135 std::array<u8, 64> partition_prob{};
136 std::array<u8, 2304> coef_probs{};
137 std::array<u8, 8> switchable_interp_prob{};
138 std::array<u8, 28> inter_mode_prob{};
139 std::array<u8, 4> intra_inter_prob{};
140 std::array<u8, 5> comp_inter_prob{};
141 std::array<u8, 10> single_ref_prob{};
142 std::array<u8, 5> comp_ref_prob{};
143 std::array<u8, 6> tx_32x32_prob{};
144 std::array<u8, 4> tx_16x16_prob{};
145 std::array<u8, 2> tx_8x8_prob{};
146 std::array<u8, 3> skip_probs{};
147 std::array<u8, 3> joints{};
148 std::array<u8, 2> sign{};
149 std::array<u8, 20> classes{};
150 std::array<u8, 2> class_0{};
151 std::array<u8, 20> prob_bits{};
152 std::array<u8, 12> class_0_fr{};
153 std::array<u8, 6> fr{};
154 std::array<u8, 2> class_0_hp{};
155 std::array<u8, 2> high_precision{};
156};
157static_assert(sizeof(Vp9EntropyProbs) == 0x9F4, "Vp9EntropyProbs is an invalid size");
158
159struct Vp9PictureInfo {
160 bool is_key_frame{};
161 bool intra_only{};
162 bool last_frame_was_key{};
163 bool frame_size_changed{};
164 bool error_resilient_mode{};
165 bool last_frame_shown{};
166 bool show_frame{};
167 std::array<s8, 4> ref_frame_sign_bias{};
168 s32 base_q_index{};
169 s32 y_dc_delta_q{};
170 s32 uv_dc_delta_q{};
171 s32 uv_ac_delta_q{};
172 bool lossless{};
173 s32 transform_mode{};
174 bool allow_high_precision_mv{};
175 s32 interp_filter{};
176 s32 reference_mode{};
177 s8 comp_fixed_ref{};
178 std::array<s8, 2> comp_var_ref{};
179 s32 log2_tile_cols{};
180 s32 log2_tile_rows{};
181 bool segment_enabled{};
182 bool segment_map_update{};
183 bool segment_map_temporal_update{};
184 s32 segment_abs_delta{};
185 std::array<u32, 8> segment_feature_enable{};
186 std::array<std::array<s16, 4>, 8> segment_feature_data{};
187 bool mode_ref_delta_enabled{};
188 bool use_prev_in_find_mv_refs{};
189 std::array<s8, 4> ref_deltas{};
190 std::array<s8, 2> mode_deltas{};
191 Vp9EntropyProbs entropy{};
192 Vp9FrameDimensions frame_size{};
193 u8 first_level{};
194 u8 sharpness_level{};
195 u32 bitstream_size{};
196 std::array<u64, 4> frame_offsets{};
197 std::array<bool, 4> refresh_frame{};
198};
199
200struct Vp9FrameContainer {
201 Vp9PictureInfo info{};
202 std::vector<u8> bit_stream;
203};
204
205struct PictureInfo {
206 INSERT_PADDING_WORDS(12);
207 u32 bitstream_size{};
208 INSERT_PADDING_WORDS(5);
209 Vp9FrameDimensions last_frame_size{};
210 Vp9FrameDimensions golden_frame_size{};
211 Vp9FrameDimensions alt_frame_size{};
212 Vp9FrameDimensions current_frame_size{};
213 u32 vp9_flags{};
214 std::array<s8, 4> ref_frame_sign_bias{};
215 u8 first_level{};
216 u8 sharpness_level{};
217 u8 base_q_index{};
218 u8 y_dc_delta_q{};
219 u8 uv_ac_delta_q{};
220 u8 uv_dc_delta_q{};
221 u8 lossless{};
222 u8 tx_mode{};
223 u8 allow_high_precision_mv{};
224 u8 interp_filter{};
225 u8 reference_mode{};
226 s8 comp_fixed_ref{};
227 std::array<s8, 2> comp_var_ref{};
228 u8 log2_tile_cols{};
229 u8 log2_tile_rows{};
230 Segmentation segmentation{};
231 LoopFilter loop_filter{};
232 INSERT_PADDING_BYTES(5);
233 u32 surface_params{};
234 INSERT_PADDING_WORDS(3);
235
236 Vp9PictureInfo Convert() const {
237
238 return Vp9PictureInfo{
239 .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0,
240 .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0,
241 .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0,
242 .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,
243 .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,
244 .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0,
245 .ref_frame_sign_bias = ref_frame_sign_bias,
246 .base_q_index = base_q_index,
247 .y_dc_delta_q = y_dc_delta_q,
248 .uv_dc_delta_q = uv_dc_delta_q,
249 .uv_ac_delta_q = uv_ac_delta_q,
250 .lossless = lossless != 0,
251 .transform_mode = tx_mode,
252 .allow_high_precision_mv = allow_high_precision_mv != 0,
253 .interp_filter = interp_filter,
254 .reference_mode = reference_mode,
255 .comp_fixed_ref = comp_fixed_ref,
256 .comp_var_ref = comp_var_ref,
257 .log2_tile_cols = log2_tile_cols,
258 .log2_tile_rows = log2_tile_rows,
259 .segment_enabled = segmentation.enabled != 0,
260 .segment_map_update = segmentation.update_map != 0,
261 .segment_map_temporal_update = segmentation.temporal_update != 0,
262 .segment_abs_delta = segmentation.abs_delta,
263 .segment_feature_enable = segmentation.feature_mask,
264 .segment_feature_data = segmentation.feature_data,
265 .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0,
266 .use_prev_in_find_mv_refs = !(vp9_flags == (FrameFlags::ErrorResilientMode)) &&
267 !(vp9_flags == (FrameFlags::FrameSizeChanged)) &&
268 !(vp9_flags == (FrameFlags::IntraOnly)) &&
269 (vp9_flags == (FrameFlags::LastShowFrame)) &&
270 !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)),
271 .ref_deltas = loop_filter.ref_deltas,
272 .mode_deltas = loop_filter.mode_deltas,
273 .frame_size = current_frame_size,
274 .first_level = first_level,
275 .sharpness_level = sharpness_level,
276 .bitstream_size = bitstream_size,
277 };
278 }
279};
280static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size");
281
282struct EntropyProbs {
283 INSERT_PADDING_BYTES(1024);
284 std::array<std::array<u8, 4>, 7> inter_mode_prob{};
285 std::array<u8, 4> intra_inter_prob{};
286 INSERT_PADDING_BYTES(80);
287 std::array<std::array<u8, 1>, 2> tx_8x8_prob{};
288 std::array<std::array<u8, 2>, 2> tx_16x16_prob{};
289 std::array<std::array<u8, 3>, 2> tx_32x32_prob{};
290 std::array<u8, 4> y_mode_prob_e8{};
291 std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{};
292 INSERT_PADDING_BYTES(64);
293 std::array<std::array<u8, 4>, 16> partition_prob{};
294 INSERT_PADDING_BYTES(10);
295 std::array<std::array<u8, 2>, 4> switchable_interp_prob{};
296 std::array<u8, 5> comp_inter_prob{};
297 std::array<u8, 4> skip_probs{};
298 std::array<u8, 3> joints{};
299 std::array<u8, 2> sign{};
300 std::array<std::array<u8, 1>, 2> class_0{};
301 std::array<std::array<u8, 3>, 2> fr{};
302 std::array<u8, 2> class_0_hp{};
303 std::array<u8, 2> high_precision{};
304 std::array<std::array<u8, 10>, 2> classes{};
305 std::array<std::array<std::array<u8, 3>, 2>, 2> class_0_fr{};
306 std::array<std::array<u8, 10>, 2> pred_bits{};
307 std::array<std::array<u8, 2>, 5> single_ref_prob{};
308 std::array<u8, 5> comp_ref_prob{};
309 INSERT_PADDING_BYTES(17);
310 std::array<std::array<std::array<std::array<std::array<std::array<u8, 4>, 6>, 6>, 2>, 2>, 4>
311 coef_probs{};
312
313 void Convert(Vp9EntropyProbs& fc) {
314 std::memcpy(fc.inter_mode_prob.data(), inter_mode_prob.data(), fc.inter_mode_prob.size());
315
316 std::memcpy(fc.intra_inter_prob.data(), intra_inter_prob.data(),
317 fc.intra_inter_prob.size());
318
319 std::memcpy(fc.tx_8x8_prob.data(), tx_8x8_prob.data(), fc.tx_8x8_prob.size());
320 std::memcpy(fc.tx_16x16_prob.data(), tx_16x16_prob.data(), fc.tx_16x16_prob.size());
321 std::memcpy(fc.tx_32x32_prob.data(), tx_32x32_prob.data(), fc.tx_32x32_prob.size());
322
323 for (s32 i = 0; i < 4; i++) {
324 for (s32 j = 0; j < 9; j++) {
325 fc.y_mode_prob[j + 9 * i] = j < 8 ? y_mode_prob_e0e7[i][j] : y_mode_prob_e8[i];
326 }
327 }
328
329 std::memcpy(fc.partition_prob.data(), partition_prob.data(), fc.partition_prob.size());
330
331 std::memcpy(fc.switchable_interp_prob.data(), switchable_interp_prob.data(),
332 fc.switchable_interp_prob.size());
333 std::memcpy(fc.comp_inter_prob.data(), comp_inter_prob.data(), fc.comp_inter_prob.size());
334 std::memcpy(fc.skip_probs.data(), skip_probs.data(), fc.skip_probs.size());
335
336 std::memcpy(fc.joints.data(), joints.data(), fc.joints.size());
337
338 std::memcpy(fc.sign.data(), sign.data(), fc.sign.size());
339 std::memcpy(fc.class_0.data(), class_0.data(), fc.class_0.size());
340 std::memcpy(fc.fr.data(), fr.data(), fc.fr.size());
341 std::memcpy(fc.class_0_hp.data(), class_0_hp.data(), fc.class_0_hp.size());
342 std::memcpy(fc.high_precision.data(), high_precision.data(), fc.high_precision.size());
343 std::memcpy(fc.classes.data(), classes.data(), fc.classes.size());
344 std::memcpy(fc.class_0_fr.data(), class_0_fr.data(), fc.class_0_fr.size());
345 std::memcpy(fc.prob_bits.data(), pred_bits.data(), fc.prob_bits.size());
346 std::memcpy(fc.single_ref_prob.data(), single_ref_prob.data(), fc.single_ref_prob.size());
347 std::memcpy(fc.comp_ref_prob.data(), comp_ref_prob.data(), fc.comp_ref_prob.size());
348
349 std::memcpy(fc.coef_probs.data(), coef_probs.data(), fc.coef_probs.size());
350 }
351};
352static_assert(sizeof(EntropyProbs) == 0xEA0, "EntropyProbs is an invalid size");
353
/// Reference frame kinds.
enum class Ref {
    Last,
    Golden,
    AltRef,
};
355
356struct RefPoolElement {
357 s64 frame{};
358 Ref ref{};
359 bool refresh{};
360};
361
362struct FrameContexts {
363 s64 from{};
364 bool adapted{};
365 Vp9EntropyProbs probs{};
366};
367
368}; // namespace Decoder
369}; // namespace Tegra
diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp
new file mode 100644
index 000000000..a5234ee47
--- /dev/null
+++ b/src/video_core/command_classes/host1x.cpp
@@ -0,0 +1,39 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "video_core/command_classes/host1x.h"
7#include "video_core/gpu.h"
8
9Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {}
10
11Tegra::Host1x::~Host1x() = default;
12
13void Tegra::Host1x::StateWrite(u32 offset, u32 arguments) {
14 u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u32);
15 std::memcpy(state_offset, &arguments, sizeof(u32));
16}
17
18void Tegra::Host1x::ProcessMethod(Host1x::Method method, const std::vector<u32>& arguments) {
19 StateWrite(static_cast<u32>(method), arguments[0]);
20 switch (method) {
21 case Method::WaitSyncpt:
22 Execute(arguments[0]);
23 break;
24 case Method::LoadSyncptPayload32:
25 syncpoint_value = arguments[0];
26 break;
27 case Method::WaitSyncpt32:
28 Execute(arguments[0]);
29 break;
30 default:
31 UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast<u32>(method));
32 break;
33 }
34}
35
36void Tegra::Host1x::Execute(u32 data) {
37 // This method waits on a valid syncpoint.
38 // TODO: Implement when proper Async is in place
39}
diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/command_classes/host1x.h
new file mode 100644
index 000000000..501a5ed2e
--- /dev/null
+++ b/src/video_core/command_classes/host1x.h
@@ -0,0 +1,78 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8#include "common/common_funcs.h"
9#include "common/common_types.h"
10
11namespace Tegra {
12class GPU;
13class Nvdec;
14
15class Host1x {
16public:
17 struct Host1xClassRegisters {
18 u32 incr_syncpt{};
19 u32 incr_syncpt_ctrl{};
20 u32 incr_syncpt_error{};
21 INSERT_PADDING_WORDS(5);
22 u32 wait_syncpt{};
23 u32 wait_syncpt_base{};
24 u32 wait_syncpt_incr{};
25 u32 load_syncpt_base{};
26 u32 incr_syncpt_base{};
27 u32 clear{};
28 u32 wait{};
29 u32 wait_with_interrupt{};
30 u32 delay_use{};
31 u32 tick_count_high{};
32 u32 tick_count_low{};
33 u32 tick_ctrl{};
34 INSERT_PADDING_WORDS(23);
35 u32 ind_ctrl{};
36 u32 ind_off2{};
37 u32 ind_off{};
38 std::array<u32, 31> ind_data{};
39 INSERT_PADDING_WORDS(1);
40 u32 load_syncpoint_payload32{};
41 u32 stall_ctrl{};
42 u32 wait_syncpt32{};
43 u32 wait_syncpt_base32{};
44 u32 load_syncpt_base32{};
45 u32 incr_syncpt_base32{};
46 u32 stall_count_high{};
47 u32 stall_count_low{};
48 u32 xref_ctrl{};
49 u32 channel_xref_high{};
50 u32 channel_xref_low{};
51 };
52 static_assert(sizeof(Host1xClassRegisters) == 0x164, "Host1xClassRegisters is an invalid size");
53
54 enum class Method : u32 {
55 WaitSyncpt = offsetof(Host1xClassRegisters, wait_syncpt) / 4,
56 LoadSyncptPayload32 = offsetof(Host1xClassRegisters, load_syncpoint_payload32) / 4,
57 WaitSyncpt32 = offsetof(Host1xClassRegisters, wait_syncpt32) / 4,
58 };
59
60 explicit Host1x(GPU& gpu);
61 ~Host1x();
62
63 /// Writes the method into the state, Invoke Execute() if encountered
64 void ProcessMethod(Host1x::Method method, const std::vector<u32>& arguments);
65
66private:
67 /// For Host1x, execute is waiting on a syncpoint previously written into the state
68 void Execute(u32 data);
69
70 /// Write argument into the provided offset
71 void StateWrite(u32 offset, u32 arguments);
72
73 u32 syncpoint_value{};
74 Host1xClassRegisters state{};
75 GPU& gpu;
76};
77
78} // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp
new file mode 100644
index 000000000..ede9466eb
--- /dev/null
+++ b/src/video_core/command_classes/nvdec.cpp
@@ -0,0 +1,56 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <bitset>
6#include "common/assert.h"
7#include "common/bit_util.h"
8#include "core/memory.h"
9#include "video_core/command_classes/nvdec.h"
10#include "video_core/gpu.h"
11#include "video_core/memory_manager.h"
12
13namespace Tegra {
14
15Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {}
16
17Nvdec::~Nvdec() = default;
18
19void Nvdec::ProcessMethod(Nvdec::Method method, const std::vector<u32>& arguments) {
20 if (method == Method::SetVideoCodec) {
21 codec->StateWrite(static_cast<u32>(method), arguments[0]);
22 } else {
23 codec->StateWrite(static_cast<u32>(method), static_cast<u64>(arguments[0]) << 8);
24 }
25
26 switch (method) {
27 case Method::SetVideoCodec:
28 codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(arguments[0]));
29 break;
30 case Method::Execute:
31 Execute();
32 break;
33 }
34}
35
36AVFrame* Nvdec::GetFrame() {
37 return codec->GetCurrentFrame();
38}
39
40const AVFrame* Nvdec::GetFrame() const {
41 return codec->GetCurrentFrame();
42}
43
44void Nvdec::Execute() {
45 switch (codec->GetCurrentCodec()) {
46 case NvdecCommon::VideoCodec::H264:
47 case NvdecCommon::VideoCodec::Vp9:
48 codec->Decode();
49 break;
50 default:
51 UNIMPLEMENTED_MSG("Unknown codec {}", static_cast<u32>(codec->GetCurrentCodec()));
52 break;
53 }
54}
55
56} // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h
new file mode 100644
index 000000000..c1a9d843e
--- /dev/null
+++ b/src/video_core/command_classes/nvdec.h
@@ -0,0 +1,39 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8#include "common/common_funcs.h"
9#include "common/common_types.h"
10#include "video_core/command_classes/codecs/codec.h"
11
12namespace Tegra {
13class GPU;
14
15class Nvdec { // NVDEC front end: forwards method writes to a Codec and triggers decoding
16public:
17 enum class Method : u32 { // the methods ProcessMethod acts on beyond the plain state write
18 SetVideoCodec = 0x80, // argument is cast to NvdecCommon::VideoCodec
19 Execute = 0xc0, // runs Execute()
20 };
21
22 explicit Nvdec(GPU& gpu);
23 ~Nvdec();
24
25 /// Writes arguments[0] into the codec state for this method, then invokes Execute() if encountered
26 void ProcessMethod(Nvdec::Method method, const std::vector<u32>& arguments);
27
28 /// Return most recently decoded frame
29 AVFrame* GetFrame();
30 const AVFrame* GetFrame() const;
31
32private:
33 /// Invoke codec to decode a frame
34 void Execute();
35
36 GPU& gpu;
37 std::unique_ptr<Tegra::Codec> codec; // owned; created in the constructor
38};
39} // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h
new file mode 100644
index 000000000..01b5e086d
--- /dev/null
+++ b/src/video_core/command_classes/nvdec_common.h
@@ -0,0 +1,48 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_funcs.h"
8#include "common/common_types.h"
9
10namespace Tegra::NvdecCommon {
11
12struct NvdecRegisters { // NVDEC register state; the size is pinned by the static_assert below
13 INSERT_PADDING_WORDS(256); // bytes 0x000-0x3FF
14 u64 set_codec_id{}; // byte offset 0x400
15 INSERT_PADDING_WORDS(254);
16 u64 set_platform_id{}; // byte offset 0x800
17 u64 picture_info_offset{};
18 u64 frame_bitstream_offset{};
19 u64 frame_number{};
20 u64 h264_slice_data_offsets{};
21 u64 h264_mv_dump_offset{};
22 INSERT_PADDING_WORDS(6);
23 u64 frame_stats_offset{};
24 u64 h264_last_surface_luma_offset{};
25 u64 h264_last_surface_chroma_offset{};
26 std::array<u64, 17> surface_luma_offset{};
27 std::array<u64, 17> surface_chroma_offset{};
28 INSERT_PADDING_WORDS(132);
29 u64 vp9_entropy_probs_offset{};
30 u64 vp9_backward_updates_offset{};
31 u64 vp9_last_frame_segmap_offset{};
32 u64 vp9_curr_frame_segmap_offset{};
33 INSERT_PADDING_WORDS(2);
34 u64 vp9_last_frame_mvs_offset{};
35 u64 vp9_curr_frame_mvs_offset{};
36 INSERT_PADDING_WORDS(2);
37};
38static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); // 752 32-bit words
39
40enum class VideoCodec : u32 { // codec ids as received through Nvdec::Method::SetVideoCodec
41 None = 0x0,
42 H264 = 0x3, // decoded by Nvdec::Execute
43 Vp8 = 0x5, // falls into the unimplemented branch of Nvdec::Execute
44 H265 = 0x7, // falls into the unimplemented branch of Nvdec::Execute
45 Vp9 = 0x9, // decoded by Nvdec::Execute
46};
47
48} // namespace Tegra::NvdecCommon
diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/command_classes/sync_manager.cpp
new file mode 100644
index 000000000..a0ab44855
--- /dev/null
+++ b/src/video_core/command_classes/sync_manager.cpp
@@ -0,0 +1,60 @@
1// MIT License
2//
3// Copyright (c) Ryujinx Team and Contributors
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
6// associated documentation files (the "Software"), to deal in the Software without restriction,
7// including without limitation the rights to use, copy, modify, merge, publish, distribute,
8// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
9// furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in all copies or
12// substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
15// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
17// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19//
20
21#include <algorithm>
22#include "sync_manager.h"
23#include "video_core/gpu.h"
24
25namespace Tegra {
26SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {}
27SyncptIncrManager::~SyncptIncrManager() = default;
28
29void SyncptIncrManager::Increment(u32 id) {
30 increments.push_back(SyncptIncr{0, id, true});
31 IncrementAllDone();
32}
33
34u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) {
35 const u32 handle = current_id++;
36 increments.push_back(SyncptIncr{handle, class_id, id});
37 return handle;
38}
39
40void SyncptIncrManager::SignalDone(u32 handle) {
41 auto done_incr = std::find_if(increments.begin(), increments.end(),
42 [handle](SyncptIncr incr) { return incr.id == handle; });
43 if (done_incr != increments.end()) {
44 const SyncptIncr incr = *done_incr;
45 *done_incr = SyncptIncr{incr.id, incr.class_id, incr.syncpt_id, true};
46 }
47 IncrementAllDone();
48}
49
50void SyncptIncrManager::IncrementAllDone() {
51 std::size_t done_count = 0;
52 for (; done_count < increments.size(); ++done_count) {
53 if (!increments[done_count].complete) {
54 break;
55 }
56 gpu.IncrementSyncPoint(increments[done_count].syncpt_id);
57 }
58 increments.erase(increments.begin(), increments.begin() + done_count);
59}
60} // namespace Tegra
diff --git a/src/video_core/command_classes/sync_manager.h b/src/video_core/command_classes/sync_manager.h
new file mode 100644
index 000000000..353b67573
--- /dev/null
+++ b/src/video_core/command_classes/sync_manager.h
@@ -0,0 +1,64 @@
1// MIT License
2//
3// Copyright (c) Ryujinx Team and Contributors
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
6// associated documentation files (the "Software"), to deal in the Software without restriction,
7// including without limitation the rights to use, copy, modify, merge, publish, distribute,
8// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
9// furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in all copies or
12// substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
15// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
17// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19//
20
21#pragma once
22
23#include <mutex>
24#include <vector>
25#include "common/common_types.h"
26
27namespace Tegra {
28class GPU;
29struct SyncptIncr {
30 u32 id;
31 u32 class_id;
32 u32 syncpt_id;
33 bool complete;
34
35 SyncptIncr(u32 id, u32 syncpt_id_, u32 class_id_, bool done = false)
36 : id(id), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {}
37};
38
39class SyncptIncrManager { // ordered queue of syncpoint increments, flushed front to back
40public:
41 explicit SyncptIncrManager(GPU& gpu);
42 ~SyncptIncrManager();
43
44 /// Add syncpoint id and increment all
45 void Increment(u32 id);
46
47 /// Returns a handle to increment later
48 u32 IncrementWhenDone(u32 class_id, u32 id);
49
50 /// Marks the entry for handle as done, then runs IncrementAllDone
51 void SignalDone(u32 handle);
52
53 /// Increment all sequential pending increments that are already done.
54 void IncrementAllDone();
55
56private:
57 std::vector<SyncptIncr> increments; // pending entries, oldest first
58 std::mutex increment_lock; // NOTE(review): no method in sync_manager.cpp takes this lock
59 u32 current_id{}; // next handle returned by IncrementWhenDone
60
61 GPU& gpu;
62};
63
64} // namespace Tegra
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
new file mode 100644
index 000000000..66e15a1a8
--- /dev/null
+++ b/src/video_core/command_classes/vic.cpp
@@ -0,0 +1,180 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include "common/assert.h"
7#include "video_core/command_classes/nvdec.h"
8#include "video_core/command_classes/vic.h"
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/gpu.h"
11#include "video_core/memory_manager.h"
12#include "video_core/texture_cache/surface_params.h"
13
14extern "C" {
15#include <libswscale/swscale.h>
16}
17
18namespace Tegra {
19
20Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
21 : gpu(gpu_), nvdec_processor(std::move(nvdec_processor_)) {}
22Vic::~Vic() = default;
23
24void Vic::VicStateWrite(u32 offset, u32 arguments) {
25 u8* const state_offset = reinterpret_cast<u8*>(&vic_state) + offset * sizeof(u32);
26 std::memcpy(state_offset, &arguments, sizeof(u32));
27}
28
29void Vic::ProcessMethod(Vic::Method method, const std::vector<u32>& arguments) {
30 LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast<u32>(method));
31 VicStateWrite(static_cast<u32>(method), arguments[0]);
32 const u64 arg = static_cast<u64>(arguments[0]) << 8;
33 switch (method) {
34 case Method::Execute:
35 Execute();
36 break;
37 case Method::SetConfigStructOffset:
38 config_struct_address = arg;
39 break;
40 case Method::SetOutputSurfaceLumaOffset:
41 output_surface_luma_address = arg;
42 break;
43 case Method::SetOutputSurfaceChromaUOffset:
44 output_surface_chroma_u_address = arg;
45 break;
46 case Method::SetOutputSurfaceChromaVOffset:
47 output_surface_chroma_v_address = arg;
48 break;
49 default:
50 break;
51 }
52}
53
54void Vic::Execute() {
55 if (output_surface_luma_address == 0) {
56 LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Recieved 0x{:X}",
57 vic_state.output_surface.luma_offset);
58 return;
59 }
60 const VicConfig config{gpu.MemoryManager().Read<u64>(config_struct_address + 0x20)};
61 const VideoPixelFormat pixel_format =
62 static_cast<VideoPixelFormat>(config.pixel_format.Value());
63 switch (pixel_format) {
64 case VideoPixelFormat::BGRA8:
65 case VideoPixelFormat::RGBA8: {
66 LOG_TRACE(Service_NVDRV, "Writing RGB Frame");
67 const auto* frame = nvdec_processor->GetFrame();
68
69 if (!frame || frame->width == 0 || frame->height == 0) {
70 return;
71 }
72 if (scaler_ctx == nullptr || frame->width != scaler_width ||
73 frame->height != scaler_height) {
74 const AVPixelFormat target_format =
75 (pixel_format == VideoPixelFormat::RGBA8) ? AV_PIX_FMT_RGBA : AV_PIX_FMT_BGRA;
76
77 sws_freeContext(scaler_ctx);
78 scaler_ctx = nullptr;
79
80 // FFmpeg returns all frames in YUV420, convert it into expected format
81 scaler_ctx =
82 sws_getContext(frame->width, frame->height, AV_PIX_FMT_YUV420P, frame->width,
83 frame->height, target_format, 0, nullptr, nullptr, nullptr);
84
85 scaler_width = frame->width;
86 scaler_height = frame->height;
87 }
88 // Get Converted frame
89 const std::size_t linear_size = frame->width * frame->height * 4;
90
91 using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>;
92 AVMallocPtr converted_frame_buffer{static_cast<u8*>(av_malloc(linear_size)), av_free};
93
94 const int converted_stride{frame->width * 4};
95 u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
96
97 sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height,
98 &converted_frame_buf_addr, &converted_stride);
99
100 const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
101 if (blk_kind != 0) {
102 // swizzle pitch linear to block linear
103 const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
104 const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1,
105 block_height, 0);
106 std::vector<u8> swizzled_data(size);
107 Tegra::Texture::CopySwizzledData(frame->width, frame->height, 1, 4, 4,
108 swizzled_data.data(), converted_frame_buffer.get(),
109 false, block_height, 0, 1);
110
111 gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
112 gpu.Maxwell3D().OnMemoryWrite();
113 } else {
114 // send pitch linear frame
115 gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
116 linear_size);
117 gpu.Maxwell3D().OnMemoryWrite();
118 }
119 break;
120 }
121 case VideoPixelFormat::Yuv420: {
122 LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame");
123
124 const auto* frame = nvdec_processor->GetFrame();
125
126 if (!frame || frame->width == 0 || frame->height == 0) {
127 return;
128 }
129
130 const std::size_t surface_width = config.surface_width_minus1 + 1;
131 const std::size_t surface_height = config.surface_height_minus1 + 1;
132 const std::size_t half_width = surface_width / 2;
133 const std::size_t half_height = config.surface_height_minus1 / 2;
134 const std::size_t aligned_width = (surface_width + 0xff) & ~0xff;
135
136 const auto* luma_ptr = frame->data[0];
137 const auto* chroma_b_ptr = frame->data[1];
138 const auto* chroma_r_ptr = frame->data[2];
139 const auto stride = frame->linesize[0];
140 const auto half_stride = frame->linesize[1];
141
142 std::vector<u8> luma_buffer(aligned_width * surface_height);
143 std::vector<u8> chroma_buffer(aligned_width * half_height);
144
145 // Populate luma buffer
146 for (std::size_t y = 0; y < surface_height - 1; ++y) {
147 std::size_t src = y * stride;
148 std::size_t dst = y * aligned_width;
149
150 std::size_t size = surface_width;
151
152 for (std::size_t offset = 0; offset < size; ++offset) {
153 luma_buffer[dst + offset] = luma_ptr[src + offset];
154 }
155 }
156 gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
157 luma_buffer.size());
158
159 // Populate chroma buffer from both channels with interleaving.
160 for (std::size_t y = 0; y < half_height; ++y) {
161 std::size_t src = y * half_stride;
162 std::size_t dst = y * aligned_width;
163
164 for (std::size_t x = 0; x < half_width; ++x) {
165 chroma_buffer[dst + x * 2] = chroma_b_ptr[src + x];
166 chroma_buffer[dst + x * 2 + 1] = chroma_r_ptr[src + x];
167 }
168 }
169 gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(),
170 chroma_buffer.size());
171 gpu.Maxwell3D().OnMemoryWrite();
172 break;
173 }
174 default:
175 UNIMPLEMENTED_MSG("Unknown video pixel format {}", config.pixel_format.Value());
176 break;
177 }
178}
179
180} // namespace Tegra
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h
new file mode 100644
index 000000000..dd0a2aed8
--- /dev/null
+++ b/src/video_core/command_classes/vic.h
@@ -0,0 +1,110 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <vector>
9#include "common/bit_field.h"
10#include "common/common_types.h"
11
12struct SwsContext;
13
14namespace Tegra {
15class GPU;
16class Nvdec;
17
18struct PlaneOffsets { // three consecutive u32 registers, one offset per plane of a surface
19 u32 luma_offset{};
20 u32 chroma_u_offset{};
21 u32 chroma_v_offset{};
22};
23
24struct VicRegisters { // VIC register file; Vic::Method values index it in 32-bit words
25 INSERT_PADDING_WORDS(64);
26 u32 nop{};
27 INSERT_PADDING_WORDS(15);
28 u32 pm_trigger{};
29 INSERT_PADDING_WORDS(47);
30 u32 set_application_id{};
31 u32 set_watchdog_timer{};
32 INSERT_PADDING_WORDS(17);
33 u32 context_save_area{};
34 u32 context_switch{};
35 INSERT_PADDING_WORDS(43);
36 u32 execute{}; // word 0xC0
37 INSERT_PADDING_WORDS(63);
38 std::array<std::array<PlaneOffsets, 8>, 8> surfacex_slots{}; // words 0x100-0x1BF
39 u32 picture_index{};
40 u32 control_params{}; // word 0x1C1
41 u32 config_struct_offset{}; // word 0x1C2
42 u32 filter_struct_offset{};
43 u32 palette_offset{};
44 u32 hist_offset{};
45 u32 context_id{};
46 u32 fce_ucode_size{};
47 PlaneOffsets output_surface{}; // words 0x1C8-0x1CA
48 u32 fce_ucode_offset{};
49 INSERT_PADDING_WORDS(4);
50 std::array<u32, 8> slot_context_id{};
51 INSERT_PADDING_WORDS(16);
52};
53static_assert(sizeof(VicRegisters) == 0x7A0, "VicRegisters is an invalid size"); // 488 words of 4 bytes
54
55class Vic { // VIC front end: records method writes and converts NVDEC frames to the output surface
56public:
57 enum class Method : u32 { // word indices into VicRegisters that ProcessMethod reacts to
58 Execute = 0xc0,
59 SetControlParams = 0x1c1,
60 SetConfigStructOffset = 0x1c2,
61 SetOutputSurfaceLumaOffset = 0x1c8,
62 SetOutputSurfaceChromaUOffset = 0x1c9,
63 SetOutputSurfaceChromaVOffset = 0x1ca
64 };
65
66 explicit Vic(GPU& gpu, std::shared_ptr<Tegra::Nvdec> nvdec_processor);
67 ~Vic();
68
69 /// Write arguments[0] to the device state and act on the method (see Method).
70 void ProcessMethod(Vic::Method method, const std::vector<u32>& arguments);
71
72private:
73 void Execute(); // converts the current NVDEC frame and writes it to GPU memory
74
75 void VicStateWrite(u32 offset, u32 arguments); // copies one u32 into vic_state at word `offset`
76 VicRegisters vic_state{};
77
78 enum class VideoPixelFormat : u64_le { // values of VicConfig::pixel_format handled by Execute()
79 RGBA8 = 0x1f,
80 BGRA8 = 0x20,
81 Yuv420 = 0x44,
82 };
83
84 union VicConfig { // bit layout of the 64-bit word Execute() reads at config_struct_address + 0x20
85 u64_le raw{};
86 BitField<0, 7, u64_le> pixel_format;
87 BitField<7, 2, u64_le> chroma_loc_horiz;
88 BitField<9, 2, u64_le> chroma_loc_vert;
89 BitField<11, 4, u64_le> block_linear_kind; // non-zero selects the block linear output path
90 BitField<15, 4, u64_le> block_linear_height_log2;
91 BitField<19, 3, u64_le> reserved0;
92 BitField<22, 10, u64_le> reserved1;
93 BitField<32, 14, u64_le> surface_width_minus1;
94 BitField<46, 14, u64_le> surface_height_minus1;
95 };
96
97 GPU& gpu;
98 std::shared_ptr<Tegra::Nvdec> nvdec_processor; // source of decoded frames
99
100 GPUVAddr config_struct_address{}; // method argument << 8, set in ProcessMethod
101 GPUVAddr output_surface_luma_address{};
102 GPUVAddr output_surface_chroma_u_address{};
103 GPUVAddr output_surface_chroma_v_address{};
104
105 SwsContext* scaler_ctx{}; // cached swscale context, rebuilt in Execute() when the frame size changes
106 s32 scaler_width{}; // frame size the cached context was created for
107 s32 scaler_height{};
108};
109
110} // namespace Tegra