diff options
Diffstat (limited to 'src/video_core/command_classes')
| -rw-r--r-- | src/video_core/command_classes/codecs/codec.cpp | 114 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/codec.h | 68 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/h264.cpp | 276 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/h264.h | 130 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/vp9.cpp | 1010 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/vp9.h | 216 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/vp9_types.h | 369 | ||||
| -rw-r--r-- | src/video_core/command_classes/host1x.cpp | 39 | ||||
| -rw-r--r-- | src/video_core/command_classes/host1x.h | 78 | ||||
| -rw-r--r-- | src/video_core/command_classes/nvdec.cpp | 56 | ||||
| -rw-r--r-- | src/video_core/command_classes/nvdec.h | 39 | ||||
| -rw-r--r-- | src/video_core/command_classes/nvdec_common.h | 48 | ||||
| -rw-r--r-- | src/video_core/command_classes/sync_manager.cpp | 60 | ||||
| -rw-r--r-- | src/video_core/command_classes/sync_manager.h | 64 | ||||
| -rw-r--r-- | src/video_core/command_classes/vic.cpp | 180 | ||||
| -rw-r--r-- | src/video_core/command_classes/vic.h | 110 |
16 files changed, 2857 insertions, 0 deletions
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp new file mode 100644 index 000000000..2df410be8 --- /dev/null +++ b/src/video_core/command_classes/codecs/codec.cpp | |||
| @@ -0,0 +1,114 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | #include <fstream> | ||
| 7 | #include "common/assert.h" | ||
| 8 | #include "video_core/command_classes/codecs/codec.h" | ||
| 9 | #include "video_core/command_classes/codecs/h264.h" | ||
| 10 | #include "video_core/command_classes/codecs/vp9.h" | ||
| 11 | #include "video_core/gpu.h" | ||
| 12 | #include "video_core/memory_manager.h" | ||
| 13 | |||
| 14 | extern "C" { | ||
| 15 | #include <libavutil/opt.h> | ||
| 16 | } | ||
| 17 | |||
| 18 | namespace Tegra { | ||
| 19 | |||
| 20 | Codec::Codec(GPU& gpu_) | ||
| 21 | : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)), | ||
| 22 | vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} | ||
| 23 | |||
| 24 | Codec::~Codec() { | ||
| 25 | if (!initialized) { | ||
| 26 | return; | ||
| 27 | } | ||
| 28 | // Free libav memory | ||
| 29 | avcodec_send_packet(av_codec_ctx, nullptr); | ||
| 30 | avcodec_receive_frame(av_codec_ctx, av_frame); | ||
| 31 | avcodec_flush_buffers(av_codec_ctx); | ||
| 32 | |||
| 33 | av_frame_unref(av_frame); | ||
| 34 | av_free(av_frame); | ||
| 35 | avcodec_close(av_codec_ctx); | ||
| 36 | } | ||
| 37 | |||
| 38 | void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { | ||
| 39 | LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec)); | ||
| 40 | current_codec = codec; | ||
| 41 | } | ||
| 42 | |||
| 43 | void Codec::StateWrite(u32 offset, u64 arguments) { | ||
| 44 | u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64); | ||
| 45 | std::memcpy(state_offset, &arguments, sizeof(u64)); | ||
| 46 | } | ||
| 47 | |||
| 48 | void Codec::Decode() { | ||
| 49 | bool is_first_frame = false; | ||
| 50 | |||
| 51 | if (!initialized) { | ||
| 52 | if (current_codec == NvdecCommon::VideoCodec::H264) { | ||
| 53 | av_codec = avcodec_find_decoder(AV_CODEC_ID_H264); | ||
| 54 | } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { | ||
| 55 | av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9); | ||
| 56 | } else { | ||
| 57 | LOG_ERROR(Service_NVDRV, "Unknown video codec {}", static_cast<u32>(current_codec)); | ||
| 58 | return; | ||
| 59 | } | ||
| 60 | |||
| 61 | av_codec_ctx = avcodec_alloc_context3(av_codec); | ||
| 62 | av_frame = av_frame_alloc(); | ||
| 63 | av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); | ||
| 64 | |||
| 65 | // TODO(ameerj): libavcodec gpu hw acceleration | ||
| 66 | |||
| 67 | const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); | ||
| 68 | if (av_error < 0) { | ||
| 69 | LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); | ||
| 70 | av_frame_unref(av_frame); | ||
| 71 | av_free(av_frame); | ||
| 72 | avcodec_close(av_codec_ctx); | ||
| 73 | return; | ||
| 74 | } | ||
| 75 | initialized = true; | ||
| 76 | is_first_frame = true; | ||
| 77 | } | ||
| 78 | bool vp9_hidden_frame = false; | ||
| 79 | |||
| 80 | AVPacket packet{}; | ||
| 81 | av_init_packet(&packet); | ||
| 82 | std::vector<u8> frame_data; | ||
| 83 | |||
| 84 | if (current_codec == NvdecCommon::VideoCodec::H264) { | ||
| 85 | frame_data = h264_decoder->ComposeFrameHeader(state, is_first_frame); | ||
| 86 | } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { | ||
| 87 | frame_data = vp9_decoder->ComposeFrameHeader(state); | ||
| 88 | vp9_hidden_frame = vp9_decoder->WasFrameHidden(); | ||
| 89 | } | ||
| 90 | |||
| 91 | packet.data = frame_data.data(); | ||
| 92 | packet.size = static_cast<int>(frame_data.size()); | ||
| 93 | |||
| 94 | avcodec_send_packet(av_codec_ctx, &packet); | ||
| 95 | |||
| 96 | if (!vp9_hidden_frame) { | ||
| 97 | // Only receive/store visible frames | ||
| 98 | avcodec_receive_frame(av_codec_ctx, av_frame); | ||
| 99 | } | ||
| 100 | } | ||
| 101 | |||
| 102 | AVFrame* Codec::GetCurrentFrame() { | ||
| 103 | return av_frame; | ||
| 104 | } | ||
| 105 | |||
| 106 | const AVFrame* Codec::GetCurrentFrame() const { | ||
| 107 | return av_frame; | ||
| 108 | } | ||
| 109 | |||
| 110 | NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { | ||
| 111 | return current_codec; | ||
| 112 | } | ||
| 113 | |||
| 114 | } // namespace Tegra | ||
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h new file mode 100644 index 000000000..2e56daf29 --- /dev/null +++ b/src/video_core/command_classes/codecs/codec.h | |||
| @@ -0,0 +1,68 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <vector> | ||
| 9 | #include "common/common_funcs.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/command_classes/nvdec_common.h" | ||
| 12 | |||
| 13 | extern "C" { | ||
| 14 | #if defined(__GNUC__) || defined(__clang__) | ||
| 15 | #pragma GCC diagnostic ignored "-Wconversion" | ||
| 16 | #endif | ||
| 17 | #include <libavcodec/avcodec.h> | ||
| 18 | #if defined(__GNUC__) || defined(__clang__) | ||
| 19 | #pragma GCC diagnostic pop | ||
| 20 | #endif | ||
| 21 | } | ||
| 22 | |||
| 23 | namespace Tegra { | ||
| 24 | class GPU; | ||
| 25 | struct VicRegisters; | ||
| 26 | |||
| 27 | namespace Decoder { | ||
| 28 | class H264; | ||
| 29 | class VP9; | ||
| 30 | } // namespace Decoder | ||
| 31 | |||
| 32 | class Codec { | ||
| 33 | public: | ||
| 34 | explicit Codec(GPU& gpu); | ||
| 35 | ~Codec(); | ||
| 36 | |||
| 37 | /// Sets NVDEC video stream codec | ||
| 38 | void SetTargetCodec(NvdecCommon::VideoCodec codec); | ||
| 39 | |||
| 40 | /// Populate NvdecRegisters state with argument value at the provided offset | ||
| 41 | void StateWrite(u32 offset, u64 arguments); | ||
| 42 | |||
| 43 | /// Call decoders to construct headers, decode AVFrame with ffmpeg | ||
| 44 | void Decode(); | ||
| 45 | |||
| 46 | /// Returns most recently decoded frame | ||
| 47 | AVFrame* GetCurrentFrame(); | ||
| 48 | const AVFrame* GetCurrentFrame() const; | ||
| 49 | |||
| 50 | /// Returns the value of current_codec | ||
| 51 | NvdecCommon::VideoCodec GetCurrentCodec() const; | ||
| 52 | |||
| 53 | private: | ||
| 54 | bool initialized{}; | ||
| 55 | NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None}; | ||
| 56 | |||
| 57 | AVCodec* av_codec{nullptr}; | ||
| 58 | AVCodecContext* av_codec_ctx{nullptr}; | ||
| 59 | AVFrame* av_frame{nullptr}; | ||
| 60 | |||
| 61 | GPU& gpu; | ||
| 62 | std::unique_ptr<Decoder::H264> h264_decoder; | ||
| 63 | std::unique_ptr<Decoder::VP9> vp9_decoder; | ||
| 64 | |||
| 65 | NvdecCommon::NvdecRegisters state{}; | ||
| 66 | }; | ||
| 67 | |||
| 68 | } // namespace Tegra | ||
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp new file mode 100644 index 000000000..1a39f7b23 --- /dev/null +++ b/src/video_core/command_classes/codecs/h264.cpp | |||
| @@ -0,0 +1,276 @@ | |||
| 1 | // MIT License | ||
| 2 | // | ||
| 3 | // Copyright (c) Ryujinx Team and Contributors | ||
| 4 | // | ||
| 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and | ||
| 6 | // associated documentation files (the "Software"), to deal in the Software without restriction, | ||
| 7 | // including without limitation the rights to use, copy, modify, merge, publish, distribute, | ||
| 8 | // sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is | ||
| 9 | // furnished to do so, subject to the following conditions: | ||
| 10 | // | ||
| 11 | // The above copyright notice and this permission notice shall be included in all copies or | ||
| 12 | // substantial portions of the Software. | ||
| 13 | // | ||
| 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT | ||
| 15 | // NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
| 16 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, | ||
| 17 | // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
| 19 | // | ||
| 20 | |||
| 21 | #include "common/bit_util.h" | ||
| 22 | #include "video_core/command_classes/codecs/h264.h" | ||
| 23 | #include "video_core/gpu.h" | ||
| 24 | #include "video_core/memory_manager.h" | ||
| 25 | |||
| 26 | namespace Tegra::Decoder { | ||
| 27 | H264::H264(GPU& gpu_) : gpu(gpu_) {} | ||
| 28 | |||
| 29 | H264::~H264() = default; | ||
| 30 | |||
| 31 | std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bool is_first_frame) { | ||
| 32 | H264DecoderContext context{}; | ||
| 33 | gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); | ||
| 34 | |||
| 35 | const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff); | ||
| 36 | if (!is_first_frame && frame_number != 0) { | ||
| 37 | frame.resize(context.frame_data_size); | ||
| 38 | |||
| 39 | gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); | ||
| 40 | } else { | ||
| 41 | /// Encode header | ||
| 42 | H264BitWriter writer{}; | ||
| 43 | writer.WriteU(1, 24); | ||
| 44 | writer.WriteU(0, 1); | ||
| 45 | writer.WriteU(3, 2); | ||
| 46 | writer.WriteU(7, 5); | ||
| 47 | writer.WriteU(100, 8); | ||
| 48 | writer.WriteU(0, 8); | ||
| 49 | writer.WriteU(31, 8); | ||
| 50 | writer.WriteUe(0); | ||
| 51 | const s32 chroma_format_idc = (context.h264_parameter_set.flags >> 12) & 0x3; | ||
| 52 | writer.WriteUe(chroma_format_idc); | ||
| 53 | if (chroma_format_idc == 3) { | ||
| 54 | writer.WriteBit(false); | ||
| 55 | } | ||
| 56 | |||
| 57 | writer.WriteUe(0); | ||
| 58 | writer.WriteUe(0); | ||
| 59 | writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag | ||
| 60 | writer.WriteBit(false); // Scaling matrix present flag | ||
| 61 | |||
| 62 | const s32 order_cnt_type = static_cast<s32>((context.h264_parameter_set.flags >> 14) & 3); | ||
| 63 | writer.WriteUe(static_cast<s32>((context.h264_parameter_set.flags >> 8) & 0xf)); | ||
| 64 | writer.WriteUe(order_cnt_type); | ||
| 65 | if (order_cnt_type == 0) { | ||
| 66 | writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt); | ||
| 67 | } else if (order_cnt_type == 1) { | ||
| 68 | writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); | ||
| 69 | |||
| 70 | writer.WriteSe(0); | ||
| 71 | writer.WriteSe(0); | ||
| 72 | writer.WriteUe(0); | ||
| 73 | } | ||
| 74 | |||
| 75 | const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units / | ||
| 76 | (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); | ||
| 77 | |||
| 78 | writer.WriteUe(16); | ||
| 79 | writer.WriteBit(false); | ||
| 80 | writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); | ||
| 81 | writer.WriteUe(pic_height - 1); | ||
| 82 | writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); | ||
| 83 | |||
| 84 | if (!context.h264_parameter_set.frame_mbs_only_flag) { | ||
| 85 | writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0); | ||
| 86 | } | ||
| 87 | |||
| 88 | writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0); | ||
| 89 | writer.WriteBit(false); // Frame cropping flag | ||
| 90 | writer.WriteBit(false); // VUI parameter present flag | ||
| 91 | |||
| 92 | writer.End(); | ||
| 93 | |||
| 94 | // H264 PPS | ||
| 95 | writer.WriteU(1, 24); | ||
| 96 | writer.WriteU(0, 1); | ||
| 97 | writer.WriteU(3, 2); | ||
| 98 | writer.WriteU(8, 5); | ||
| 99 | |||
| 100 | writer.WriteUe(0); | ||
| 101 | writer.WriteUe(0); | ||
| 102 | |||
| 103 | writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag); | ||
| 104 | writer.WriteBit(false); | ||
| 105 | writer.WriteUe(0); | ||
| 106 | writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); | ||
| 107 | writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); | ||
| 108 | writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0); | ||
| 109 | writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2); | ||
| 110 | s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f); | ||
| 111 | pic_init_qp = (pic_init_qp << 26) >> 26; | ||
| 112 | writer.WriteSe(pic_init_qp); | ||
| 113 | writer.WriteSe(0); | ||
| 114 | s32 chroma_qp_index_offset = | ||
| 115 | static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f); | ||
| 116 | chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27; | ||
| 117 | |||
| 118 | writer.WriteSe(chroma_qp_index_offset); | ||
| 119 | writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0); | ||
| 120 | writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0); | ||
| 121 | writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0); | ||
| 122 | writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); | ||
| 123 | |||
| 124 | writer.WriteBit(true); | ||
| 125 | |||
| 126 | for (s32 index = 0; index < 6; index++) { | ||
| 127 | writer.WriteBit(true); | ||
| 128 | const auto matrix_x4 = | ||
| 129 | std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end()); | ||
| 130 | writer.WriteScalingList(matrix_x4, index * 16, 16); | ||
| 131 | } | ||
| 132 | |||
| 133 | if (context.h264_parameter_set.transform_8x8_mode_flag) { | ||
| 134 | for (s32 index = 0; index < 2; index++) { | ||
| 135 | writer.WriteBit(true); | ||
| 136 | const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(), | ||
| 137 | context.scaling_matrix_8.end()); | ||
| 138 | |||
| 139 | writer.WriteScalingList(matrix_x8, index * 64, 64); | ||
| 140 | } | ||
| 141 | } | ||
| 142 | |||
| 143 | s32 chroma_qp_index_offset2 = | ||
| 144 | static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f); | ||
| 145 | chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27; | ||
| 146 | |||
| 147 | writer.WriteSe(chroma_qp_index_offset2); | ||
| 148 | |||
| 149 | writer.End(); | ||
| 150 | |||
| 151 | const auto& encoded_header = writer.GetByteArray(); | ||
| 152 | frame.resize(encoded_header.size() + context.frame_data_size); | ||
| 153 | std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); | ||
| 154 | |||
| 155 | gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, | ||
| 156 | frame.data() + encoded_header.size(), | ||
| 157 | context.frame_data_size); | ||
| 158 | } | ||
| 159 | |||
| 160 | return frame; | ||
| 161 | } | ||
| 162 | |||
| 163 | H264BitWriter::H264BitWriter() = default; | ||
| 164 | |||
| 165 | H264BitWriter::~H264BitWriter() = default; | ||
| 166 | |||
| 167 | void H264BitWriter::WriteU(s32 value, s32 value_sz) { | ||
| 168 | WriteBits(value, value_sz); | ||
| 169 | } | ||
| 170 | |||
| 171 | void H264BitWriter::WriteSe(s32 value) { | ||
| 172 | WriteExpGolombCodedInt(value); | ||
| 173 | } | ||
| 174 | |||
| 175 | void H264BitWriter::WriteUe(s32 value) { | ||
| 176 | WriteExpGolombCodedUInt((u32)value); | ||
| 177 | } | ||
| 178 | |||
| 179 | void H264BitWriter::End() { | ||
| 180 | WriteBit(true); | ||
| 181 | Flush(); | ||
| 182 | } | ||
| 183 | |||
| 184 | void H264BitWriter::WriteBit(bool state) { | ||
| 185 | WriteBits(state ? 1 : 0, 1); | ||
| 186 | } | ||
| 187 | |||
| 188 | void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) { | ||
| 189 | std::vector<u8> scan(count); | ||
| 190 | if (count == 16) { | ||
| 191 | std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); | ||
| 192 | } else { | ||
| 193 | std::memcpy(scan.data(), zig_zag_direct.data(), scan.size()); | ||
| 194 | } | ||
| 195 | u8 last_scale = 8; | ||
| 196 | |||
| 197 | for (s32 index = 0; index < count; index++) { | ||
| 198 | const u8 value = list[start + scan[index]]; | ||
| 199 | const s32 delta_scale = static_cast<s32>(value - last_scale); | ||
| 200 | |||
| 201 | WriteSe(delta_scale); | ||
| 202 | |||
| 203 | last_scale = value; | ||
| 204 | } | ||
| 205 | } | ||
| 206 | |||
| 207 | std::vector<u8>& H264BitWriter::GetByteArray() { | ||
| 208 | return byte_array; | ||
| 209 | } | ||
| 210 | |||
| 211 | const std::vector<u8>& H264BitWriter::GetByteArray() const { | ||
| 212 | return byte_array; | ||
| 213 | } | ||
| 214 | |||
| 215 | void H264BitWriter::WriteBits(s32 value, s32 bit_count) { | ||
| 216 | s32 value_pos = 0; | ||
| 217 | |||
| 218 | s32 remaining = bit_count; | ||
| 219 | |||
| 220 | while (remaining > 0) { | ||
| 221 | s32 copy_size = remaining; | ||
| 222 | |||
| 223 | const s32 free_bits = GetFreeBufferBits(); | ||
| 224 | |||
| 225 | if (copy_size > free_bits) { | ||
| 226 | copy_size = free_bits; | ||
| 227 | } | ||
| 228 | |||
| 229 | const s32 mask = (1 << copy_size) - 1; | ||
| 230 | |||
| 231 | const s32 src_shift = (bit_count - value_pos) - copy_size; | ||
| 232 | const s32 dst_shift = (buffer_size - buffer_pos) - copy_size; | ||
| 233 | |||
| 234 | buffer |= ((value >> src_shift) & mask) << dst_shift; | ||
| 235 | |||
| 236 | value_pos += copy_size; | ||
| 237 | buffer_pos += copy_size; | ||
| 238 | remaining -= copy_size; | ||
| 239 | } | ||
| 240 | } | ||
| 241 | |||
| 242 | void H264BitWriter::WriteExpGolombCodedInt(s32 value) { | ||
| 243 | const s32 sign = value <= 0 ? 0 : 1; | ||
| 244 | if (value < 0) { | ||
| 245 | value = -value; | ||
| 246 | } | ||
| 247 | value = (value << 1) - sign; | ||
| 248 | WriteExpGolombCodedUInt(value); | ||
| 249 | } | ||
| 250 | |||
| 251 | void H264BitWriter::WriteExpGolombCodedUInt(u32 value) { | ||
| 252 | const s32 size = 32 - Common::CountLeadingZeroes32(static_cast<s32>(value + 1)); | ||
| 253 | WriteBits(1, size); | ||
| 254 | |||
| 255 | value -= (1U << (size - 1)) - 1; | ||
| 256 | WriteBits(static_cast<s32>(value), size - 1); | ||
| 257 | } | ||
| 258 | |||
| 259 | s32 H264BitWriter::GetFreeBufferBits() { | ||
| 260 | if (buffer_pos == buffer_size) { | ||
| 261 | Flush(); | ||
| 262 | } | ||
| 263 | |||
| 264 | return buffer_size - buffer_pos; | ||
| 265 | } | ||
| 266 | |||
| 267 | void H264BitWriter::Flush() { | ||
| 268 | if (buffer_pos == 0) { | ||
| 269 | return; | ||
| 270 | } | ||
| 271 | byte_array.push_back(static_cast<u8>(buffer)); | ||
| 272 | |||
| 273 | buffer = 0; | ||
| 274 | buffer_pos = 0; | ||
| 275 | } | ||
| 276 | } // namespace Tegra::Decoder | ||
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h new file mode 100644 index 000000000..21752dd90 --- /dev/null +++ b/src/video_core/command_classes/codecs/h264.h | |||
| @@ -0,0 +1,130 @@ | |||
| 1 | // MIT License | ||
| 2 | // | ||
| 3 | // Copyright (c) Ryujinx Team and Contributors | ||
| 4 | // | ||
| 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and | ||
| 6 | // associated documentation files (the "Software"), to deal in the Software without restriction, | ||
| 7 | // including without limitation the rights to use, copy, modify, merge, publish, distribute, | ||
| 8 | // sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is | ||
| 9 | // furnished to do so, subject to the following conditions: | ||
| 10 | // | ||
| 11 | // The above copyright notice and this permission notice shall be included in all copies or | ||
| 12 | // substantial portions of the Software. | ||
| 13 | // | ||
| 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT | ||
| 15 | // NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
| 16 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, | ||
| 17 | // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
| 19 | // | ||
| 20 | |||
| 21 | #pragma once | ||
| 22 | |||
| 23 | #include <vector> | ||
| 24 | #include "common/common_funcs.h" | ||
| 25 | #include "common/common_types.h" | ||
| 26 | #include "video_core/command_classes/nvdec_common.h" | ||
| 27 | |||
| 28 | namespace Tegra { | ||
| 29 | class GPU; | ||
| 30 | namespace Decoder { | ||
| 31 | |||
| 32 | class H264BitWriter { | ||
| 33 | public: | ||
| 34 | H264BitWriter(); | ||
| 35 | ~H264BitWriter(); | ||
| 36 | |||
| 37 | /// The following Write methods are based on clause 9.1 in the H.264 specification. | ||
| 38 | /// WriteSe and WriteUe write in the Exp-Golomb-coded syntax | ||
| 39 | void WriteU(s32 value, s32 value_sz); | ||
| 40 | void WriteSe(s32 value); | ||
| 41 | void WriteUe(s32 value); | ||
| 42 | |||
| 43 | /// Finalize the bitstream | ||
| 44 | void End(); | ||
| 45 | |||
| 46 | /// append a bit to the stream, equivalent value to the state parameter | ||
| 47 | void WriteBit(bool state); | ||
| 48 | |||
| 49 | /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification | ||
| 50 | /// Writes the scaling matrices of the sream | ||
| 51 | void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count); | ||
| 52 | |||
| 53 | /// Return the bitstream as a vector. | ||
| 54 | std::vector<u8>& GetByteArray(); | ||
| 55 | const std::vector<u8>& GetByteArray() const; | ||
| 56 | |||
| 57 | private: | ||
| 58 | // ZigZag LUTs from libavcodec. | ||
| 59 | static constexpr std::array<u8, 64> zig_zag_direct{ | ||
| 60 | 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, | ||
| 61 | 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, | ||
| 62 | 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, | ||
| 63 | }; | ||
| 64 | |||
| 65 | static constexpr std::array<u8, 16> zig_zag_scan{ | ||
| 66 | 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4, | ||
| 67 | 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4, | ||
| 68 | }; | ||
| 69 | |||
| 70 | void WriteBits(s32 value, s32 bit_count); | ||
| 71 | void WriteExpGolombCodedInt(s32 value); | ||
| 72 | void WriteExpGolombCodedUInt(u32 value); | ||
| 73 | s32 GetFreeBufferBits(); | ||
| 74 | void Flush(); | ||
| 75 | |||
| 76 | s32 buffer_size{8}; | ||
| 77 | |||
| 78 | s32 buffer{}; | ||
| 79 | s32 buffer_pos{}; | ||
| 80 | std::vector<u8> byte_array; | ||
| 81 | }; | ||
| 82 | |||
| 83 | class H264 { | ||
| 84 | public: | ||
| 85 | explicit H264(GPU& gpu); | ||
| 86 | ~H264(); | ||
| 87 | |||
| 88 | /// Compose the H264 header of the frame for FFmpeg decoding | ||
| 89 | std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, | ||
| 90 | bool is_first_frame = false); | ||
| 91 | |||
| 92 | private: | ||
| 93 | struct H264ParameterSet { | ||
| 94 | u32 log2_max_pic_order_cnt{}; | ||
| 95 | u32 delta_pic_order_always_zero_flag{}; | ||
| 96 | u32 frame_mbs_only_flag{}; | ||
| 97 | u32 pic_width_in_mbs{}; | ||
| 98 | u32 pic_height_in_map_units{}; | ||
| 99 | INSERT_PADDING_WORDS(1); | ||
| 100 | u32 entropy_coding_mode_flag{}; | ||
| 101 | u32 bottom_field_pic_order_flag{}; | ||
| 102 | u32 num_refidx_l0_default_active{}; | ||
| 103 | u32 num_refidx_l1_default_active{}; | ||
| 104 | u32 deblocking_filter_control_flag{}; | ||
| 105 | u32 redundant_pic_count_flag{}; | ||
| 106 | u32 transform_8x8_mode_flag{}; | ||
| 107 | INSERT_PADDING_WORDS(9); | ||
| 108 | u64 flags{}; | ||
| 109 | u32 frame_number{}; | ||
| 110 | u32 frame_number2{}; | ||
| 111 | }; | ||
| 112 | static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size"); | ||
| 113 | |||
| 114 | struct H264DecoderContext { | ||
| 115 | INSERT_PADDING_BYTES(0x48); | ||
| 116 | u32 frame_data_size{}; | ||
| 117 | INSERT_PADDING_BYTES(0xc); | ||
| 118 | H264ParameterSet h264_parameter_set{}; | ||
| 119 | INSERT_PADDING_BYTES(0x100); | ||
| 120 | std::array<u8, 0x60> scaling_matrix_4; | ||
| 121 | std::array<u8, 0x80> scaling_matrix_8; | ||
| 122 | }; | ||
| 123 | static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size"); | ||
| 124 | |||
| 125 | std::vector<u8> frame; | ||
| 126 | GPU& gpu; | ||
| 127 | }; | ||
| 128 | |||
| 129 | } // namespace Decoder | ||
| 130 | } // namespace Tegra | ||
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp new file mode 100644 index 000000000..3bae0bb5d --- /dev/null +++ b/src/video_core/command_classes/codecs/vp9.cpp | |||
| @@ -0,0 +1,1010 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> // for std::memcpy | ||
| 6 | #include <numeric> | ||
| 7 | #include "video_core/command_classes/codecs/vp9.h" | ||
| 8 | #include "video_core/gpu.h" | ||
| 9 | #include "video_core/memory_manager.h" | ||
| 10 | |||
| 11 | namespace Tegra::Decoder { | ||
| 12 | |||
| 13 | // Default compressed header probabilities once frame context resets | ||
| 14 | constexpr Vp9EntropyProbs default_probs{ | ||
| 15 | .y_mode_prob{ | ||
| 16 | 65, 32, 18, 144, 162, 194, 41, 51, 98, 132, 68, 18, 165, 217, 196, 45, 40, 78, | ||
| 17 | 173, 80, 19, 176, 240, 193, 64, 35, 46, 221, 135, 38, 194, 248, 121, 96, 85, 29, | ||
| 18 | }, | ||
| 19 | .partition_prob{ | ||
| 20 | 199, 122, 141, 0, 147, 63, 159, 0, 148, 133, 118, 0, 121, 104, 114, 0, | ||
| 21 | 174, 73, 87, 0, 92, 41, 83, 0, 82, 99, 50, 0, 53, 39, 39, 0, | ||
| 22 | 177, 58, 59, 0, 68, 26, 63, 0, 52, 79, 25, 0, 17, 14, 12, 0, | ||
| 23 | 222, 34, 30, 0, 72, 16, 44, 0, 58, 32, 12, 0, 10, 7, 6, 0, | ||
| 24 | }, | ||
| 25 | .coef_probs{ | ||
| 26 | 195, 29, 183, 0, 84, 49, 136, 0, 8, 42, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 27 | 0, 0, 0, 0, 31, 107, 169, 0, 35, 99, 159, 0, 17, 82, 140, 0, 8, 66, 114, 0, | ||
| 28 | 2, 44, 76, 0, 1, 19, 32, 0, 40, 132, 201, 0, 29, 114, 187, 0, 13, 91, 157, 0, | ||
| 29 | 7, 75, 127, 0, 3, 58, 95, 0, 1, 28, 47, 0, 69, 142, 221, 0, 42, 122, 201, 0, | ||
| 30 | 15, 91, 159, 0, 6, 67, 121, 0, 1, 42, 77, 0, 1, 17, 31, 0, 102, 148, 228, 0, | ||
| 31 | 67, 117, 204, 0, 17, 82, 154, 0, 6, 59, 114, 0, 2, 39, 75, 0, 1, 15, 29, 0, | ||
| 32 | 156, 57, 233, 0, 119, 57, 212, 0, 58, 48, 163, 0, 29, 40, 124, 0, 12, 30, 81, 0, | ||
| 33 | 3, 12, 31, 0, 191, 107, 226, 0, 124, 117, 204, 0, 25, 99, 155, 0, 0, 0, 0, 0, | ||
| 34 | 0, 0, 0, 0, 0, 0, 0, 0, 29, 148, 210, 0, 37, 126, 194, 0, 8, 93, 157, 0, | ||
| 35 | 2, 68, 118, 0, 1, 39, 69, 0, 1, 17, 33, 0, 41, 151, 213, 0, 27, 123, 193, 0, | ||
| 36 | 3, 82, 144, 0, 1, 58, 105, 0, 1, 32, 60, 0, 1, 13, 26, 0, 59, 159, 220, 0, | ||
| 37 | 23, 126, 198, 0, 4, 88, 151, 0, 1, 66, 114, 0, 1, 38, 71, 0, 1, 18, 34, 0, | ||
| 38 | 114, 136, 232, 0, 51, 114, 207, 0, 11, 83, 155, 0, 3, 56, 105, 0, 1, 33, 65, 0, | ||
| 39 | 1, 17, 34, 0, 149, 65, 234, 0, 121, 57, 215, 0, 61, 49, 166, 0, 28, 36, 114, 0, | ||
| 40 | 12, 25, 76, 0, 3, 16, 42, 0, 214, 49, 220, 0, 132, 63, 188, 0, 42, 65, 137, 0, | ||
| 41 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 137, 221, 0, 104, 131, 216, 0, | ||
| 42 | 49, 111, 192, 0, 21, 87, 155, 0, 2, 49, 87, 0, 1, 16, 28, 0, 89, 163, 230, 0, | ||
| 43 | 90, 137, 220, 0, 29, 100, 183, 0, 10, 70, 135, 0, 2, 42, 81, 0, 1, 17, 33, 0, | ||
| 44 | 108, 167, 237, 0, 55, 133, 222, 0, 15, 97, 179, 0, 4, 72, 135, 0, 1, 45, 85, 0, | ||
| 45 | 1, 19, 38, 0, 124, 146, 240, 0, 66, 124, 224, 0, 17, 88, 175, 0, 4, 58, 122, 0, | ||
| 46 | 1, 36, 75, 0, 1, 18, 37, 0, 141, 79, 241, 0, 126, 70, 227, 0, 66, 58, 182, 0, | ||
| 47 | 30, 44, 136, 0, 12, 34, 96, 0, 2, 20, 47, 0, 229, 99, 249, 0, 143, 111, 235, 0, | ||
| 48 | 46, 109, 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 158, 236, 0, | ||
| 49 | 94, 146, 224, 0, 25, 117, 191, 0, 9, 87, 149, 0, 3, 56, 99, 0, 1, 33, 57, 0, | ||
| 50 | 83, 167, 237, 0, 68, 145, 222, 0, 10, 103, 177, 0, 2, 72, 131, 0, 1, 41, 79, 0, | ||
| 51 | 1, 20, 39, 0, 99, 167, 239, 0, 47, 141, 224, 0, 10, 104, 178, 0, 2, 73, 133, 0, | ||
| 52 | 1, 44, 85, 0, 1, 22, 47, 0, 127, 145, 243, 0, 71, 129, 228, 0, 17, 93, 177, 0, | ||
| 53 | 3, 61, 124, 0, 1, 41, 84, 0, 1, 21, 52, 0, 157, 78, 244, 0, 140, 72, 231, 0, | ||
| 54 | 69, 58, 184, 0, 31, 44, 137, 0, 14, 38, 105, 0, 8, 23, 61, 0, 125, 34, 187, 0, | ||
| 55 | 52, 41, 133, 0, 6, 31, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 56 | 37, 109, 153, 0, 51, 102, 147, 0, 23, 87, 128, 0, 8, 67, 101, 0, 1, 41, 63, 0, | ||
| 57 | 1, 19, 29, 0, 31, 154, 185, 0, 17, 127, 175, 0, 6, 96, 145, 0, 2, 73, 114, 0, | ||
| 58 | 1, 51, 82, 0, 1, 28, 45, 0, 23, 163, 200, 0, 10, 131, 185, 0, 2, 93, 148, 0, | ||
| 59 | 1, 67, 111, 0, 1, 41, 69, 0, 1, 14, 24, 0, 29, 176, 217, 0, 12, 145, 201, 0, | ||
| 60 | 3, 101, 156, 0, 1, 69, 111, 0, 1, 39, 63, 0, 1, 14, 23, 0, 57, 192, 233, 0, | ||
| 61 | 25, 154, 215, 0, 6, 109, 167, 0, 3, 78, 118, 0, 1, 48, 69, 0, 1, 21, 29, 0, | ||
| 62 | 202, 105, 245, 0, 108, 106, 216, 0, 18, 90, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 63 | 0, 0, 0, 0, 33, 172, 219, 0, 64, 149, 206, 0, 14, 117, 177, 0, 5, 90, 141, 0, | ||
| 64 | 2, 61, 95, 0, 1, 37, 57, 0, 33, 179, 220, 0, 11, 140, 198, 0, 1, 89, 148, 0, | ||
| 65 | 1, 60, 104, 0, 1, 33, 57, 0, 1, 12, 21, 0, 30, 181, 221, 0, 8, 141, 198, 0, | ||
| 66 | 1, 87, 145, 0, 1, 58, 100, 0, 1, 31, 55, 0, 1, 12, 20, 0, 32, 186, 224, 0, | ||
| 67 | 7, 142, 198, 0, 1, 86, 143, 0, 1, 58, 100, 0, 1, 31, 55, 0, 1, 12, 22, 0, | ||
| 68 | 57, 192, 227, 0, 20, 143, 204, 0, 3, 96, 154, 0, 1, 68, 112, 0, 1, 42, 69, 0, | ||
| 69 | 1, 19, 32, 0, 212, 35, 215, 0, 113, 47, 169, 0, 29, 48, 105, 0, 0, 0, 0, 0, | ||
| 70 | 0, 0, 0, 0, 0, 0, 0, 0, 74, 129, 203, 0, 106, 120, 203, 0, 49, 107, 178, 0, | ||
| 71 | 19, 84, 144, 0, 4, 50, 84, 0, 1, 15, 25, 0, 71, 172, 217, 0, 44, 141, 209, 0, | ||
| 72 | 15, 102, 173, 0, 6, 76, 133, 0, 2, 51, 89, 0, 1, 24, 42, 0, 64, 185, 231, 0, | ||
| 73 | 31, 148, 216, 0, 8, 103, 175, 0, 3, 74, 131, 0, 1, 46, 81, 0, 1, 18, 30, 0, | ||
| 74 | 65, 196, 235, 0, 25, 157, 221, 0, 5, 105, 174, 0, 1, 67, 120, 0, 1, 38, 69, 0, | ||
| 75 | 1, 15, 30, 0, 65, 204, 238, 0, 30, 156, 224, 0, 7, 107, 177, 0, 2, 70, 124, 0, | ||
| 76 | 1, 42, 73, 0, 1, 18, 34, 0, 225, 86, 251, 0, 144, 104, 235, 0, 42, 99, 181, 0, | ||
| 77 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 175, 239, 0, 112, 165, 229, 0, | ||
| 78 | 29, 136, 200, 0, 12, 103, 162, 0, 6, 77, 123, 0, 2, 53, 84, 0, 75, 183, 239, 0, | ||
| 79 | 30, 155, 221, 0, 3, 106, 171, 0, 1, 74, 128, 0, 1, 44, 76, 0, 1, 17, 28, 0, | ||
| 80 | 73, 185, 240, 0, 27, 159, 222, 0, 2, 107, 172, 0, 1, 75, 127, 0, 1, 42, 73, 0, | ||
| 81 | 1, 17, 29, 0, 62, 190, 238, 0, 21, 159, 222, 0, 2, 107, 172, 0, 1, 72, 122, 0, | ||
| 82 | 1, 40, 71, 0, 1, 18, 32, 0, 61, 199, 240, 0, 27, 161, 226, 0, 4, 113, 180, 0, | ||
| 83 | 1, 76, 129, 0, 1, 46, 80, 0, 1, 23, 41, 0, 7, 27, 153, 0, 5, 30, 95, 0, | ||
| 84 | 1, 16, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 75, 127, 0, | ||
| 85 | 57, 75, 124, 0, 27, 67, 108, 0, 10, 54, 86, 0, 1, 33, 52, 0, 1, 12, 18, 0, | ||
| 86 | 43, 125, 151, 0, 26, 108, 148, 0, 7, 83, 122, 0, 2, 59, 89, 0, 1, 38, 60, 0, | ||
| 87 | 1, 17, 27, 0, 23, 144, 163, 0, 13, 112, 154, 0, 2, 75, 117, 0, 1, 50, 81, 0, | ||
| 88 | 1, 31, 51, 0, 1, 14, 23, 0, 18, 162, 185, 0, 6, 123, 171, 0, 1, 78, 125, 0, | ||
| 89 | 1, 51, 86, 0, 1, 31, 54, 0, 1, 14, 23, 0, 15, 199, 227, 0, 3, 150, 204, 0, | ||
| 90 | 1, 91, 146, 0, 1, 55, 95, 0, 1, 30, 53, 0, 1, 11, 20, 0, 19, 55, 240, 0, | ||
| 91 | 19, 59, 196, 0, 3, 52, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 92 | 41, 166, 207, 0, 104, 153, 199, 0, 31, 123, 181, 0, 14, 101, 152, 0, 5, 72, 106, 0, | ||
| 93 | 1, 36, 52, 0, 35, 176, 211, 0, 12, 131, 190, 0, 2, 88, 144, 0, 1, 60, 101, 0, | ||
| 94 | 1, 36, 60, 0, 1, 16, 28, 0, 28, 183, 213, 0, 8, 134, 191, 0, 1, 86, 142, 0, | ||
| 95 | 1, 56, 96, 0, 1, 30, 53, 0, 1, 12, 20, 0, 20, 190, 215, 0, 4, 135, 192, 0, | ||
| 96 | 1, 84, 139, 0, 1, 53, 91, 0, 1, 28, 49, 0, 1, 11, 20, 0, 13, 196, 216, 0, | ||
| 97 | 2, 137, 192, 0, 1, 86, 143, 0, 1, 57, 99, 0, 1, 32, 56, 0, 1, 13, 24, 0, | ||
| 98 | 211, 29, 217, 0, 96, 47, 156, 0, 22, 43, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 99 | 0, 0, 0, 0, 78, 120, 193, 0, 111, 116, 186, 0, 46, 102, 164, 0, 15, 80, 128, 0, | ||
| 100 | 2, 49, 76, 0, 1, 18, 28, 0, 71, 161, 203, 0, 42, 132, 192, 0, 10, 98, 150, 0, | ||
| 101 | 3, 69, 109, 0, 1, 44, 70, 0, 1, 18, 29, 0, 57, 186, 211, 0, 30, 140, 196, 0, | ||
| 102 | 4, 93, 146, 0, 1, 62, 102, 0, 1, 38, 65, 0, 1, 16, 27, 0, 47, 199, 217, 0, | ||
| 103 | 14, 145, 196, 0, 1, 88, 142, 0, 1, 57, 98, 0, 1, 36, 62, 0, 1, 15, 26, 0, | ||
| 104 | 26, 219, 229, 0, 5, 155, 207, 0, 1, 94, 151, 0, 1, 60, 104, 0, 1, 36, 62, 0, | ||
| 105 | 1, 16, 28, 0, 233, 29, 248, 0, 146, 47, 220, 0, 43, 52, 140, 0, 0, 0, 0, 0, | ||
| 106 | 0, 0, 0, 0, 0, 0, 0, 0, 100, 163, 232, 0, 179, 161, 222, 0, 63, 142, 204, 0, | ||
| 107 | 37, 113, 174, 0, 26, 89, 137, 0, 18, 68, 97, 0, 85, 181, 230, 0, 32, 146, 209, 0, | ||
| 108 | 7, 100, 164, 0, 3, 71, 121, 0, 1, 45, 77, 0, 1, 18, 30, 0, 65, 187, 230, 0, | ||
| 109 | 20, 148, 207, 0, 2, 97, 159, 0, 1, 68, 116, 0, 1, 40, 70, 0, 1, 14, 29, 0, | ||
| 110 | 40, 194, 227, 0, 8, 147, 204, 0, 1, 94, 155, 0, 1, 65, 112, 0, 1, 39, 66, 0, | ||
| 111 | 1, 14, 26, 0, 16, 208, 228, 0, 3, 151, 207, 0, 1, 98, 160, 0, 1, 67, 117, 0, | ||
| 112 | 1, 41, 74, 0, 1, 17, 31, 0, 17, 38, 140, 0, 7, 34, 80, 0, 1, 17, 29, 0, | ||
| 113 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37, 75, 128, 0, 41, 76, 128, 0, | ||
| 114 | 26, 66, 116, 0, 12, 52, 94, 0, 2, 32, 55, 0, 1, 10, 16, 0, 50, 127, 154, 0, | ||
| 115 | 37, 109, 152, 0, 16, 82, 121, 0, 5, 59, 85, 0, 1, 35, 54, 0, 1, 13, 20, 0, | ||
| 116 | 40, 142, 167, 0, 17, 110, 157, 0, 2, 71, 112, 0, 1, 44, 72, 0, 1, 27, 45, 0, | ||
| 117 | 1, 11, 17, 0, 30, 175, 188, 0, 9, 124, 169, 0, 1, 74, 116, 0, 1, 48, 78, 0, | ||
| 118 | 1, 30, 49, 0, 1, 11, 18, 0, 10, 222, 223, 0, 2, 150, 194, 0, 1, 83, 128, 0, | ||
| 119 | 1, 48, 79, 0, 1, 27, 45, 0, 1, 11, 17, 0, 36, 41, 235, 0, 29, 36, 193, 0, | ||
| 120 | 10, 27, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 165, 222, 0, | ||
| 121 | 177, 162, 215, 0, 110, 135, 195, 0, 57, 113, 168, 0, 23, 83, 120, 0, 10, 49, 61, 0, | ||
| 122 | 85, 190, 223, 0, 36, 139, 200, 0, 5, 90, 146, 0, 1, 60, 103, 0, 1, 38, 65, 0, | ||
| 123 | 1, 18, 30, 0, 72, 202, 223, 0, 23, 141, 199, 0, 2, 86, 140, 0, 1, 56, 97, 0, | ||
| 124 | 1, 36, 61, 0, 1, 16, 27, 0, 55, 218, 225, 0, 13, 145, 200, 0, 1, 86, 141, 0, | ||
| 125 | 1, 57, 99, 0, 1, 35, 61, 0, 1, 13, 22, 0, 15, 235, 212, 0, 1, 132, 184, 0, | ||
| 126 | 1, 84, 139, 0, 1, 57, 97, 0, 1, 34, 56, 0, 1, 14, 23, 0, 181, 21, 201, 0, | ||
| 127 | 61, 37, 123, 0, 10, 38, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 128 | 47, 106, 172, 0, 95, 104, 173, 0, 42, 93, 159, 0, 18, 77, 131, 0, 4, 50, 81, 0, | ||
| 129 | 1, 17, 23, 0, 62, 147, 199, 0, 44, 130, 189, 0, 28, 102, 154, 0, 18, 75, 115, 0, | ||
| 130 | 2, 44, 65, 0, 1, 12, 19, 0, 55, 153, 210, 0, 24, 130, 194, 0, 3, 93, 146, 0, | ||
| 131 | 1, 61, 97, 0, 1, 31, 50, 0, 1, 10, 16, 0, 49, 186, 223, 0, 17, 148, 204, 0, | ||
| 132 | 1, 96, 142, 0, 1, 53, 83, 0, 1, 26, 44, 0, 1, 11, 17, 0, 13, 217, 212, 0, | ||
| 133 | 2, 136, 180, 0, 1, 78, 124, 0, 1, 50, 83, 0, 1, 29, 49, 0, 1, 14, 23, 0, | ||
| 134 | 197, 13, 247, 0, 82, 17, 222, 0, 25, 17, 162, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 135 | 0, 0, 0, 0, 126, 186, 247, 0, 234, 191, 243, 0, 176, 177, 234, 0, 104, 158, 220, 0, | ||
| 136 | 66, 128, 186, 0, 55, 90, 137, 0, 111, 197, 242, 0, 46, 158, 219, 0, 9, 104, 171, 0, | ||
| 137 | 2, 65, 125, 0, 1, 44, 80, 0, 1, 17, 91, 0, 104, 208, 245, 0, 39, 168, 224, 0, | ||
| 138 | 3, 109, 162, 0, 1, 79, 124, 0, 1, 50, 102, 0, 1, 43, 102, 0, 84, 220, 246, 0, | ||
| 139 | 31, 177, 231, 0, 2, 115, 180, 0, 1, 79, 134, 0, 1, 55, 77, 0, 1, 60, 79, 0, | ||
| 140 | 43, 243, 240, 0, 8, 180, 217, 0, 1, 115, 166, 0, 1, 84, 121, 0, 1, 51, 67, 0, | ||
| 141 | 1, 16, 6, 0, | ||
| 142 | }, | ||
| 143 | .switchable_interp_prob{235, 162, 36, 255, 34, 3, 149, 144}, | ||
| 144 | .inter_mode_prob{ | ||
| 145 | 2, 173, 34, 0, 7, 145, 85, 0, 7, 166, 63, 0, 7, 94, | ||
| 146 | 66, 0, 8, 64, 46, 0, 17, 81, 31, 0, 25, 29, 30, 0, | ||
| 147 | }, | ||
| 148 | .intra_inter_prob{9, 102, 187, 225}, | ||
| 149 | .comp_inter_prob{9, 102, 187, 225, 0}, | ||
| 150 | .single_ref_prob{33, 16, 77, 74, 142, 142, 172, 170, 238, 247}, | ||
| 151 | .comp_ref_prob{50, 126, 123, 221, 226}, | ||
| 152 | .tx_32x32_prob{3, 136, 37, 5, 52, 13}, | ||
| 153 | .tx_16x16_prob{20, 152, 15, 101}, | ||
| 154 | .tx_8x8_prob{100, 66}, | ||
| 155 | .skip_probs{192, 128, 64}, | ||
| 156 | .joints{32, 64, 96}, | ||
| 157 | .sign{128, 128}, | ||
| 158 | .classes{ | ||
| 159 | 224, 144, 192, 168, 192, 176, 192, 198, 198, 245, | ||
| 160 | 216, 128, 176, 160, 176, 176, 192, 198, 198, 208, | ||
| 161 | }, | ||
| 162 | .class_0{216, 208}, | ||
| 163 | .prob_bits{ | ||
| 164 | 136, 140, 148, 160, 176, 192, 224, 234, 234, 240, | ||
| 165 | 136, 140, 148, 160, 176, 192, 224, 234, 234, 240, | ||
| 166 | }, | ||
| 167 | .class_0_fr{128, 128, 64, 96, 112, 64, 128, 128, 64, 96, 112, 64}, | ||
| 168 | .fr{64, 96, 64, 64, 96, 64}, | ||
| 169 | .class_0_hp{160, 160}, | ||
| 170 | .high_precision{128, 128}, | ||
| 171 | }; | ||
| 172 | |||
| 173 | VP9::VP9(GPU& gpu) : gpu(gpu) {} | ||
| 174 | |||
| 175 | VP9::~VP9() = default; | ||
| 176 | |||
| 177 | void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) { | ||
| 178 | const bool update = new_prob != old_prob; | ||
| 179 | |||
| 180 | writer.Write(update, diff_update_probability); | ||
| 181 | |||
| 182 | if (update) { | ||
| 183 | WriteProbabilityDelta(writer, new_prob, old_prob); | ||
| 184 | } | ||
| 185 | } | ||
| 186 | template <typename T, std::size_t N> | ||
| 187 | void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob, | ||
| 188 | const std::array<T, N>& old_prob) { | ||
| 189 | for (std::size_t offset = 0; offset < new_prob.size(); ++offset) { | ||
| 190 | WriteProbabilityUpdate(writer, new_prob[offset], old_prob[offset]); | ||
| 191 | } | ||
| 192 | } | ||
| 193 | |||
| 194 | template <typename T, std::size_t N> | ||
| 195 | void VP9::WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob, | ||
| 196 | const std::array<T, N>& old_prob) { | ||
| 197 | for (std::size_t offset = 0; offset < new_prob.size(); offset += 4) { | ||
| 198 | WriteProbabilityUpdate(writer, new_prob[offset + 0], old_prob[offset + 0]); | ||
| 199 | WriteProbabilityUpdate(writer, new_prob[offset + 1], old_prob[offset + 1]); | ||
| 200 | WriteProbabilityUpdate(writer, new_prob[offset + 2], old_prob[offset + 2]); | ||
| 201 | } | ||
| 202 | } | ||
| 203 | |||
| 204 | void VP9::WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) { | ||
| 205 | const int delta = RemapProbability(new_prob, old_prob); | ||
| 206 | |||
| 207 | EncodeTermSubExp(writer, delta); | ||
| 208 | } | ||
| 209 | |||
| 210 | s32 VP9::RemapProbability(s32 new_prob, s32 old_prob) { | ||
| 211 | new_prob--; | ||
| 212 | old_prob--; | ||
| 213 | |||
| 214 | std::size_t index{}; | ||
| 215 | |||
| 216 | if (old_prob * 2 <= 0xff) { | ||
| 217 | index = static_cast<std::size_t>(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1)); | ||
| 218 | } else { | ||
| 219 | index = static_cast<std::size_t>( | ||
| 220 | std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1)); | ||
| 221 | } | ||
| 222 | |||
| 223 | return map_lut[index]; | ||
| 224 | } | ||
| 225 | |||
| 226 | s32 VP9::RecenterNonNeg(s32 new_prob, s32 old_prob) { | ||
| 227 | if (new_prob > old_prob * 2) { | ||
| 228 | return new_prob; | ||
| 229 | } else if (new_prob >= old_prob) { | ||
| 230 | return (new_prob - old_prob) * 2; | ||
| 231 | } else { | ||
| 232 | return (old_prob - new_prob) * 2 - 1; | ||
| 233 | } | ||
| 234 | } | ||
| 235 | |||
| 236 | void VP9::EncodeTermSubExp(VpxRangeEncoder& writer, s32 value) { | ||
| 237 | if (WriteLessThan(writer, value, 16)) { | ||
| 238 | writer.Write(value, 4); | ||
| 239 | } else if (WriteLessThan(writer, value, 32)) { | ||
| 240 | writer.Write(value - 16, 4); | ||
| 241 | } else if (WriteLessThan(writer, value, 64)) { | ||
| 242 | writer.Write(value - 32, 5); | ||
| 243 | } else { | ||
| 244 | value -= 64; | ||
| 245 | |||
| 246 | constexpr s32 size = 8; | ||
| 247 | |||
| 248 | const s32 mask = (1 << size) - 191; | ||
| 249 | |||
| 250 | const s32 delta = value - mask; | ||
| 251 | |||
| 252 | if (delta < 0) { | ||
| 253 | writer.Write(value, size - 1); | ||
| 254 | } else { | ||
| 255 | writer.Write(delta / 2 + mask, size - 1); | ||
| 256 | writer.Write(delta & 1, 1); | ||
| 257 | } | ||
| 258 | } | ||
| 259 | } | ||
| 260 | |||
| 261 | bool VP9::WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test) { | ||
| 262 | const bool is_lt = value < test; | ||
| 263 | writer.Write(!is_lt); | ||
| 264 | return is_lt; | ||
| 265 | } | ||
| 266 | |||
| 267 | void VP9::WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode, | ||
| 268 | const std::array<u8, 2304>& new_prob, | ||
| 269 | const std::array<u8, 2304>& old_prob) { | ||
| 270 | // Note: There's 1 byte added on each packet for alignment, | ||
| 271 | // this byte is ignored when doing updates. | ||
| 272 | constexpr s32 block_bytes = 2 * 2 * 6 * 6 * 4; | ||
| 273 | |||
| 274 | const auto needs_update = [&](s32 base_index) -> bool { | ||
| 275 | s32 index = base_index; | ||
| 276 | for (s32 i = 0; i < 2; i++) { | ||
| 277 | for (s32 j = 0; j < 2; j++) { | ||
| 278 | for (s32 k = 0; k < 6; k++) { | ||
| 279 | for (s32 l = 0; l < 6; l++) { | ||
| 280 | if (new_prob[index + 0] != old_prob[index + 0] || | ||
| 281 | new_prob[index + 1] != old_prob[index + 1] || | ||
| 282 | new_prob[index + 2] != old_prob[index + 2]) { | ||
| 283 | return true; | ||
| 284 | } | ||
| 285 | |||
| 286 | index += 4; | ||
| 287 | } | ||
| 288 | } | ||
| 289 | } | ||
| 290 | } | ||
| 291 | return false; | ||
| 292 | }; | ||
| 293 | |||
| 294 | for (s32 block_index = 0; block_index < 4; block_index++) { | ||
| 295 | const s32 base_index = block_index * block_bytes; | ||
| 296 | const bool update = needs_update(base_index); | ||
| 297 | writer.Write(update); | ||
| 298 | |||
| 299 | if (update) { | ||
| 300 | s32 index = base_index; | ||
| 301 | for (s32 i = 0; i < 2; i++) { | ||
| 302 | for (s32 j = 0; j < 2; j++) { | ||
| 303 | for (s32 k = 0; k < 6; k++) { | ||
| 304 | for (s32 l = 0; l < 6; l++) { | ||
| 305 | if (k != 0 || l < 3) { | ||
| 306 | WriteProbabilityUpdate(writer, new_prob[index + 0], | ||
| 307 | old_prob[index + 0]); | ||
| 308 | WriteProbabilityUpdate(writer, new_prob[index + 1], | ||
| 309 | old_prob[index + 1]); | ||
| 310 | WriteProbabilityUpdate(writer, new_prob[index + 2], | ||
| 311 | old_prob[index + 2]); | ||
| 312 | } | ||
| 313 | index += 4; | ||
| 314 | } | ||
| 315 | } | ||
| 316 | } | ||
| 317 | } | ||
| 318 | } | ||
| 319 | |||
| 320 | if (block_index == tx_mode) { | ||
| 321 | break; | ||
| 322 | } | ||
| 323 | } | ||
| 324 | } | ||
| 325 | |||
| 326 | void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) { | ||
| 327 | const bool update = new_prob != old_prob; | ||
| 328 | writer.Write(update, diff_update_probability); | ||
| 329 | |||
| 330 | if (update) { | ||
| 331 | writer.Write(new_prob >> 1, 7); | ||
| 332 | } | ||
| 333 | } | ||
| 334 | |||
| 335 | s32 VP9::CalcMinLog2TileCols(s32 frame_width) { | ||
| 336 | const s32 sb64_cols = (frame_width + 63) / 64; | ||
| 337 | s32 min_log2 = 0; | ||
| 338 | |||
| 339 | while ((64 << min_log2) < sb64_cols) { | ||
| 340 | min_log2++; | ||
| 341 | } | ||
| 342 | |||
| 343 | return min_log2; | ||
| 344 | } | ||
| 345 | |||
| 346 | s32 VP9::CalcMaxLog2TileCols(s32 frameWidth) { | ||
| 347 | const s32 sb64_cols = (frameWidth + 63) / 64; | ||
| 348 | s32 max_log2 = 1; | ||
| 349 | |||
| 350 | while ((sb64_cols >> max_log2) >= 4) { | ||
| 351 | max_log2++; | ||
| 352 | } | ||
| 353 | |||
| 354 | return max_log2 - 1; | ||
| 355 | } | ||
| 356 | |||
| 357 | Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { | ||
| 358 | PictureInfo picture_info{}; | ||
| 359 | gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); | ||
| 360 | Vp9PictureInfo vp9_info = picture_info.Convert(); | ||
| 361 | |||
| 362 | InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy); | ||
| 363 | |||
| 364 | // surface_luma_offset[0:3] contains the address of the reference frame offsets in the following | ||
| 365 | // order: last, golden, altref, current. It may be worthwhile to track the updates done here | ||
| 366 | // to avoid buffering frame data needed for reference frame updating in the header composition. | ||
| 367 | std::memcpy(vp9_info.frame_offsets.data(), state.surface_luma_offset.data(), 4 * sizeof(u64)); | ||
| 368 | |||
| 369 | return std::move(vp9_info); | ||
| 370 | } | ||
| 371 | |||
| 372 | void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { | ||
| 373 | EntropyProbs entropy{}; | ||
| 374 | gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); | ||
| 375 | entropy.Convert(dst); | ||
| 376 | } | ||
| 377 | |||
| 378 | Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) { | ||
| 379 | Vp9FrameContainer frame{}; | ||
| 380 | { | ||
| 381 | gpu.SyncGuestHost(); | ||
| 382 | frame.info = std::move(GetVp9PictureInfo(state)); | ||
| 383 | |||
| 384 | frame.bit_stream.resize(frame.info.bitstream_size); | ||
| 385 | gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.bit_stream.data(), | ||
| 386 | frame.info.bitstream_size); | ||
| 387 | } | ||
| 388 | // Buffer two frames, saving the last show frame info | ||
| 389 | if (next_next_frame.bit_stream.size() != 0) { | ||
| 390 | Vp9FrameContainer temp{ | ||
| 391 | .info = frame.info, | ||
| 392 | .bit_stream = frame.bit_stream, | ||
| 393 | }; | ||
| 394 | next_next_frame.info.show_frame = frame.info.last_frame_shown; | ||
| 395 | frame.info = next_next_frame.info; | ||
| 396 | frame.bit_stream = next_next_frame.bit_stream; | ||
| 397 | next_next_frame = std::move(temp); | ||
| 398 | |||
| 399 | if (next_frame.bit_stream.size() != 0) { | ||
| 400 | Vp9FrameContainer temp{ | ||
| 401 | .info = frame.info, | ||
| 402 | .bit_stream = frame.bit_stream, | ||
| 403 | }; | ||
| 404 | next_frame.info.show_frame = frame.info.last_frame_shown; | ||
| 405 | frame.info = next_frame.info; | ||
| 406 | frame.bit_stream = next_frame.bit_stream; | ||
| 407 | next_frame = std::move(temp); | ||
| 408 | } else { | ||
| 409 | next_frame.info = frame.info; | ||
| 410 | next_frame.bit_stream = frame.bit_stream; | ||
| 411 | } | ||
| 412 | } else { | ||
| 413 | next_next_frame.info = frame.info; | ||
| 414 | next_next_frame.bit_stream = frame.bit_stream; | ||
| 415 | } | ||
| 416 | return frame; | ||
| 417 | } | ||
| 418 | |||
| 419 | std::vector<u8> VP9::ComposeCompressedHeader() { | ||
| 420 | VpxRangeEncoder writer{}; | ||
| 421 | |||
| 422 | if (!current_frame_info.lossless) { | ||
| 423 | if (static_cast<u32>(current_frame_info.transform_mode) >= 3) { | ||
| 424 | writer.Write(3, 2); | ||
| 425 | writer.Write(current_frame_info.transform_mode == 4); | ||
| 426 | } else { | ||
| 427 | writer.Write(current_frame_info.transform_mode, 2); | ||
| 428 | } | ||
| 429 | } | ||
| 430 | |||
| 431 | if (current_frame_info.transform_mode == 4) { | ||
| 432 | // tx_mode_probs() in the spec | ||
| 433 | WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_8x8_prob, | ||
| 434 | prev_frame_probs.tx_8x8_prob); | ||
| 435 | WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_16x16_prob, | ||
| 436 | prev_frame_probs.tx_16x16_prob); | ||
| 437 | WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_32x32_prob, | ||
| 438 | prev_frame_probs.tx_32x32_prob); | ||
| 439 | if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { | ||
| 440 | prev_frame_probs.tx_8x8_prob = current_frame_info.entropy.tx_8x8_prob; | ||
| 441 | prev_frame_probs.tx_16x16_prob = current_frame_info.entropy.tx_16x16_prob; | ||
| 442 | prev_frame_probs.tx_32x32_prob = current_frame_info.entropy.tx_32x32_prob; | ||
| 443 | } | ||
| 444 | } | ||
| 445 | // read_coef_probs() in the spec | ||
| 446 | WriteCoefProbabilityUpdate(writer, current_frame_info.transform_mode, | ||
| 447 | current_frame_info.entropy.coef_probs, prev_frame_probs.coef_probs); | ||
| 448 | // read_skip_probs() in the spec | ||
| 449 | WriteProbabilityUpdate(writer, current_frame_info.entropy.skip_probs, | ||
| 450 | prev_frame_probs.skip_probs); | ||
| 451 | |||
| 452 | if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { | ||
| 453 | prev_frame_probs.coef_probs = current_frame_info.entropy.coef_probs; | ||
| 454 | prev_frame_probs.skip_probs = current_frame_info.entropy.skip_probs; | ||
| 455 | } | ||
| 456 | |||
| 457 | if (!current_frame_info.intra_only) { | ||
| 458 | // read_inter_probs() in the spec | ||
| 459 | WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.inter_mode_prob, | ||
| 460 | prev_frame_probs.inter_mode_prob); | ||
| 461 | if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { | ||
| 462 | prev_frame_probs.inter_mode_prob = current_frame_info.entropy.inter_mode_prob; | ||
| 463 | } | ||
| 464 | |||
| 465 | if (current_frame_info.interp_filter == 4) { | ||
| 466 | // read_interp_filter_probs() in the spec | ||
| 467 | WriteProbabilityUpdate(writer, current_frame_info.entropy.switchable_interp_prob, | ||
| 468 | prev_frame_probs.switchable_interp_prob); | ||
| 469 | if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { | ||
| 470 | prev_frame_probs.switchable_interp_prob = | ||
| 471 | current_frame_info.entropy.switchable_interp_prob; | ||
| 472 | } | ||
| 473 | } | ||
| 474 | |||
| 475 | // read_is_inter_probs() in the spec | ||
| 476 | WriteProbabilityUpdate(writer, current_frame_info.entropy.intra_inter_prob, | ||
| 477 | prev_frame_probs.intra_inter_prob); | ||
| 478 | if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { | ||
| 479 | prev_frame_probs.intra_inter_prob = current_frame_info.entropy.intra_inter_prob; | ||
| 480 | } | ||
| 481 | // frame_reference_mode() in the spec | ||
| 482 | if ((current_frame_info.ref_frame_sign_bias[1] & 1) != | ||
| 483 | (current_frame_info.ref_frame_sign_bias[2] & 1) || | ||
| 484 | (current_frame_info.ref_frame_sign_bias[1] & 1) != | ||
| 485 | (current_frame_info.ref_frame_sign_bias[3] & 1)) { | ||
| 486 | if (current_frame_info.reference_mode >= 1) { | ||
| 487 | writer.Write(1, 1); | ||
| 488 | writer.Write(current_frame_info.reference_mode == 2); | ||
| 489 | } else { | ||
| 490 | writer.Write(0, 1); | ||
| 491 | } | ||
| 492 | } | ||
| 493 | |||
| 494 | // frame_reference_mode_probs() in the spec | ||
| 495 | if (current_frame_info.reference_mode == 2) { | ||
| 496 | WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_inter_prob, | ||
| 497 | prev_frame_probs.comp_inter_prob); | ||
| 498 | if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { | ||
| 499 | prev_frame_probs.comp_inter_prob = current_frame_info.entropy.comp_inter_prob; | ||
| 500 | } | ||
| 501 | } | ||
| 502 | |||
| 503 | if (current_frame_info.reference_mode != 1) { | ||
| 504 | WriteProbabilityUpdate(writer, current_frame_info.entropy.single_ref_prob, | ||
| 505 | prev_frame_probs.single_ref_prob); | ||
| 506 | if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { | ||
| 507 | prev_frame_probs.single_ref_prob = current_frame_info.entropy.single_ref_prob; | ||
| 508 | } | ||
| 509 | } | ||
| 510 | |||
| 511 | if (current_frame_info.reference_mode != 0) { | ||
| 512 | WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_ref_prob, | ||
| 513 | prev_frame_probs.comp_ref_prob); | ||
| 514 | if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { | ||
| 515 | prev_frame_probs.comp_ref_prob = current_frame_info.entropy.comp_ref_prob; | ||
| 516 | } | ||
| 517 | } | ||
| 518 | |||
| 519 | // read_y_mode_probs | ||
| 520 | for (std::size_t index = 0; index < current_frame_info.entropy.y_mode_prob.size(); | ||
| 521 | ++index) { | ||
| 522 | WriteProbabilityUpdate(writer, current_frame_info.entropy.y_mode_prob[index], | ||
| 523 | prev_frame_probs.y_mode_prob[index]); | ||
| 524 | } | ||
| 525 | if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { | ||
| 526 | prev_frame_probs.y_mode_prob = current_frame_info.entropy.y_mode_prob; | ||
| 527 | } | ||
| 528 | // read_partition_probs | ||
| 529 | WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.partition_prob, | ||
| 530 | prev_frame_probs.partition_prob); | ||
| 531 | if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { | ||
| 532 | prev_frame_probs.partition_prob = current_frame_info.entropy.partition_prob; | ||
| 533 | } | ||
| 534 | |||
| 535 | // mv_probs | ||
| 536 | for (s32 i = 0; i < 3; i++) { | ||
| 537 | WriteMvProbabilityUpdate(writer, current_frame_info.entropy.joints[i], | ||
| 538 | prev_frame_probs.joints[i]); | ||
| 539 | } | ||
| 540 | if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { | ||
| 541 | prev_frame_probs.joints = current_frame_info.entropy.joints; | ||
| 542 | } | ||
| 543 | |||
| 544 | for (s32 i = 0; i < 2; i++) { | ||
| 545 | WriteMvProbabilityUpdate(writer, current_frame_info.entropy.sign[i], | ||
| 546 | prev_frame_probs.sign[i]); | ||
| 547 | |||
| 548 | for (s32 j = 0; j < 10; j++) { | ||
| 549 | const int index = i * 10 + j; | ||
| 550 | |||
| 551 | WriteMvProbabilityUpdate(writer, current_frame_info.entropy.classes[index], | ||
| 552 | prev_frame_probs.classes[index]); | ||
| 553 | } | ||
| 554 | |||
| 555 | WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0[i], | ||
| 556 | prev_frame_probs.class_0[i]); | ||
| 557 | |||
| 558 | for (s32 j = 0; j < 10; j++) { | ||
| 559 | const int index = i * 10 + j; | ||
| 560 | |||
| 561 | WriteMvProbabilityUpdate(writer, current_frame_info.entropy.prob_bits[index], | ||
| 562 | prev_frame_probs.prob_bits[index]); | ||
| 563 | } | ||
| 564 | } | ||
| 565 | |||
| 566 | for (s32 i = 0; i < 2; i++) { | ||
| 567 | for (s32 j = 0; j < 2; j++) { | ||
| 568 | for (s32 k = 0; k < 3; k++) { | ||
| 569 | const int index = i * 2 * 3 + j * 3 + k; | ||
| 570 | |||
| 571 | WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_fr[index], | ||
| 572 | prev_frame_probs.class_0_fr[index]); | ||
| 573 | } | ||
| 574 | } | ||
| 575 | |||
| 576 | for (s32 j = 0; j < 3; j++) { | ||
| 577 | const int index = i * 3 + j; | ||
| 578 | |||
| 579 | WriteMvProbabilityUpdate(writer, current_frame_info.entropy.fr[index], | ||
| 580 | prev_frame_probs.fr[index]); | ||
| 581 | } | ||
| 582 | } | ||
| 583 | |||
| 584 | if (current_frame_info.allow_high_precision_mv) { | ||
| 585 | for (s32 index = 0; index < 2; index++) { | ||
| 586 | WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_hp[index], | ||
| 587 | prev_frame_probs.class_0_hp[index]); | ||
| 588 | WriteMvProbabilityUpdate(writer, current_frame_info.entropy.high_precision[index], | ||
| 589 | prev_frame_probs.high_precision[index]); | ||
| 590 | } | ||
| 591 | } | ||
| 592 | |||
| 593 | // save previous probs | ||
| 594 | if (current_frame_info.show_frame && !current_frame_info.is_key_frame) { | ||
| 595 | prev_frame_probs.sign = current_frame_info.entropy.sign; | ||
| 596 | prev_frame_probs.classes = current_frame_info.entropy.classes; | ||
| 597 | prev_frame_probs.class_0 = current_frame_info.entropy.class_0; | ||
| 598 | prev_frame_probs.prob_bits = current_frame_info.entropy.prob_bits; | ||
| 599 | prev_frame_probs.class_0_fr = current_frame_info.entropy.class_0_fr; | ||
| 600 | prev_frame_probs.fr = current_frame_info.entropy.fr; | ||
| 601 | prev_frame_probs.class_0_hp = current_frame_info.entropy.class_0_hp; | ||
| 602 | prev_frame_probs.high_precision = current_frame_info.entropy.high_precision; | ||
| 603 | } | ||
| 604 | } | ||
| 605 | |||
| 606 | writer.End(); | ||
| 607 | return writer.GetBuffer(); | ||
| 608 | |||
| 609 | const auto writer_bytearray = writer.GetBuffer(); | ||
| 610 | |||
| 611 | std::vector<u8> compressed_header(writer_bytearray.size()); | ||
| 612 | std::memcpy(compressed_header.data(), writer_bytearray.data(), writer_bytearray.size()); | ||
| 613 | return compressed_header; | ||
| 614 | } | ||
| 615 | |||
| 616 | VpxBitStreamWriter VP9::ComposeUncompressedHeader() { | ||
| 617 | VpxBitStreamWriter uncomp_writer{}; | ||
| 618 | |||
| 619 | uncomp_writer.WriteU(2, 2); // Frame marker. | ||
| 620 | uncomp_writer.WriteU(0, 2); // Profile. | ||
| 621 | uncomp_writer.WriteBit(false); // Show existing frame. | ||
| 622 | uncomp_writer.WriteBit(!current_frame_info.is_key_frame); // is key frame? | ||
| 623 | uncomp_writer.WriteBit(current_frame_info.show_frame); // show frame? | ||
| 624 | uncomp_writer.WriteBit(current_frame_info.error_resilient_mode); // error reslience | ||
| 625 | |||
| 626 | if (current_frame_info.is_key_frame) { | ||
| 627 | uncomp_writer.WriteU(frame_sync_code, 24); | ||
| 628 | uncomp_writer.WriteU(0, 3); // Color space. | ||
| 629 | uncomp_writer.WriteU(0, 1); // Color range. | ||
| 630 | uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16); | ||
| 631 | uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16); | ||
| 632 | uncomp_writer.WriteBit(false); // Render and frame size different. | ||
| 633 | |||
| 634 | // Reset context | ||
| 635 | prev_frame_probs = default_probs; | ||
| 636 | swap_next_golden = false; | ||
| 637 | loop_filter_ref_deltas.fill(0); | ||
| 638 | loop_filter_mode_deltas.fill(0); | ||
| 639 | |||
| 640 | // allow frames offsets to stabilize before checking for golden frames | ||
| 641 | grace_period = 4; | ||
| 642 | |||
| 643 | // On key frames, all frame slots are set to the current frame, | ||
| 644 | // so the value of the selected slot doesn't really matter. | ||
| 645 | frame_ctxs.fill({current_frame_number, false, default_probs}); | ||
| 646 | |||
| 647 | // intra only, meaning the frame can be recreated with no other references | ||
| 648 | current_frame_info.intra_only = true; | ||
| 649 | |||
| 650 | } else { | ||
| 651 | std::array<s32, 3> ref_frame_index; | ||
| 652 | |||
| 653 | if (!current_frame_info.show_frame) { | ||
| 654 | uncomp_writer.WriteBit(current_frame_info.intra_only); | ||
| 655 | if (!current_frame_info.last_frame_was_key) { | ||
| 656 | swap_next_golden = !swap_next_golden; | ||
| 657 | } | ||
| 658 | } else { | ||
| 659 | current_frame_info.intra_only = false; | ||
| 660 | } | ||
| 661 | if (!current_frame_info.error_resilient_mode) { | ||
| 662 | uncomp_writer.WriteU(0, 2); // Reset frame context. | ||
| 663 | } | ||
| 664 | |||
| 665 | // Last, Golden, Altref frames | ||
| 666 | ref_frame_index = std::array<s32, 3>{0, 1, 2}; | ||
| 667 | |||
| 668 | // set when next frame is hidden | ||
| 669 | // altref and golden references are swapped | ||
| 670 | if (swap_next_golden) { | ||
| 671 | ref_frame_index = std::array<s32, 3>{0, 2, 1}; | ||
| 672 | } | ||
| 673 | |||
| 674 | // update Last Frame | ||
| 675 | u64 refresh_frame_flags = 1; | ||
| 676 | |||
| 677 | // golden frame may refresh, determined if the next golden frame offset is changed | ||
| 678 | bool golden_refresh = false; | ||
| 679 | if (grace_period <= 0) { | ||
| 680 | for (s32 index = 1; index < 3; ++index) { | ||
| 681 | if (current_frame_info.frame_offsets[index] != | ||
| 682 | next_frame.info.frame_offsets[index]) { | ||
| 683 | current_frame_info.refresh_frame[index] = true; | ||
| 684 | golden_refresh = true; | ||
| 685 | grace_period = 3; | ||
| 686 | } | ||
| 687 | } | ||
| 688 | } | ||
| 689 | |||
| 690 | if (current_frame_info.show_frame && | ||
| 691 | (!next_frame.info.show_frame || next_frame.info.is_key_frame)) { | ||
| 692 | // Update golden frame | ||
| 693 | refresh_frame_flags = swap_next_golden ? 2 : 4; | ||
| 694 | } | ||
| 695 | |||
| 696 | if (!current_frame_info.show_frame) { | ||
| 697 | // Update altref | ||
| 698 | refresh_frame_flags = swap_next_golden ? 2 : 4; | ||
| 699 | } else if (golden_refresh) { | ||
| 700 | refresh_frame_flags = 3; | ||
| 701 | } | ||
| 702 | |||
| 703 | if (current_frame_info.intra_only) { | ||
| 704 | uncomp_writer.WriteU(frame_sync_code, 24); | ||
| 705 | uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8); | ||
| 706 | uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16); | ||
| 707 | uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16); | ||
| 708 | uncomp_writer.WriteBit(false); // Render and frame size different. | ||
| 709 | } else { | ||
| 710 | uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8); | ||
| 711 | |||
| 712 | for (s32 index = 1; index < 4; index++) { | ||
| 713 | uncomp_writer.WriteU(ref_frame_index[index - 1], 3); | ||
| 714 | uncomp_writer.WriteU(current_frame_info.ref_frame_sign_bias[index], 1); | ||
| 715 | } | ||
| 716 | |||
| 717 | uncomp_writer.WriteBit(true); // Frame size with refs. | ||
| 718 | uncomp_writer.WriteBit(false); // Render and frame size different. | ||
| 719 | uncomp_writer.WriteBit(current_frame_info.allow_high_precision_mv); | ||
| 720 | uncomp_writer.WriteBit(current_frame_info.interp_filter == 4); | ||
| 721 | |||
| 722 | if (current_frame_info.interp_filter != 4) { | ||
| 723 | uncomp_writer.WriteU(current_frame_info.interp_filter, 2); | ||
| 724 | } | ||
| 725 | } | ||
| 726 | } | ||
| 727 | |||
| 728 | if (!current_frame_info.error_resilient_mode) { | ||
| 729 | uncomp_writer.WriteBit(true); // Refresh frame context. where do i get this info from? | ||
| 730 | uncomp_writer.WriteBit(true); // Frame parallel decoding mode. | ||
| 731 | } | ||
| 732 | |||
| 733 | int frame_ctx_idx = 0; | ||
| 734 | if (!current_frame_info.show_frame) { | ||
| 735 | frame_ctx_idx = 1; | ||
| 736 | } | ||
| 737 | |||
| 738 | uncomp_writer.WriteU(frame_ctx_idx, 2); // Frame context index. | ||
| 739 | prev_frame_probs = | ||
| 740 | frame_ctxs[frame_ctx_idx].probs; // reference probabilities for compressed header | ||
| 741 | frame_ctxs[frame_ctx_idx] = {current_frame_number, false, current_frame_info.entropy}; | ||
| 742 | |||
| 743 | uncomp_writer.WriteU(current_frame_info.first_level, 6); | ||
| 744 | uncomp_writer.WriteU(current_frame_info.sharpness_level, 3); | ||
| 745 | uncomp_writer.WriteBit(current_frame_info.mode_ref_delta_enabled); | ||
| 746 | |||
| 747 | if (current_frame_info.mode_ref_delta_enabled) { | ||
| 748 | // check if ref deltas are different, update accordingly | ||
| 749 | std::array<bool, 4> update_loop_filter_ref_deltas; | ||
| 750 | std::array<bool, 2> update_loop_filter_mode_deltas; | ||
| 751 | |||
| 752 | bool loop_filter_delta_update = false; | ||
| 753 | |||
| 754 | for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) { | ||
| 755 | const s8 old_deltas = loop_filter_ref_deltas[index]; | ||
| 756 | const s8 new_deltas = current_frame_info.ref_deltas[index]; | ||
| 757 | |||
| 758 | loop_filter_delta_update |= | ||
| 759 | (update_loop_filter_ref_deltas[index] = old_deltas != new_deltas); | ||
| 760 | } | ||
| 761 | |||
| 762 | for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) { | ||
| 763 | const s8 old_deltas = loop_filter_mode_deltas[index]; | ||
| 764 | const s8 new_deltas = current_frame_info.mode_deltas[index]; | ||
| 765 | |||
| 766 | loop_filter_delta_update |= | ||
| 767 | (update_loop_filter_mode_deltas[index] = old_deltas != new_deltas); | ||
| 768 | } | ||
| 769 | |||
| 770 | uncomp_writer.WriteBit(loop_filter_delta_update); | ||
| 771 | |||
| 772 | if (loop_filter_delta_update) { | ||
| 773 | for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) { | ||
| 774 | uncomp_writer.WriteBit(update_loop_filter_ref_deltas[index]); | ||
| 775 | |||
| 776 | if (update_loop_filter_ref_deltas[index]) { | ||
| 777 | uncomp_writer.WriteS(current_frame_info.ref_deltas[index], 6); | ||
| 778 | } | ||
| 779 | } | ||
| 780 | |||
| 781 | for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) { | ||
| 782 | uncomp_writer.WriteBit(update_loop_filter_mode_deltas[index]); | ||
| 783 | |||
| 784 | if (update_loop_filter_mode_deltas[index]) { | ||
| 785 | uncomp_writer.WriteS(current_frame_info.mode_deltas[index], 6); | ||
| 786 | } | ||
| 787 | } | ||
| 788 | // save new deltas | ||
| 789 | loop_filter_ref_deltas = current_frame_info.ref_deltas; | ||
| 790 | loop_filter_mode_deltas = current_frame_info.mode_deltas; | ||
| 791 | } | ||
| 792 | } | ||
| 793 | |||
| 794 | uncomp_writer.WriteU(current_frame_info.base_q_index, 8); | ||
| 795 | |||
| 796 | uncomp_writer.WriteDeltaQ(current_frame_info.y_dc_delta_q); | ||
| 797 | uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q); | ||
| 798 | uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q); | ||
| 799 | |||
| 800 | uncomp_writer.WriteBit(false); // Segmentation enabled (TODO). | ||
| 801 | |||
| 802 | const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width); | ||
| 803 | const s32 max_tile_cols_log2 = CalcMaxLog2TileCols(current_frame_info.frame_size.width); | ||
| 804 | |||
| 805 | const s32 tile_cols_log2_diff = current_frame_info.log2_tile_cols - min_tile_cols_log2; | ||
| 806 | const s32 tile_cols_log2_inc_mask = (1 << tile_cols_log2_diff) - 1; | ||
| 807 | |||
| 808 | // If it's less than the maximum, we need to add an extra 0 on the bitstream | ||
| 809 | // to indicate that it should stop reading. | ||
| 810 | if (current_frame_info.log2_tile_cols < max_tile_cols_log2) { | ||
| 811 | uncomp_writer.WriteU(tile_cols_log2_inc_mask << 1, tile_cols_log2_diff + 1); | ||
| 812 | } else { | ||
| 813 | uncomp_writer.WriteU(tile_cols_log2_inc_mask, tile_cols_log2_diff); | ||
| 814 | } | ||
| 815 | |||
| 816 | const bool tile_rows_log2_is_nonzero = current_frame_info.log2_tile_rows != 0; | ||
| 817 | |||
| 818 | uncomp_writer.WriteBit(tile_rows_log2_is_nonzero); | ||
| 819 | |||
| 820 | if (tile_rows_log2_is_nonzero) { | ||
| 821 | uncomp_writer.WriteBit(current_frame_info.log2_tile_rows > 1); | ||
| 822 | } | ||
| 823 | |||
| 824 | return uncomp_writer; | ||
| 825 | } | ||
| 826 | |||
| 827 | std::vector<u8>& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) { | ||
| 828 | std::vector<u8> bitstream; | ||
| 829 | { | ||
| 830 | Vp9FrameContainer curr_frame = GetCurrentFrame(state); | ||
| 831 | current_frame_info = curr_frame.info; | ||
| 832 | bitstream = curr_frame.bit_stream; | ||
| 833 | } | ||
| 834 | |||
| 835 | // The uncompressed header routine sets PrevProb parameters needed for the compressed header | ||
| 836 | auto uncomp_writer = ComposeUncompressedHeader(); | ||
| 837 | std::vector<u8> compressed_header = ComposeCompressedHeader(); | ||
| 838 | |||
| 839 | uncomp_writer.WriteU(static_cast<s32>(compressed_header.size()), 16); | ||
| 840 | uncomp_writer.Flush(); | ||
| 841 | std::vector<u8> uncompressed_header = uncomp_writer.GetByteArray(); | ||
| 842 | |||
| 843 | // Write headers and frame to buffer | ||
| 844 | frame.resize(uncompressed_header.size() + compressed_header.size() + bitstream.size()); | ||
| 845 | std::memcpy(frame.data(), uncompressed_header.data(), uncompressed_header.size()); | ||
| 846 | std::memcpy(frame.data() + uncompressed_header.size(), compressed_header.data(), | ||
| 847 | compressed_header.size()); | ||
| 848 | std::memcpy(frame.data() + uncompressed_header.size() + compressed_header.size(), | ||
| 849 | bitstream.data(), bitstream.size()); | ||
| 850 | |||
| 851 | // keep track of frame number | ||
| 852 | current_frame_number++; | ||
| 853 | grace_period--; | ||
| 854 | |||
| 855 | // don't display hidden frames | ||
| 856 | hidden = !current_frame_info.show_frame; | ||
| 857 | return frame; | ||
| 858 | } | ||
| 859 | |||
/// Starts a new range-coded stream by encoding a single zero bit with half probability.
// NOTE(review): presumably the leading marker bit a VP9 bool decoder expects - confirm.
VpxRangeEncoder::VpxRangeEncoder() {
    Write(false);
}

VpxRangeEncoder::~VpxRangeEncoder() = default;
| 865 | |||
| 866 | void VpxRangeEncoder::Write(s32 value, s32 value_size) { | ||
| 867 | for (s32 bit = value_size - 1; bit >= 0; bit--) { | ||
| 868 | Write(((value >> bit) & 1) != 0); | ||
| 869 | } | ||
| 870 | } | ||
| 871 | |||
/// Encodes a single bit using half_probability (128, i.e. both outcomes equally likely).
void VpxRangeEncoder::Write(bool bit) {
    Write(bit, half_probability);
}
| 875 | |||
/// Encodes one bit into base_stream with the given probability.
///
/// @param bit         The value to encode.
/// @param probability Weight of the `false` outcome on a 0-256 scale; it determines how the
///                    current range is split between the two outcomes.
void VpxRangeEncoder::Write(bool bit, s32 probability) {
    u32 local_range = range;
    // Portion of the range assigned to a `false` bit (always at least 1).
    const u32 split = 1 + (((local_range - 1) * static_cast<u32>(probability)) >> 8);
    local_range = split;

    if (bit) {
        // A `true` bit takes the upper part of the range: skip past the split.
        low_value += split;
        local_range = range - split;
    }

    // Renormalise: norm_lut gives the left shift that brings an 8-bit range back to >= 128.
    s32 shift = norm_lut[local_range];
    local_range <<= shift;
    count += shift;

    // count starts negative; once it reaches zero a full byte is ready to be emitted.
    if (count >= 0) {
        const s32 offset = shift - count;

        // If shifting would push a set bit out of the top of low_value, propagate the carry
        // into the bytes that were already written.
        if (((low_value << (offset - 1)) >> 31) != 0) {
            const s32 current_pos = static_cast<s32>(base_stream.GetPosition());
            base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos);
            // Trailing 0xff bytes overflow to 0 and pass the carry further back.
            // NOTE(review): if GetPosition() returns an unsigned type, the `>= 0` check is
            // always true - confirm against Common::Stream.
            while (base_stream.GetPosition() >= 0 && PeekByte() == 0xff) {
                base_stream.WriteByte(0);

                base_stream.Seek(-2, Common::SeekOrigin::FromCurrentPos);
            }
            // First non-0xff byte absorbs the carry; then return to the write position.
            base_stream.WriteByte(static_cast<u8>((PeekByte() + 1)));
            base_stream.Seek(current_pos, Common::SeekOrigin::SetOrigin);
        }
        base_stream.WriteByte(static_cast<u8>((low_value >> (24 - offset))));

        low_value <<= offset;
        shift = count;
        // Keep only the 24 bits that have not been emitted yet.
        low_value &= 0xffffff;
        count -= 8;
    }

    low_value <<= shift;
    range = local_range;
}
| 915 | |||
| 916 | void VpxRangeEncoder::End() { | ||
| 917 | for (std::size_t index = 0; index < 32; ++index) { | ||
| 918 | Write(false); | ||
| 919 | } | ||
| 920 | } | ||
| 921 | |||
/// Returns the byte at the current stream position without advancing it: the byte is read
/// and the stream is then moved back by one.
u8 VpxRangeEncoder::PeekByte() {
    const u8 value = base_stream.ReadByte();
    base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos);

    return value;
}
| 928 | |||
// Construction and destruction need no custom logic; all state is set up by the in-class
// member initialisers.
VpxBitStreamWriter::VpxBitStreamWriter() = default;

VpxBitStreamWriter::~VpxBitStreamWriter() = default;
| 932 | |||
/// Writes the lowest value_size bits of value, most significant bit first.
void VpxBitStreamWriter::WriteU(u32 value, u32 value_size) {
    WriteBits(value, value_size);
}
| 936 | |||
| 937 | void VpxBitStreamWriter::WriteS(s32 value, u32 value_size) { | ||
| 938 | const bool sign = value < 0; | ||
| 939 | if (sign) { | ||
| 940 | value = -value; | ||
| 941 | } | ||
| 942 | |||
| 943 | WriteBits(static_cast<u32>(value << 1) | (sign ? 1 : 0), value_size + 1); | ||
| 944 | } | ||
| 945 | |||
| 946 | void VpxBitStreamWriter::WriteDeltaQ(u32 value) { | ||
| 947 | const bool delta_coded = value != 0; | ||
| 948 | WriteBit(delta_coded); | ||
| 949 | |||
| 950 | if (delta_coded) { | ||
| 951 | WriteBits(value, 4); | ||
| 952 | } | ||
| 953 | } | ||
| 954 | |||
| 955 | void VpxBitStreamWriter::WriteBits(u32 value, u32 bit_count) { | ||
| 956 | s32 value_pos = 0; | ||
| 957 | s32 remaining = bit_count; | ||
| 958 | |||
| 959 | while (remaining > 0) { | ||
| 960 | s32 copy_size = remaining; | ||
| 961 | |||
| 962 | const s32 free = GetFreeBufferBits(); | ||
| 963 | |||
| 964 | if (copy_size > free) { | ||
| 965 | copy_size = free; | ||
| 966 | } | ||
| 967 | |||
| 968 | const s32 mask = (1 << copy_size) - 1; | ||
| 969 | |||
| 970 | const s32 src_shift = (bit_count - value_pos) - copy_size; | ||
| 971 | const s32 dst_shift = (buffer_size - buffer_pos) - copy_size; | ||
| 972 | |||
| 973 | buffer |= ((value >> src_shift) & mask) << dst_shift; | ||
| 974 | |||
| 975 | value_pos += copy_size; | ||
| 976 | buffer_pos += copy_size; | ||
| 977 | remaining -= copy_size; | ||
| 978 | } | ||
| 979 | } | ||
| 980 | |||
| 981 | void VpxBitStreamWriter::WriteBit(bool state) { | ||
| 982 | WriteBits(state ? 1 : 0, 1); | ||
| 983 | } | ||
| 984 | |||
| 985 | s32 VpxBitStreamWriter::GetFreeBufferBits() { | ||
| 986 | if (buffer_pos == buffer_size) { | ||
| 987 | Flush(); | ||
| 988 | } | ||
| 989 | |||
| 990 | return buffer_size - buffer_pos; | ||
| 991 | } | ||
| 992 | |||
| 993 | void VpxBitStreamWriter::Flush() { | ||
| 994 | if (buffer_pos == 0) { | ||
| 995 | return; | ||
| 996 | } | ||
| 997 | byte_array.push_back(static_cast<u8>(buffer)); | ||
| 998 | buffer = 0; | ||
| 999 | buffer_pos = 0; | ||
| 1000 | } | ||
| 1001 | |||
/// Returns the bytes flushed so far. Bits still in the staging buffer are not included until
/// Flush() is called.
std::vector<u8>& VpxBitStreamWriter::GetByteArray() {
    return byte_array;
}

/// Const overload of GetByteArray().
const std::vector<u8>& VpxBitStreamWriter::GetByteArray() const {
    return byte_array;
}
| 1009 | |||
| 1010 | } // namespace Tegra::Decoder | ||
diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h new file mode 100644 index 000000000..748e11bae --- /dev/null +++ b/src/video_core/command_classes/codecs/vp9.h | |||
| @@ -0,0 +1,216 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <unordered_map> | ||
| 8 | #include <vector> | ||
| 9 | #include "common/common_funcs.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "common/stream.h" | ||
| 12 | #include "video_core/command_classes/codecs/vp9_types.h" | ||
| 13 | #include "video_core/command_classes/nvdec_common.h" | ||
| 14 | |||
| 15 | namespace Tegra { | ||
| 16 | class GPU; | ||
/// Distinguishes key frames from inter frames.
enum class FrameType {
    KeyFrame,   // = 0
    InterFrame, // = 1
};
| 18 | namespace Decoder { | ||
| 19 | |||
/// The VpxRangeEncoder and VpxBitStreamWriter classes are used to compose the
/// VP9 header bitstreams.
| 22 | |||
| 23 | class VpxRangeEncoder { | ||
| 24 | public: | ||
| 25 | VpxRangeEncoder(); | ||
| 26 | ~VpxRangeEncoder(); | ||
| 27 | |||
| 28 | /// Writes the rightmost value_size bits from value into the stream | ||
| 29 | void Write(s32 value, s32 value_size); | ||
| 30 | |||
| 31 | /// Writes a single bit with half probability | ||
| 32 | void Write(bool bit); | ||
| 33 | |||
| 34 | /// Writes a bit to the base_stream encoded with probability | ||
| 35 | void Write(bool bit, s32 probability); | ||
| 36 | |||
| 37 | /// Signal the end of the bitstream | ||
| 38 | void End(); | ||
| 39 | |||
| 40 | std::vector<u8>& GetBuffer() { | ||
| 41 | return base_stream.GetBuffer(); | ||
| 42 | } | ||
| 43 | |||
| 44 | const std::vector<u8>& GetBuffer() const { | ||
| 45 | return base_stream.GetBuffer(); | ||
| 46 | } | ||
| 47 | |||
| 48 | private: | ||
| 49 | u8 PeekByte(); | ||
| 50 | Common::Stream base_stream{}; | ||
| 51 | u32 low_value{}; | ||
| 52 | u32 range{0xff}; | ||
| 53 | s32 count{-24}; | ||
| 54 | s32 half_probability{128}; | ||
| 55 | static constexpr std::array<s32, 256> norm_lut{ | ||
| 56 | 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | ||
| 57 | 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | ||
| 58 | 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | ||
| 59 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | ||
| 60 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 61 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 62 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 63 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 64 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
| 65 | }; | ||
| 66 | }; | ||
| 67 | |||
| 68 | class VpxBitStreamWriter { | ||
| 69 | public: | ||
| 70 | VpxBitStreamWriter(); | ||
| 71 | ~VpxBitStreamWriter(); | ||
| 72 | |||
| 73 | /// Write an unsigned integer value | ||
| 74 | void WriteU(u32 value, u32 value_size); | ||
| 75 | |||
| 76 | /// Write a signed integer value | ||
| 77 | void WriteS(s32 value, u32 value_size); | ||
| 78 | |||
| 79 | /// Based on 6.2.10 of VP9 Spec, writes a delta coded value | ||
| 80 | void WriteDeltaQ(u32 value); | ||
| 81 | |||
| 82 | /// Write a single bit. | ||
| 83 | void WriteBit(bool state); | ||
| 84 | |||
| 85 | /// Pushes current buffer into buffer_array, resets buffer | ||
| 86 | void Flush(); | ||
| 87 | |||
| 88 | /// Returns byte_array | ||
| 89 | std::vector<u8>& GetByteArray(); | ||
| 90 | |||
| 91 | /// Returns const byte_array | ||
| 92 | const std::vector<u8>& GetByteArray() const; | ||
| 93 | |||
| 94 | private: | ||
| 95 | /// Write bit_count bits from value into buffer | ||
| 96 | void WriteBits(u32 value, u32 bit_count); | ||
| 97 | |||
| 98 | /// Gets next available position in buffer, invokes Flush() if buffer is full | ||
| 99 | s32 GetFreeBufferBits(); | ||
| 100 | |||
| 101 | s32 buffer_size{8}; | ||
| 102 | |||
| 103 | s32 buffer{}; | ||
| 104 | s32 buffer_pos{}; | ||
| 105 | std::vector<u8> byte_array; | ||
| 106 | }; | ||
| 107 | |||
| 108 | class VP9 { | ||
| 109 | public: | ||
| 110 | explicit VP9(GPU& gpu); | ||
| 111 | ~VP9(); | ||
| 112 | |||
| 113 | /// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec | ||
| 114 | /// documentation | ||
| 115 | std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state); | ||
| 116 | |||
| 117 | /// Returns true if the most recent frame was a hidden frame. | ||
| 118 | bool WasFrameHidden() const { | ||
| 119 | return hidden; | ||
| 120 | } | ||
| 121 | |||
| 122 | private: | ||
| 123 | /// Generates compressed header probability updates in the bitstream writer | ||
| 124 | template <typename T, std::size_t N> | ||
| 125 | void WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob, | ||
| 126 | const std::array<T, N>& old_prob); | ||
| 127 | |||
| 128 | /// Generates compressed header probability updates in the bitstream writer | ||
| 129 | /// If probs are not equal, WriteProbabilityDelta is invoked | ||
| 130 | void WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); | ||
| 131 | |||
| 132 | /// Generates compressed header probability deltas in the bitstream writer | ||
| 133 | void WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); | ||
| 134 | |||
| 135 | /// Adjusts old_prob depending on new_prob. Based on section 6.3.5 of VP9 Specification | ||
| 136 | s32 RemapProbability(s32 new_prob, s32 old_prob); | ||
| 137 | |||
| 138 | /// Recenters probability. Based on section 6.3.6 of VP9 Specification | ||
| 139 | s32 RecenterNonNeg(s32 new_prob, s32 old_prob); | ||
| 140 | |||
| 141 | /// Inverse of 6.3.4 Decode term subexp | ||
| 142 | void EncodeTermSubExp(VpxRangeEncoder& writer, s32 value); | ||
| 143 | |||
| 144 | /// Writes if the value is less than the test value | ||
| 145 | bool WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test); | ||
| 146 | |||
| 147 | /// Writes probability updates for the Coef probabilities | ||
| 148 | void WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode, | ||
| 149 | const std::array<u8, 2304>& new_prob, | ||
| 150 | const std::array<u8, 2304>& old_prob); | ||
| 151 | |||
| 152 | /// Write probabilities for 4-byte aligned structures | ||
| 153 | template <typename T, std::size_t N> | ||
| 154 | void WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob, | ||
| 155 | const std::array<T, N>& old_prob); | ||
| 156 | |||
| 157 | /// Write motion vector probability updates. 6.3.17 in the spec | ||
| 158 | void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); | ||
| 159 | |||
| 160 | /// 6.2.14 Tile size calculation | ||
| 161 | s32 CalcMinLog2TileCols(s32 frame_width); | ||
| 162 | s32 CalcMaxLog2TileCols(s32 frame_width); | ||
| 163 | |||
| 164 | /// Returns VP9 information from NVDEC provided offset and size | ||
| 165 | Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state); | ||
| 166 | |||
| 167 | /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct | ||
| 168 | void InsertEntropy(u64 offset, Vp9EntropyProbs& dst); | ||
| 169 | |||
| 170 | /// Returns frame to be decoded after buffering | ||
| 171 | Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state); | ||
| 172 | |||
| 173 | /// Use NVDEC providied information to compose the headers for the current frame | ||
| 174 | std::vector<u8> ComposeCompressedHeader(); | ||
| 175 | VpxBitStreamWriter ComposeUncompressedHeader(); | ||
| 176 | |||
| 177 | GPU& gpu; | ||
| 178 | std::vector<u8> frame; | ||
| 179 | |||
| 180 | std::array<s8, 4> loop_filter_ref_deltas{}; | ||
| 181 | std::array<s8, 2> loop_filter_mode_deltas{}; | ||
| 182 | |||
| 183 | bool hidden; | ||
| 184 | s64 current_frame_number = -2; // since we buffer 2 frames | ||
| 185 | s32 grace_period = 6; // frame offsets need to stabilize | ||
| 186 | std::array<FrameContexts, 4> frame_ctxs{}; | ||
| 187 | Vp9FrameContainer next_frame{}; | ||
| 188 | Vp9FrameContainer next_next_frame{}; | ||
| 189 | bool swap_next_golden{}; | ||
| 190 | |||
| 191 | Vp9PictureInfo current_frame_info{}; | ||
| 192 | Vp9EntropyProbs prev_frame_probs{}; | ||
| 193 | |||
| 194 | s32 diff_update_probability = 252; | ||
| 195 | s32 frame_sync_code = 0x498342; | ||
| 196 | static constexpr std::array<s32, 254> map_lut = { | ||
| 197 | 20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, | ||
| 198 | 36, 37, 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50, | ||
| 199 | 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66, | ||
| 200 | 67, 68, 69, 70, 71, 72, 73, 4, 74, 75, 76, 77, 78, 79, 80, 81, 82, | ||
| 201 | 83, 84, 85, 5, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 6, | ||
| 202 | 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 7, 110, 111, 112, 113, | ||
| 203 | 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, 123, 124, 125, 126, 127, 128, 129, | ||
| 204 | 130, 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, | ||
| 205 | 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11, 158, 159, 160, | ||
| 206 | 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171, 172, 173, 174, 175, 176, | ||
| 207 | 177, 178, 179, 180, 181, 13, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, | ||
| 208 | 193, 14, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207, | ||
| 209 | 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, | ||
| 210 | 224, 225, 226, 227, 228, 229, 17, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, | ||
| 211 | 240, 241, 18, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19, | ||
| 212 | }; | ||
| 213 | }; | ||
| 214 | |||
| 215 | } // namespace Decoder | ||
| 216 | } // namespace Tegra | ||
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h new file mode 100644 index 000000000..8688fdac0 --- /dev/null +++ b/src/video_core/command_classes/codecs/vp9_types.h | |||
| @@ -0,0 +1,369 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
#include <algorithm>
#include <array>
#include <list>
#include <vector>
#include "common/cityhash.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/command_classes/nvdec_common.h"
| 14 | |||
| 15 | namespace Tegra { | ||
| 16 | class GPU; | ||
| 17 | |||
| 18 | namespace Decoder { | ||
/// Dimensions of one frame. The layout is fixed at 8 bytes (see the static_assert below)
/// because this struct is embedded in PictureInfo.
struct Vp9FrameDimensions {
    s16 width{};
    s16 height{};
    s16 luma_pitch{};
    s16 chroma_pitch{};
};
static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");
| 26 | |||
/// Bit flags stored in PictureInfo::vp9_flags. Test them with a bitwise AND.
enum FrameFlags : u32 {
    IsKeyFrame = 1 << 0,
    LastFrameIsKeyFrame = 1 << 1,
    FrameSizeChanged = 1 << 2,
    ErrorResilientMode = 1 << 3,
    LastShowFrame = 1 << 4,
    IntraOnly = 1 << 5,
};
| 35 | |||
/// Which components of a motion vector are non-zero. Values are sequential from 0.
enum class MvJointType {
    MvJointZero,   // 0: zero vector
    MvJointHnzvz,  // 1: vertical zero, horizontal non-zero
    MvJointHzvnz,  // 2: horizontal zero, vertical non-zero
    MvJointHnzvnz, // 3: both components non-zero
};
/// Motion vector magnitude classes, in integer pels. Values are sequential from 0.
enum class MvClassType {
    MvClass0,  // 0:  (0, 2]
    MvClass1,  // 1:  (2, 4]
    MvClass2,  // 2:  (4, 8]
    MvClass3,  // 3:  (8, 16]
    MvClass4,  // 4:  (16, 32]
    MvClass5,  // 5:  (32, 64]
    MvClass6,  // 6:  (64, 128]
    MvClass7,  // 7:  (128, 256]
    MvClass8,  // 8:  (256, 512]
    MvClass9,  // 9:  (512, 1024]
    MvClass10, // 10: (1024, 2048]
};
| 55 | |||
/// Block sizes, numbered sequentially from 0. BlockSizes is the number of valid sizes and
/// doubles as the invalid marker.
enum class BlockSize {
    Block4x4, // 0
    Block4x8,
    Block8x4,
    Block8x8,
    Block8x16,
    Block16x8,
    Block16x16,
    Block16x32,
    Block32x16,
    Block32x32,
    Block32x64,
    Block64x32,
    Block64x64, // 12
    BlockSizes, // 13
    BlockInvalid = BlockSizes,
};
| 73 | |||
/// Intra and inter prediction modes, numbered sequentially from 0. MbModeCount is the number
/// of modes.
enum class PredictionMode {
    DcPred,      // 0:  average of above and left pixels
    VPred,       // 1:  vertical
    HPred,       // 2:  horizontal
    D45Pred,     // 3:  directional 45 deg = round(arctan(1 / 1) * 180 / pi)
    D135Pred,    // 4:  directional 135 deg = 180 - 45
    D117Pred,    // 5:  directional 117 deg = 180 - 63
    D153Pred,    // 6:  directional 153 deg = 180 - 27
    D207Pred,    // 7:  directional 207 deg = 180 + 27
    D63Pred,     // 8:  directional 63 deg = round(arctan(2 / 1) * 180 / pi)
    TmPred,      // 9:  true-motion
    NearestMv,   // 10
    NearMv,      // 11
    ZeroMv,      // 12
    NewMv,       // 13
    MbModeCount, // 14
};
| 91 | |||
/// Transform sizes, numbered sequentially from 0. TxSizes is the number of sizes.
enum class TxSize {
    Tx4x4,   // 0: 4x4 transform
    Tx8x8,   // 1: 8x8 transform
    Tx16x16, // 2: 16x16 transform
    Tx32x32, // 3: 32x32 transform
    TxSizes, // 4
};
| 99 | |||
/// Frame-level transform modes, numbered sequentially from 0. TxModes is the number of modes.
enum class TxMode {
    Only4X4,      // 0: only the 4x4 transform is used
    Allow8X8,     // 1: block transform size up to 8x8
    Allow16X16,   // 2: block transform size up to 16x16
    Allow32X32,   // 3: block transform size up to 32x32
    TxModeSelect, // 4: transform specified for each block
    TxModes,      // 5
};
| 108 | |||
/// Reference modes, numbered sequentially from 0. ReferenceModes is the number of modes.
enum class reference_mode {
    SingleReference,     // 0
    CompoundReference,   // 1
    ReferenceModeSelect, // 2
    ReferenceModes,      // 3
};
| 115 | |||
/// Segmentation parameters as embedded in PictureInfo. The layout is fixed at 0x64 bytes
/// (see the static_assert below); PictureInfo::Convert() copies every field into
/// Vp9PictureInfo.
struct Segmentation {
    u8 enabled{};
    u8 update_map{};
    u8 temporal_update{};
    u8 abs_delta{};
    std::array<u32, 8> feature_mask{};
    std::array<std::array<s16, 4>, 8> feature_data{};
};
static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");
| 125 | |||
/// Loop filter delta parameters as embedded in PictureInfo. The layout is fixed at 7 bytes
/// (see the static_assert below).
struct LoopFilter {
    u8 mode_ref_delta_enabled{}; // non-zero means enabled (see PictureInfo::Convert)
    std::array<s8, 4> ref_deltas{};
    std::array<s8, 2> mode_deltas{};
};
static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size");
| 132 | |||
/// Flattened entropy probability tables. Every member is a byte array, so the struct has no
/// padding and its size is the sum of the array lengths (0x9F4, see the static_assert below).
struct Vp9EntropyProbs {
    std::array<u8, 36> y_mode_prob{};
    std::array<u8, 64> partition_prob{};
    std::array<u8, 2304> coef_probs{};
    std::array<u8, 8> switchable_interp_prob{};
    std::array<u8, 28> inter_mode_prob{};
    std::array<u8, 4> intra_inter_prob{};
    std::array<u8, 5> comp_inter_prob{};
    std::array<u8, 10> single_ref_prob{};
    std::array<u8, 5> comp_ref_prob{};
    std::array<u8, 6> tx_32x32_prob{};
    std::array<u8, 4> tx_16x16_prob{};
    std::array<u8, 2> tx_8x8_prob{};
    std::array<u8, 3> skip_probs{};
    std::array<u8, 3> joints{};
    std::array<u8, 2> sign{};
    std::array<u8, 20> classes{};
    std::array<u8, 2> class_0{};
    std::array<u8, 20> prob_bits{};
    std::array<u8, 12> class_0_fr{};
    std::array<u8, 6> fr{};
    std::array<u8, 2> class_0_hp{};
    std::array<u8, 2> high_precision{};
};
static_assert(sizeof(Vp9EntropyProbs) == 0x9F4, "Vp9EntropyProbs is an invalid size");
| 158 | |||
/// Per-frame parameters in a form convenient for header composition. Most fields are filled
/// by PictureInfo::Convert(). The member order must stay in sync with the designated
/// initialisers used there.
struct Vp9PictureInfo {
    bool is_key_frame{};
    bool intra_only{};
    bool last_frame_was_key{};
    bool frame_size_changed{};
    bool error_resilient_mode{};
    bool last_frame_shown{};
    bool show_frame{}; // not set by PictureInfo::Convert()
    std::array<s8, 4> ref_frame_sign_bias{};
    s32 base_q_index{};
    s32 y_dc_delta_q{};
    s32 uv_dc_delta_q{};
    s32 uv_ac_delta_q{};
    bool lossless{};
    s32 transform_mode{};
    bool allow_high_precision_mv{};
    s32 interp_filter{};
    s32 reference_mode{};
    s8 comp_fixed_ref{};
    std::array<s8, 2> comp_var_ref{};
    s32 log2_tile_cols{};
    s32 log2_tile_rows{};
    bool segment_enabled{};
    bool segment_map_update{};
    bool segment_map_temporal_update{};
    s32 segment_abs_delta{};
    std::array<u32, 8> segment_feature_enable{};
    std::array<std::array<s16, 4>, 8> segment_feature_data{};
    bool mode_ref_delta_enabled{};
    bool use_prev_in_find_mv_refs{};
    std::array<s8, 4> ref_deltas{};
    std::array<s8, 2> mode_deltas{};
    Vp9EntropyProbs entropy{}; // not set by PictureInfo::Convert()
    Vp9FrameDimensions frame_size{};
    u8 first_level{};
    u8 sharpness_level{};
    u32 bitstream_size{};
    std::array<u64, 4> frame_offsets{}; // not set by PictureInfo::Convert()
    std::array<bool, 4> refresh_frame{}; // not set by PictureInfo::Convert()
};
| 199 | |||
/// Bundles the picture information of one frame with its raw bitstream bytes.
struct Vp9FrameContainer {
    Vp9PictureInfo info{};
    std::vector<u8> bit_stream;
};
| 204 | |||
| 205 | struct PictureInfo { | ||
| 206 | INSERT_PADDING_WORDS(12); | ||
| 207 | u32 bitstream_size{}; | ||
| 208 | INSERT_PADDING_WORDS(5); | ||
| 209 | Vp9FrameDimensions last_frame_size{}; | ||
| 210 | Vp9FrameDimensions golden_frame_size{}; | ||
| 211 | Vp9FrameDimensions alt_frame_size{}; | ||
| 212 | Vp9FrameDimensions current_frame_size{}; | ||
| 213 | u32 vp9_flags{}; | ||
| 214 | std::array<s8, 4> ref_frame_sign_bias{}; | ||
| 215 | u8 first_level{}; | ||
| 216 | u8 sharpness_level{}; | ||
| 217 | u8 base_q_index{}; | ||
| 218 | u8 y_dc_delta_q{}; | ||
| 219 | u8 uv_ac_delta_q{}; | ||
| 220 | u8 uv_dc_delta_q{}; | ||
| 221 | u8 lossless{}; | ||
| 222 | u8 tx_mode{}; | ||
| 223 | u8 allow_high_precision_mv{}; | ||
| 224 | u8 interp_filter{}; | ||
| 225 | u8 reference_mode{}; | ||
| 226 | s8 comp_fixed_ref{}; | ||
| 227 | std::array<s8, 2> comp_var_ref{}; | ||
| 228 | u8 log2_tile_cols{}; | ||
| 229 | u8 log2_tile_rows{}; | ||
| 230 | Segmentation segmentation{}; | ||
| 231 | LoopFilter loop_filter{}; | ||
| 232 | INSERT_PADDING_BYTES(5); | ||
| 233 | u32 surface_params{}; | ||
| 234 | INSERT_PADDING_WORDS(3); | ||
| 235 | |||
| 236 | Vp9PictureInfo Convert() const { | ||
| 237 | |||
| 238 | return Vp9PictureInfo{ | ||
| 239 | .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0, | ||
| 240 | .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0, | ||
| 241 | .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0, | ||
| 242 | .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, | ||
| 243 | .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0, | ||
| 244 | .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, | ||
| 245 | .ref_frame_sign_bias = ref_frame_sign_bias, | ||
| 246 | .base_q_index = base_q_index, | ||
| 247 | .y_dc_delta_q = y_dc_delta_q, | ||
| 248 | .uv_dc_delta_q = uv_dc_delta_q, | ||
| 249 | .uv_ac_delta_q = uv_ac_delta_q, | ||
| 250 | .lossless = lossless != 0, | ||
| 251 | .transform_mode = tx_mode, | ||
| 252 | .allow_high_precision_mv = allow_high_precision_mv != 0, | ||
| 253 | .interp_filter = interp_filter, | ||
| 254 | .reference_mode = reference_mode, | ||
| 255 | .comp_fixed_ref = comp_fixed_ref, | ||
| 256 | .comp_var_ref = comp_var_ref, | ||
| 257 | .log2_tile_cols = log2_tile_cols, | ||
| 258 | .log2_tile_rows = log2_tile_rows, | ||
| 259 | .segment_enabled = segmentation.enabled != 0, | ||
| 260 | .segment_map_update = segmentation.update_map != 0, | ||
| 261 | .segment_map_temporal_update = segmentation.temporal_update != 0, | ||
| 262 | .segment_abs_delta = segmentation.abs_delta, | ||
| 263 | .segment_feature_enable = segmentation.feature_mask, | ||
| 264 | .segment_feature_data = segmentation.feature_data, | ||
| 265 | .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0, | ||
| 266 | .use_prev_in_find_mv_refs = !(vp9_flags == (FrameFlags::ErrorResilientMode)) && | ||
| 267 | !(vp9_flags == (FrameFlags::FrameSizeChanged)) && | ||
| 268 | !(vp9_flags == (FrameFlags::IntraOnly)) && | ||
| 269 | (vp9_flags == (FrameFlags::LastShowFrame)) && | ||
| 270 | !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)), | ||
| 271 | .ref_deltas = loop_filter.ref_deltas, | ||
| 272 | .mode_deltas = loop_filter.mode_deltas, | ||
| 273 | .frame_size = current_frame_size, | ||
| 274 | .first_level = first_level, | ||
| 275 | .sharpness_level = sharpness_level, | ||
| 276 | .bitstream_size = bitstream_size, | ||
| 277 | }; | ||
| 278 | } | ||
| 279 | }; | ||
| 280 | static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size"); | ||
| 281 | |||
| 282 | struct EntropyProbs { | ||
| 283 | INSERT_PADDING_BYTES(1024); | ||
| 284 | std::array<std::array<u8, 4>, 7> inter_mode_prob{}; | ||
| 285 | std::array<u8, 4> intra_inter_prob{}; | ||
| 286 | INSERT_PADDING_BYTES(80); | ||
| 287 | std::array<std::array<u8, 1>, 2> tx_8x8_prob{}; | ||
| 288 | std::array<std::array<u8, 2>, 2> tx_16x16_prob{}; | ||
| 289 | std::array<std::array<u8, 3>, 2> tx_32x32_prob{}; | ||
| 290 | std::array<u8, 4> y_mode_prob_e8{}; | ||
| 291 | std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{}; | ||
| 292 | INSERT_PADDING_BYTES(64); | ||
| 293 | std::array<std::array<u8, 4>, 16> partition_prob{}; | ||
| 294 | INSERT_PADDING_BYTES(10); | ||
| 295 | std::array<std::array<u8, 2>, 4> switchable_interp_prob{}; | ||
| 296 | std::array<u8, 5> comp_inter_prob{}; | ||
| 297 | std::array<u8, 4> skip_probs{}; | ||
| 298 | std::array<u8, 3> joints{}; | ||
| 299 | std::array<u8, 2> sign{}; | ||
| 300 | std::array<std::array<u8, 1>, 2> class_0{}; | ||
| 301 | std::array<std::array<u8, 3>, 2> fr{}; | ||
| 302 | std::array<u8, 2> class_0_hp{}; | ||
| 303 | std::array<u8, 2> high_precision{}; | ||
| 304 | std::array<std::array<u8, 10>, 2> classes{}; | ||
| 305 | std::array<std::array<std::array<u8, 3>, 2>, 2> class_0_fr{}; | ||
| 306 | std::array<std::array<u8, 10>, 2> pred_bits{}; | ||
| 307 | std::array<std::array<u8, 2>, 5> single_ref_prob{}; | ||
| 308 | std::array<u8, 5> comp_ref_prob{}; | ||
| 309 | INSERT_PADDING_BYTES(17); | ||
| 310 | std::array<std::array<std::array<std::array<std::array<std::array<u8, 4>, 6>, 6>, 2>, 2>, 4> | ||
| 311 | coef_probs{}; | ||
| 312 | |||
| 313 | void Convert(Vp9EntropyProbs& fc) { | ||
| 314 | std::memcpy(fc.inter_mode_prob.data(), inter_mode_prob.data(), fc.inter_mode_prob.size()); | ||
| 315 | |||
| 316 | std::memcpy(fc.intra_inter_prob.data(), intra_inter_prob.data(), | ||
| 317 | fc.intra_inter_prob.size()); | ||
| 318 | |||
| 319 | std::memcpy(fc.tx_8x8_prob.data(), tx_8x8_prob.data(), fc.tx_8x8_prob.size()); | ||
| 320 | std::memcpy(fc.tx_16x16_prob.data(), tx_16x16_prob.data(), fc.tx_16x16_prob.size()); | ||
| 321 | std::memcpy(fc.tx_32x32_prob.data(), tx_32x32_prob.data(), fc.tx_32x32_prob.size()); | ||
| 322 | |||
| 323 | for (s32 i = 0; i < 4; i++) { | ||
| 324 | for (s32 j = 0; j < 9; j++) { | ||
| 325 | fc.y_mode_prob[j + 9 * i] = j < 8 ? y_mode_prob_e0e7[i][j] : y_mode_prob_e8[i]; | ||
| 326 | } | ||
| 327 | } | ||
| 328 | |||
| 329 | std::memcpy(fc.partition_prob.data(), partition_prob.data(), fc.partition_prob.size()); | ||
| 330 | |||
| 331 | std::memcpy(fc.switchable_interp_prob.data(), switchable_interp_prob.data(), | ||
| 332 | fc.switchable_interp_prob.size()); | ||
| 333 | std::memcpy(fc.comp_inter_prob.data(), comp_inter_prob.data(), fc.comp_inter_prob.size()); | ||
| 334 | std::memcpy(fc.skip_probs.data(), skip_probs.data(), fc.skip_probs.size()); | ||
| 335 | |||
| 336 | std::memcpy(fc.joints.data(), joints.data(), fc.joints.size()); | ||
| 337 | |||
| 338 | std::memcpy(fc.sign.data(), sign.data(), fc.sign.size()); | ||
| 339 | std::memcpy(fc.class_0.data(), class_0.data(), fc.class_0.size()); | ||
| 340 | std::memcpy(fc.fr.data(), fr.data(), fc.fr.size()); | ||
| 341 | std::memcpy(fc.class_0_hp.data(), class_0_hp.data(), fc.class_0_hp.size()); | ||
| 342 | std::memcpy(fc.high_precision.data(), high_precision.data(), fc.high_precision.size()); | ||
| 343 | std::memcpy(fc.classes.data(), classes.data(), fc.classes.size()); | ||
| 344 | std::memcpy(fc.class_0_fr.data(), class_0_fr.data(), fc.class_0_fr.size()); | ||
| 345 | std::memcpy(fc.prob_bits.data(), pred_bits.data(), fc.prob_bits.size()); | ||
| 346 | std::memcpy(fc.single_ref_prob.data(), single_ref_prob.data(), fc.single_ref_prob.size()); | ||
| 347 | std::memcpy(fc.comp_ref_prob.data(), comp_ref_prob.data(), fc.comp_ref_prob.size()); | ||
| 348 | |||
| 349 | std::memcpy(fc.coef_probs.data(), coef_probs.data(), fc.coef_probs.size()); | ||
| 350 | } | ||
| 351 | }; | ||
| 352 | static_assert(sizeof(EntropyProbs) == 0xEA0, "EntropyProbs is an invalid size"); | ||
| 353 | |||
/// Identifies one of three reference slots (Last, Golden, AltRef).
enum class Ref { Last, Golden, AltRef };
| 355 | |||
/// One entry of a reference pool: a frame number, the reference slot it belongs to and a
/// refresh flag. NOTE(review): the consumers of this type are outside this header - confirm
/// the exact meaning of `frame` and `refresh` there.
struct RefPoolElement {
    s64 frame{};
    Ref ref{};
    bool refresh{};
};
| 361 | |||
/// A set of entropy probabilities together with bookkeeping fields.
/// NOTE(review): `from` presumably records the frame the probabilities originate from and
/// `adapted` whether they have been adapted - confirm against the VP9 decoder code.
struct FrameContexts {
    s64 from{};
    bool adapted{};
    Vp9EntropyProbs probs{};
};
| 367 | |||
| 368 | }; // namespace Decoder | ||
| 369 | }; // namespace Tegra | ||
diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp new file mode 100644 index 000000000..a5234ee47 --- /dev/null +++ b/src/video_core/command_classes/host1x.cpp | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "video_core/command_classes/host1x.h" | ||
| 7 | #include "video_core/gpu.h" | ||
| 8 | |||
/// Stores a reference to the GPU; the reference must stay valid for the lifetime of this object.
Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {}

/// Nothing to release explicitly.
Tegra::Host1x::~Host1x() = default;
| 12 | |||
| 13 | void Tegra::Host1x::StateWrite(u32 offset, u32 arguments) { | ||
| 14 | u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u32); | ||
| 15 | std::memcpy(state_offset, &arguments, sizeof(u32)); | ||
| 16 | } | ||
| 17 | |||
| 18 | void Tegra::Host1x::ProcessMethod(Host1x::Method method, const std::vector<u32>& arguments) { | ||
| 19 | StateWrite(static_cast<u32>(method), arguments[0]); | ||
| 20 | switch (method) { | ||
| 21 | case Method::WaitSyncpt: | ||
| 22 | Execute(arguments[0]); | ||
| 23 | break; | ||
| 24 | case Method::LoadSyncptPayload32: | ||
| 25 | syncpoint_value = arguments[0]; | ||
| 26 | break; | ||
| 27 | case Method::WaitSyncpt32: | ||
| 28 | Execute(arguments[0]); | ||
| 29 | break; | ||
| 30 | default: | ||
| 31 | UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast<u32>(method)); | ||
| 32 | break; | ||
| 33 | } | ||
| 34 | } | ||
| 35 | |||
/// Placeholder for the syncpoint wait. `data` is the method argument forwarded by
/// ProcessMethod; it is currently unused because the body is not implemented yet.
void Tegra::Host1x::Execute(u32 data) {
    // This method waits on a valid syncpoint.
    // TODO: Implement when proper Async is in place
}
diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/command_classes/host1x.h new file mode 100644 index 000000000..501a5ed2e --- /dev/null +++ b/src/video_core/command_classes/host1x.h | |||
| @@ -0,0 +1,78 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <vector> | ||
| 8 | #include "common/common_funcs.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | |||
| 11 | namespace Tegra { | ||
| 12 | class GPU; | ||
| 13 | class Nvdec; | ||
| 14 | |||
/// Handler for the Host1x command class: keeps a copy of the class registers and reacts to
/// the syncpoint-related methods listed in Method.
class Host1x {
public:
    /// Register file of the class. Every member is one 32-bit word; the member order and the
    /// padding determine each register's word index, so members must not be reordered.
    struct Host1xClassRegisters {
        u32 incr_syncpt{};
        u32 incr_syncpt_ctrl{};
        u32 incr_syncpt_error{};
        INSERT_PADDING_WORDS(5);
        u32 wait_syncpt{};
        u32 wait_syncpt_base{};
        u32 wait_syncpt_incr{};
        u32 load_syncpt_base{};
        u32 incr_syncpt_base{};
        u32 clear{};
        u32 wait{};
        u32 wait_with_interrupt{};
        u32 delay_use{};
        u32 tick_count_high{};
        u32 tick_count_low{};
        u32 tick_ctrl{};
        INSERT_PADDING_WORDS(23);
        u32 ind_ctrl{};
        u32 ind_off2{};
        u32 ind_off{};
        std::array<u32, 31> ind_data{};
        INSERT_PADDING_WORDS(1);
        u32 load_syncpoint_payload32{};
        u32 stall_ctrl{};
        u32 wait_syncpt32{};
        u32 wait_syncpt_base32{};
        u32 load_syncpt_base32{};
        u32 incr_syncpt_base32{};
        u32 stall_count_high{};
        u32 stall_count_low{};
        u32 xref_ctrl{};
        u32 channel_xref_high{};
        u32 channel_xref_low{};
    };
    static_assert(sizeof(Host1xClassRegisters) == 0x164, "Host1xClassRegisters is an invalid size");

    /// Method ids, expressed as the word index of the corresponding register
    /// (byte offset within Host1xClassRegisters divided by 4).
    enum class Method : u32 {
        WaitSyncpt = offsetof(Host1xClassRegisters, wait_syncpt) / 4,
        LoadSyncptPayload32 = offsetof(Host1xClassRegisters, load_syncpoint_payload32) / 4,
        WaitSyncpt32 = offsetof(Host1xClassRegisters, wait_syncpt32) / 4,
    };

    explicit Host1x(GPU& gpu);
    ~Host1x();

    /// Writes the method into the state, Invoke Execute() if encountered
    void ProcessMethod(Host1x::Method method, const std::vector<u32>& arguments);

private:
    /// For Host1x, execute is waiting on a syncpoint previously written into the state
    void Execute(u32 data);

    /// Write argument into the provided offset
    void StateWrite(u32 offset, u32 arguments);

    /// Last value received through Method::LoadSyncptPayload32.
    u32 syncpoint_value{};
    /// Shadow copy of the class registers, updated by StateWrite.
    Host1xClassRegisters state{};
    GPU& gpu;
};
| 77 | |||
| 78 | } // namespace Tegra | ||
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp new file mode 100644 index 000000000..ede9466eb --- /dev/null +++ b/src/video_core/command_classes/nvdec.cpp | |||
| @@ -0,0 +1,56 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <bitset> | ||
| 6 | #include "common/assert.h" | ||
| 7 | #include "common/bit_util.h" | ||
| 8 | #include "core/memory.h" | ||
| 9 | #include "video_core/command_classes/nvdec.h" | ||
| 10 | #include "video_core/gpu.h" | ||
| 11 | #include "video_core/memory_manager.h" | ||
| 12 | |||
| 13 | namespace Tegra { | ||
| 14 | |||
/// Stores the GPU reference and creates the Codec instance that is handed the same GPU.
Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {}

/// Defined here rather than in the header; the owned Codec is released by its unique_ptr.
Nvdec::~Nvdec() = default;
| 18 | |||
| 19 | void Nvdec::ProcessMethod(Nvdec::Method method, const std::vector<u32>& arguments) { | ||
| 20 | if (method == Method::SetVideoCodec) { | ||
| 21 | codec->StateWrite(static_cast<u32>(method), arguments[0]); | ||
| 22 | } else { | ||
| 23 | codec->StateWrite(static_cast<u32>(method), static_cast<u64>(arguments[0]) << 8); | ||
| 24 | } | ||
| 25 | |||
| 26 | switch (method) { | ||
| 27 | case Method::SetVideoCodec: | ||
| 28 | codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(arguments[0])); | ||
| 29 | break; | ||
| 30 | case Method::Execute: | ||
| 31 | Execute(); | ||
| 32 | break; | ||
| 33 | } | ||
| 34 | } | ||
| 35 | |||
/// Returns whatever the codec reports as its current frame (mutable access).
AVFrame* Nvdec::GetFrame() {
    return codec->GetCurrentFrame();
}

/// Const overload of GetFrame(); also forwards to the codec's current frame.
const AVFrame* Nvdec::GetFrame() const {
    return codec->GetCurrentFrame();
}
| 43 | |||
| 44 | void Nvdec::Execute() { | ||
| 45 | switch (codec->GetCurrentCodec()) { | ||
| 46 | case NvdecCommon::VideoCodec::H264: | ||
| 47 | case NvdecCommon::VideoCodec::Vp9: | ||
| 48 | codec->Decode(); | ||
| 49 | break; | ||
| 50 | default: | ||
| 51 | UNIMPLEMENTED_MSG("Unknown codec {}", static_cast<u32>(codec->GetCurrentCodec())); | ||
| 52 | break; | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | } // namespace Tegra | ||
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h new file mode 100644 index 000000000..c1a9d843e --- /dev/null +++ b/src/video_core/command_classes/nvdec.h | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <vector> | ||
| 8 | #include "common/common_funcs.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/command_classes/codecs/codec.h" | ||
| 11 | |||
| 12 | namespace Tegra { | ||
| 13 | class GPU; | ||
| 14 | |||
/// Handler for the NVDEC command class. Owns the Codec object and forwards method writes
/// and decode requests to it.
class Nvdec {
public:
    /// Method ids that ProcessMethod treats specially.
    enum class Method : u32 {
        SetVideoCodec = 0x80,
        Execute = 0xc0,
    };

    explicit Nvdec(GPU& gpu);
    ~Nvdec();

    /// Writes the method into the state, Invoke Execute() if encountered
    void ProcessMethod(Nvdec::Method method, const std::vector<u32>& arguments);

    /// Return most recently decoded frame
    AVFrame* GetFrame();
    const AVFrame* GetFrame() const;

private:
    /// Invoke codec to decode a frame
    void Execute();

    GPU& gpu;
    /// Codec owned exclusively by this object.
    std::unique_ptr<Tegra::Codec> codec;
};
| 39 | } // namespace Tegra | ||
diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h new file mode 100644 index 000000000..01b5e086d --- /dev/null +++ b/src/video_core/command_classes/nvdec_common.h | |||
| @@ -0,0 +1,48 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_funcs.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | |||
| 10 | namespace Tegra::NvdecCommon { | ||
| 11 | |||
/// Register layout of the NVDEC class. Named registers are 64 bits wide and the padding is
/// counted in 32-bit words; member order and padding fix each register's position, and the
/// total size is pinned to 0xBC0 bytes by the static_assert below.
struct NvdecRegisters {
    INSERT_PADDING_WORDS(256);
    u64 set_codec_id{};
    INSERT_PADDING_WORDS(254);
    u64 set_platform_id{};
    u64 picture_info_offset{};
    u64 frame_bitstream_offset{};
    u64 frame_number{};
    u64 h264_slice_data_offsets{};
    u64 h264_mv_dump_offset{};
    INSERT_PADDING_WORDS(6);
    u64 frame_stats_offset{};
    u64 h264_last_surface_luma_offset{};
    u64 h264_last_surface_chroma_offset{};
    std::array<u64, 17> surface_luma_offset{};
    std::array<u64, 17> surface_chroma_offset{};
    INSERT_PADDING_WORDS(132);
    u64 vp9_entropy_probs_offset{};
    u64 vp9_backward_updates_offset{};
    u64 vp9_last_frame_segmap_offset{};
    u64 vp9_curr_frame_segmap_offset{};
    INSERT_PADDING_WORDS(2);
    u64 vp9_last_frame_mvs_offset{};
    u64 vp9_curr_frame_mvs_offset{};
    INSERT_PADDING_WORDS(2);
};
static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");
| 39 | |||
/// Codec identifiers. The numeric values are fixed: callers cast a raw 32-bit method
/// argument directly to this enum, so they must not be renumbered.
enum class VideoCodec : u32 {
    None = 0x0,
    H264 = 0x3,
    Vp8 = 0x5,
    H265 = 0x7,
    Vp9 = 0x9,
};
| 47 | |||
| 48 | } // namespace Tegra::NvdecCommon | ||
diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/command_classes/sync_manager.cpp new file mode 100644 index 000000000..a0ab44855 --- /dev/null +++ b/src/video_core/command_classes/sync_manager.cpp | |||
| @@ -0,0 +1,60 @@ | |||
| 1 | // MIT License | ||
| 2 | // | ||
| 3 | // Copyright (c) Ryujinx Team and Contributors | ||
| 4 | // | ||
| 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and | ||
| 6 | // associated documentation files (the "Software"), to deal in the Software without restriction, | ||
| 7 | // including without limitation the rights to use, copy, modify, merge, publish, distribute, | ||
| 8 | // sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is | ||
| 9 | // furnished to do so, subject to the following conditions: | ||
| 10 | // | ||
| 11 | // The above copyright notice and this permission notice shall be included in all copies or | ||
| 12 | // substantial portions of the Software. | ||
| 13 | // | ||
| 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT | ||
| 15 | // NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
| 16 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, | ||
| 17 | // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
| 19 | // | ||
| 20 | |||
| 21 | #include <algorithm> | ||
| 22 | #include "sync_manager.h" | ||
| 23 | #include "video_core/gpu.h" | ||
| 24 | |||
| 25 | namespace Tegra { | ||
| 26 | SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {} | ||
| 27 | SyncptIncrManager::~SyncptIncrManager() = default; | ||
| 28 | |||
| 29 | void SyncptIncrManager::Increment(u32 id) { | ||
| 30 | increments.push_back(SyncptIncr{0, id, true}); | ||
| 31 | IncrementAllDone(); | ||
| 32 | } | ||
| 33 | |||
| 34 | u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) { | ||
| 35 | const u32 handle = current_id++; | ||
| 36 | increments.push_back(SyncptIncr{handle, class_id, id}); | ||
| 37 | return handle; | ||
| 38 | } | ||
| 39 | |||
| 40 | void SyncptIncrManager::SignalDone(u32 handle) { | ||
| 41 | auto done_incr = std::find_if(increments.begin(), increments.end(), | ||
| 42 | [handle](SyncptIncr incr) { return incr.id == handle; }); | ||
| 43 | if (done_incr != increments.end()) { | ||
| 44 | const SyncptIncr incr = *done_incr; | ||
| 45 | *done_incr = SyncptIncr{incr.id, incr.class_id, incr.syncpt_id, true}; | ||
| 46 | } | ||
| 47 | IncrementAllDone(); | ||
| 48 | } | ||
| 49 | |||
| 50 | void SyncptIncrManager::IncrementAllDone() { | ||
| 51 | std::size_t done_count = 0; | ||
| 52 | for (; done_count < increments.size(); ++done_count) { | ||
| 53 | if (!increments[done_count].complete) { | ||
| 54 | break; | ||
| 55 | } | ||
| 56 | gpu.IncrementSyncPoint(increments[done_count].syncpt_id); | ||
| 57 | } | ||
| 58 | increments.erase(increments.begin(), increments.begin() + done_count); | ||
| 59 | } | ||
| 60 | } // namespace Tegra | ||
diff --git a/src/video_core/command_classes/sync_manager.h b/src/video_core/command_classes/sync_manager.h new file mode 100644 index 000000000..353b67573 --- /dev/null +++ b/src/video_core/command_classes/sync_manager.h | |||
| @@ -0,0 +1,64 @@ | |||
| 1 | // MIT License | ||
| 2 | // | ||
| 3 | // Copyright (c) Ryujinx Team and Contributors | ||
| 4 | // | ||
| 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and | ||
| 6 | // associated documentation files (the "Software"), to deal in the Software without restriction, | ||
| 7 | // including without limitation the rights to use, copy, modify, merge, publish, distribute, | ||
| 8 | // sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is | ||
| 9 | // furnished to do so, subject to the following conditions: | ||
| 10 | // | ||
| 11 | // The above copyright notice and this permission notice shall be included in all copies or | ||
| 12 | // substantial portions of the Software. | ||
| 13 | // | ||
| 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT | ||
| 15 | // NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
| 16 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, | ||
| 17 | // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
| 19 | // | ||
| 20 | |||
| 21 | #pragma once | ||
| 22 | |||
| 23 | #include <mutex> | ||
| 24 | #include <vector> | ||
| 25 | #include "common/common_types.h" | ||
| 26 | |||
| 27 | namespace Tegra { | ||
| 28 | class GPU; | ||
/// One queued syncpoint increment.
struct SyncptIncr {
    u32 id;        ///< Handle used to find this entry again.
    u32 class_id;  ///< Id of the class that requested the increment.
    u32 syncpt_id; ///< Syncpoint to increment once the entry is complete.
    bool complete; ///< True once the increment may be delivered.

    // NOTE(review): the parameter order is (id, syncpt_id_, class_id_, done), i.e. the second
    // and third parameters are in the opposite order to the class_id/syncpt_id members.
    // Positional callers must pass the syncpoint id BEFORE the class id.
    SyncptIncr(u32 id, u32 syncpt_id_, u32 class_id_, bool done = false)
        : id(id), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {}
};
| 38 | |||
/// Keeps an ordered queue of syncpoint increments and delivers them to the GPU strictly in
/// queue order once they are complete.
class SyncptIncrManager {
public:
    explicit SyncptIncrManager(GPU& gpu);
    ~SyncptIncrManager();

    /// Add syncpoint id and increment all
    void Increment(u32 id);

    /// Returns a handle to increment later
    u32 IncrementWhenDone(u32 class_id, u32 id);

    /// Marks the increment identified by handle as done, then runs IncrementAllDone()
    void SignalDone(u32 handle);

    /// Increment all sequential pending increments that are already done.
    void IncrementAllDone();

private:
    /// Pending increments, oldest first.
    std::vector<SyncptIncr> increments;
    // NOTE(review): none of the member functions defined in sync_manager.cpp locks this
    // mutex - confirm whether all callers run on a single thread.
    std::mutex increment_lock;
    /// Next handle returned by IncrementWhenDone().
    u32 current_id{};

    GPU& gpu;
};
| 63 | |||
| 64 | } // namespace Tegra | ||
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp new file mode 100644 index 000000000..66e15a1a8 --- /dev/null +++ b/src/video_core/command_classes/vic.cpp | |||
| @@ -0,0 +1,180 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include "common/assert.h" | ||
| 7 | #include "video_core/command_classes/nvdec.h" | ||
| 8 | #include "video_core/command_classes/vic.h" | ||
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 10 | #include "video_core/gpu.h" | ||
| 11 | #include "video_core/memory_manager.h" | ||
| 12 | #include "video_core/texture_cache/surface_params.h" | ||
| 13 | |||
| 14 | extern "C" { | ||
| 15 | #include <libswscale/swscale.h> | ||
| 16 | } | ||
| 17 | |||
| 18 | namespace Tegra { | ||
| 19 | |||
/// Stores the GPU reference and takes shared ownership of the Nvdec instance whose frames
/// Execute() reads.
Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
    : gpu(gpu_), nvdec_processor(std::move(nvdec_processor_)) {}
// NOTE(review): Execute() allocates scaler_ctx with sws_getContext, but this defaulted
// destructor does not free it - confirm whether the context is released elsewhere.
Vic::~Vic() = default;
| 23 | |||
| 24 | void Vic::VicStateWrite(u32 offset, u32 arguments) { | ||
| 25 | u8* const state_offset = reinterpret_cast<u8*>(&vic_state) + offset * sizeof(u32); | ||
| 26 | std::memcpy(state_offset, &arguments, sizeof(u32)); | ||
| 27 | } | ||
| 28 | |||
| 29 | void Vic::ProcessMethod(Vic::Method method, const std::vector<u32>& arguments) { | ||
| 30 | LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast<u32>(method)); | ||
| 31 | VicStateWrite(static_cast<u32>(method), arguments[0]); | ||
| 32 | const u64 arg = static_cast<u64>(arguments[0]) << 8; | ||
| 33 | switch (method) { | ||
| 34 | case Method::Execute: | ||
| 35 | Execute(); | ||
| 36 | break; | ||
| 37 | case Method::SetConfigStructOffset: | ||
| 38 | config_struct_address = arg; | ||
| 39 | break; | ||
| 40 | case Method::SetOutputSurfaceLumaOffset: | ||
| 41 | output_surface_luma_address = arg; | ||
| 42 | break; | ||
| 43 | case Method::SetOutputSurfaceChromaUOffset: | ||
| 44 | output_surface_chroma_u_address = arg; | ||
| 45 | break; | ||
| 46 | case Method::SetOutputSurfaceChromaVOffset: | ||
| 47 | output_surface_chroma_v_address = arg; | ||
| 48 | break; | ||
| 49 | default: | ||
| 50 | break; | ||
| 51 | } | ||
| 52 | } | ||
| 53 | |||
| 54 | void Vic::Execute() { | ||
| 55 | if (output_surface_luma_address == 0) { | ||
| 56 | LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Recieved 0x{:X}", | ||
| 57 | vic_state.output_surface.luma_offset); | ||
| 58 | return; | ||
| 59 | } | ||
| 60 | const VicConfig config{gpu.MemoryManager().Read<u64>(config_struct_address + 0x20)}; | ||
| 61 | const VideoPixelFormat pixel_format = | ||
| 62 | static_cast<VideoPixelFormat>(config.pixel_format.Value()); | ||
| 63 | switch (pixel_format) { | ||
| 64 | case VideoPixelFormat::BGRA8: | ||
| 65 | case VideoPixelFormat::RGBA8: { | ||
| 66 | LOG_TRACE(Service_NVDRV, "Writing RGB Frame"); | ||
| 67 | const auto* frame = nvdec_processor->GetFrame(); | ||
| 68 | |||
| 69 | if (!frame || frame->width == 0 || frame->height == 0) { | ||
| 70 | return; | ||
| 71 | } | ||
| 72 | if (scaler_ctx == nullptr || frame->width != scaler_width || | ||
| 73 | frame->height != scaler_height) { | ||
| 74 | const AVPixelFormat target_format = | ||
| 75 | (pixel_format == VideoPixelFormat::RGBA8) ? AV_PIX_FMT_RGBA : AV_PIX_FMT_BGRA; | ||
| 76 | |||
| 77 | sws_freeContext(scaler_ctx); | ||
| 78 | scaler_ctx = nullptr; | ||
| 79 | |||
| 80 | // FFmpeg returns all frames in YUV420, convert it into expected format | ||
| 81 | scaler_ctx = | ||
| 82 | sws_getContext(frame->width, frame->height, AV_PIX_FMT_YUV420P, frame->width, | ||
| 83 | frame->height, target_format, 0, nullptr, nullptr, nullptr); | ||
| 84 | |||
| 85 | scaler_width = frame->width; | ||
| 86 | scaler_height = frame->height; | ||
| 87 | } | ||
| 88 | // Get Converted frame | ||
| 89 | const std::size_t linear_size = frame->width * frame->height * 4; | ||
| 90 | |||
| 91 | using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>; | ||
| 92 | AVMallocPtr converted_frame_buffer{static_cast<u8*>(av_malloc(linear_size)), av_free}; | ||
| 93 | |||
| 94 | const int converted_stride{frame->width * 4}; | ||
| 95 | u8* const converted_frame_buf_addr{converted_frame_buffer.get()}; | ||
| 96 | |||
| 97 | sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, | ||
| 98 | &converted_frame_buf_addr, &converted_stride); | ||
| 99 | |||
| 100 | const u32 blk_kind = static_cast<u32>(config.block_linear_kind); | ||
| 101 | if (blk_kind != 0) { | ||
| 102 | // swizzle pitch linear to block linear | ||
| 103 | const u32 block_height = static_cast<u32>(config.block_linear_height_log2); | ||
| 104 | const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1, | ||
| 105 | block_height, 0); | ||
| 106 | std::vector<u8> swizzled_data(size); | ||
| 107 | Tegra::Texture::CopySwizzledData(frame->width, frame->height, 1, 4, 4, | ||
| 108 | swizzled_data.data(), converted_frame_buffer.get(), | ||
| 109 | false, block_height, 0, 1); | ||
| 110 | |||
| 111 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); | ||
| 112 | gpu.Maxwell3D().OnMemoryWrite(); | ||
| 113 | } else { | ||
| 114 | // send pitch linear frame | ||
| 115 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, | ||
| 116 | linear_size); | ||
| 117 | gpu.Maxwell3D().OnMemoryWrite(); | ||
| 118 | } | ||
| 119 | break; | ||
| 120 | } | ||
| 121 | case VideoPixelFormat::Yuv420: { | ||
| 122 | LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame"); | ||
| 123 | |||
| 124 | const auto* frame = nvdec_processor->GetFrame(); | ||
| 125 | |||
| 126 | if (!frame || frame->width == 0 || frame->height == 0) { | ||
| 127 | return; | ||
| 128 | } | ||
| 129 | |||
| 130 | const std::size_t surface_width = config.surface_width_minus1 + 1; | ||
| 131 | const std::size_t surface_height = config.surface_height_minus1 + 1; | ||
| 132 | const std::size_t half_width = surface_width / 2; | ||
| 133 | const std::size_t half_height = config.surface_height_minus1 / 2; | ||
| 134 | const std::size_t aligned_width = (surface_width + 0xff) & ~0xff; | ||
| 135 | |||
| 136 | const auto* luma_ptr = frame->data[0]; | ||
| 137 | const auto* chroma_b_ptr = frame->data[1]; | ||
| 138 | const auto* chroma_r_ptr = frame->data[2]; | ||
| 139 | const auto stride = frame->linesize[0]; | ||
| 140 | const auto half_stride = frame->linesize[1]; | ||
| 141 | |||
| 142 | std::vector<u8> luma_buffer(aligned_width * surface_height); | ||
| 143 | std::vector<u8> chroma_buffer(aligned_width * half_height); | ||
| 144 | |||
| 145 | // Populate luma buffer | ||
| 146 | for (std::size_t y = 0; y < surface_height - 1; ++y) { | ||
| 147 | std::size_t src = y * stride; | ||
| 148 | std::size_t dst = y * aligned_width; | ||
| 149 | |||
| 150 | std::size_t size = surface_width; | ||
| 151 | |||
| 152 | for (std::size_t offset = 0; offset < size; ++offset) { | ||
| 153 | luma_buffer[dst + offset] = luma_ptr[src + offset]; | ||
| 154 | } | ||
| 155 | } | ||
| 156 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), | ||
| 157 | luma_buffer.size()); | ||
| 158 | |||
| 159 | // Populate chroma buffer from both channels with interleaving. | ||
| 160 | for (std::size_t y = 0; y < half_height; ++y) { | ||
| 161 | std::size_t src = y * half_stride; | ||
| 162 | std::size_t dst = y * aligned_width; | ||
| 163 | |||
| 164 | for (std::size_t x = 0; x < half_width; ++x) { | ||
| 165 | chroma_buffer[dst + x * 2] = chroma_b_ptr[src + x]; | ||
| 166 | chroma_buffer[dst + x * 2 + 1] = chroma_r_ptr[src + x]; | ||
| 167 | } | ||
| 168 | } | ||
| 169 | gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(), | ||
| 170 | chroma_buffer.size()); | ||
| 171 | gpu.Maxwell3D().OnMemoryWrite(); | ||
| 172 | break; | ||
| 173 | } | ||
| 174 | default: | ||
| 175 | UNIMPLEMENTED_MSG("Unknown video pixel format {}", config.pixel_format.Value()); | ||
| 176 | break; | ||
| 177 | } | ||
| 178 | } | ||
| 179 | |||
| 180 | } // namespace Tegra | ||
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h new file mode 100644 index 000000000..dd0a2aed8 --- /dev/null +++ b/src/video_core/command_classes/vic.h | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <vector> | ||
| 9 | #include "common/bit_field.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | struct SwsContext; | ||
| 13 | |||
| 14 | namespace Tegra { | ||
| 15 | class GPU; | ||
| 16 | class Nvdec; | ||
| 17 | |||
/// Three consecutive 32-bit registers holding the offsets of a surface's luma, chroma U and
/// chroma V planes.
struct PlaneOffsets {
    u32 luma_offset{};
    u32 chroma_u_offset{};
    u32 chroma_v_offset{};
};
| 23 | |||
/// Register layout of the VIC class. Every member is built from 32-bit words; member order
/// and padding fix each register's word index (for example `execute` is word 0xC0 and
/// `output_surface` starts at word 0x1C8), so members must not be reordered. The total size
/// is pinned to 0x7A0 bytes by the static_assert below.
struct VicRegisters {
    INSERT_PADDING_WORDS(64);
    u32 nop{};
    INSERT_PADDING_WORDS(15);
    u32 pm_trigger{};
    INSERT_PADDING_WORDS(47);
    u32 set_application_id{};
    u32 set_watchdog_timer{};
    INSERT_PADDING_WORDS(17);
    u32 context_save_area{};
    u32 context_switch{};
    INSERT_PADDING_WORDS(43);
    u32 execute{};
    INSERT_PADDING_WORDS(63);
    std::array<std::array<PlaneOffsets, 8>, 8> surfacex_slots{};
    u32 picture_index{};
    u32 control_params{};
    u32 config_struct_offset{};
    u32 filter_struct_offset{};
    u32 palette_offset{};
    u32 hist_offset{};
    u32 context_id{};
    u32 fce_ucode_size{};
    PlaneOffsets output_surface{};
    u32 fce_ucode_offset{};
    INSERT_PADDING_WORDS(4);
    std::array<u32, 8> slot_context_id{};
    INSERT_PADDING_WORDS(16);
};
static_assert(sizeof(VicRegisters) == 0x7A0, "VicRegisters is an invalid size");
| 54 | |||
/// Declaration of the VIC command class processor.
///
/// The public surface is the Method enum, the constructor/destructor and ProcessMethod.
/// Everything else (register state, decoded addresses, configuration layout and the scaler
/// context) is private. All member functions are only declared here; their definitions
/// live in vic.cpp.
// NOTE(review): the class declares a destructor and holds a raw SwsContext pointer, but no
// copy or move operations are declared. The reference member already prevents copy
// assignment, yet copy construction is still implicitly available. Whether the destructor
// releases scaler_ctx cannot be seen from this header - confirm in vic.cpp, and consider
// deleting the copy operations if the pointer is owning.
| 55 | class Vic { | ||
| 56 | public: | ||
/// Method identifiers accepted by ProcessMethod.
/// Each value equals the 32-bit word index of the corresponding VicRegisters field
/// (execute, control_params, config_struct_offset, output_surface.*), assuming
/// INSERT_PADDING_WORDS(n) reserves n 32-bit words.
| 57 | enum class Method : u32 { | ||
| 58 | Execute = 0xc0, | ||
| 59 | SetControlParams = 0x1c1, | ||
| 60 | SetConfigStructOffset = 0x1c2, | ||
| 61 | SetOutputSurfaceLumaOffset = 0x1c8, | ||
| 62 | SetOutputSurfaceChromaUOffset = 0x1c9, | ||
| 63 | SetOutputSurfaceChromaVOffset = 0x1ca | ||
| 64 | }; | ||
| 65 | |||
/// Constructs the processor from a GPU reference and a shared Nvdec instance.
/// Explicit, so no implicit conversion from a GPU reference is possible.
// NOTE(review): presumably these initialize the gpu and nvdec_processor members declared
// below; the constructor body is not visible in this header.
| 66 | explicit Vic(GPU& gpu, std::shared_ptr<Tegra::Nvdec> nvdec_processor); | ||
| 67 | ~Vic(); | ||
| 68 | |||
| 69 | /// Write to the device state. | ||
/// @param method    Which VIC method is being invoked.
/// @param arguments Argument words supplied with the method.
// NOTE(review): how many argument words each method consumes is not visible here.
| 70 | void ProcessMethod(Vic::Method method, const std::vector<u32>& arguments); | ||
| 71 | |||
| 72 | private: | ||
/// Internal handler, declared only; presumably invoked for Method::Execute - confirm in
/// vic.cpp.
| 73 | void Execute(); | ||
| 74 | |||
/// Internal helper taking a register offset and a single argument word.
// NOTE(review): presumably stores the word into vic_state at the given offset, and whether
// the offset is in bytes or words is not visible here - confirm in vic.cpp.
| 75 | void VicStateWrite(u32 offset, u32 arguments); | ||
/// Register file instance, value-initialized so every register starts at zero.
| 76 | VicRegisters vic_state{}; | ||
| 77 | |||
/// Pixel format codes named by this class. The underlying type is u64_le, the same
/// storage type as the VicConfig::pixel_format bit field.
// NOTE(review): the visible tail of vic.cpp reports other pixel_format values through
// UNIMPLEMENTED_MSG; presumably that is the default case of a switch over this enum.
| 78 | enum class VideoPixelFormat : u64_le { | ||
| 79 | RGBA8 = 0x1f, | ||
| 80 | BGRA8 = 0x20, | ||
| 81 | Yuv420 = 0x44, | ||
| 82 | }; | ||
| 83 | |||
/// 64-bit configuration word viewed either as a raw value or as packed bit fields.
/// The template arguments below are read as BitField<first bit, bit count, storage type>
/// (assumed from common/bit_field.h - confirm). Under that reading the fields cover
/// bits 0-31 and 32-59 without gaps or overlap; bits 60-63 are not named.
| 84 | union VicConfig { | ||
| 85 | u64_le raw{}; | ||
// bits 0-6: pixel format code, see VideoPixelFormat
| 86 | BitField<0, 7, u64_le> pixel_format; | ||
// bits 7-8 and 9-10: horizontal and vertical chroma location
| 87 | BitField<7, 2, u64_le> chroma_loc_horiz; | ||
| 88 | BitField<9, 2, u64_le> chroma_loc_vert; | ||
// bits 11-14 and 15-18: block linear kind and log2 of the block height
| 89 | BitField<11, 4, u64_le> block_linear_kind; | ||
| 90 | BitField<15, 4, u64_le> block_linear_height_log2; | ||
// bits 19-21 and 22-31: reserved
| 91 | BitField<19, 3, u64_le> reserved0; | ||
| 92 | BitField<22, 10, u64_le> reserved1; | ||
// bits 32-45 and 46-59: surface width and height
// NOTE(review): the _minus1 suffix suggests the stored value is the dimension minus one;
// confirm where these fields are read.
| 93 | BitField<32, 14, u64_le> surface_width_minus1; | ||
| 94 | BitField<46, 14, u64_le> surface_height_minus1; | ||
| 95 | }; | ||
| 96 | |||
/// Collaborators passed to the constructor: the GPU by reference (the referenced object
/// must outlive this instance) and the Nvdec processor with shared ownership.
| 97 | GPU& gpu; | ||
| 98 | std::shared_ptr<Tegra::Nvdec> nvdec_processor; | ||
| 99 | |||
/// GPU virtual addresses for the configuration structure and the three output planes,
/// all zero until set.
// NOTE(review): presumably filled in by ProcessMethod from the matching Set* methods.
| 100 | GPUVAddr config_struct_address{}; | ||
| 101 | GPUVAddr output_surface_luma_address{}; | ||
| 102 | GPUVAddr output_surface_chroma_u_address{}; | ||
| 103 | GPUVAddr output_surface_chroma_v_address{}; | ||
| 104 | |||
/// Scaler context pointer (SwsContext is only forward declared in this header) together
/// with a width and height pair; the pointer starts out null and the dimensions at zero.
// NOTE(review): presumably the dimensions are those the context was created for, so it
// can be rebuilt when the frame size changes - confirm in vic.cpp.
| 105 | SwsContext* scaler_ctx{}; | ||
| 106 | s32 scaler_width{}; | ||
| 107 | s32 scaler_height{}; | ||
| 108 | }; | ||
| 109 | |||
| 110 | } // namespace Tegra | ||