diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/cdma_pusher.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/codec.cpp | 85 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/codec.h | 12 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/h264.cpp | 207 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/h264.h | 132 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/vp9.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/vp9_types.h | 307 | ||||
| -rw-r--r-- | src/video_core/command_classes/nvdec.cpp | 17 | ||||
| -rw-r--r-- | src/video_core/command_classes/nvdec.h | 8 | ||||
| -rw-r--r-- | src/video_core/command_classes/nvdec_common.h | 103 |
10 files changed, 522 insertions, 356 deletions
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index a3fda1094..8b86ad050 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp | |||
| @@ -103,8 +103,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { | |||
| 103 | case ThiMethod::SetMethod1: | 103 | case ThiMethod::SetMethod1: |
| 104 | LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}", | 104 | LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}", |
| 105 | static_cast<u32>(nvdec_thi_state.method_0)); | 105 | static_cast<u32>(nvdec_thi_state.method_0)); |
| 106 | nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0), | 106 | nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data); |
| 107 | data); | ||
| 108 | break; | 107 | break; |
| 109 | default: | 108 | default: |
| 110 | break; | 109 | break; |
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index d02dc6260..1b4bbc8ac 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp | |||
| @@ -23,8 +23,8 @@ void AVFrameDeleter(AVFrame* ptr) { | |||
| 23 | av_free(ptr); | 23 | av_free(ptr); |
| 24 | } | 24 | } |
| 25 | 25 | ||
| 26 | Codec::Codec(GPU& gpu_) | 26 | Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs) |
| 27 | : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)), | 27 | : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)), |
| 28 | vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} | 28 | vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} |
| 29 | 29 | ||
| 30 | Codec::~Codec() { | 30 | Codec::~Codec() { |
| @@ -43,46 +43,48 @@ Codec::~Codec() { | |||
| 43 | avcodec_close(av_codec_ctx); | 43 | avcodec_close(av_codec_ctx); |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | void Codec::Initialize() { | ||
| 47 | AVCodecID codec{AV_CODEC_ID_NONE}; | ||
| 48 | switch (current_codec) { | ||
| 49 | case NvdecCommon::VideoCodec::H264: | ||
| 50 | codec = AV_CODEC_ID_H264; | ||
| 51 | break; | ||
| 52 | case NvdecCommon::VideoCodec::Vp9: | ||
| 53 | codec = AV_CODEC_ID_VP9; | ||
| 54 | break; | ||
| 55 | default: | ||
| 56 | return; | ||
| 57 | } | ||
| 58 | av_codec = avcodec_find_decoder(codec); | ||
| 59 | av_codec_ctx = avcodec_alloc_context3(av_codec); | ||
| 60 | av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); | ||
| 61 | |||
| 62 | // TODO(ameerj): libavcodec gpu hw acceleration | ||
| 63 | |||
| 64 | const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); | ||
| 65 | if (av_error < 0) { | ||
| 66 | LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); | ||
| 67 | avcodec_close(av_codec_ctx); | ||
| 68 | return; | ||
| 69 | } | ||
| 70 | initialized = true; | ||
| 71 | return; | ||
| 72 | } | ||
| 73 | |||
| 46 | void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { | 74 | void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { |
| 47 | if (current_codec != codec) { | 75 | if (current_codec != codec) { |
| 48 | LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec)); | ||
| 49 | current_codec = codec; | 76 | current_codec = codec; |
| 77 | LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName()); | ||
| 50 | } | 78 | } |
| 51 | } | 79 | } |
| 52 | 80 | ||
| 53 | void Codec::StateWrite(u32 offset, u64 arguments) { | ||
| 54 | u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64); | ||
| 55 | std::memcpy(state_offset, &arguments, sizeof(u64)); | ||
| 56 | } | ||
| 57 | |||
| 58 | void Codec::Decode() { | 81 | void Codec::Decode() { |
| 59 | bool is_first_frame = false; | 82 | const bool is_first_frame = !initialized; |
| 60 | if (!initialized) { | 83 | if (!initialized) { |
| 61 | if (current_codec == NvdecCommon::VideoCodec::H264) { | 84 | Initialize(); |
| 62 | av_codec = avcodec_find_decoder(AV_CODEC_ID_H264); | ||
| 63 | } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { | ||
| 64 | av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9); | ||
| 65 | } else { | ||
| 66 | LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec); | ||
| 67 | return; | ||
| 68 | } | ||
| 69 | |||
| 70 | av_codec_ctx = avcodec_alloc_context3(av_codec); | ||
| 71 | av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); | ||
| 72 | |||
| 73 | // TODO(ameerj): libavcodec gpu hw acceleration | ||
| 74 | |||
| 75 | const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); | ||
| 76 | if (av_error < 0) { | ||
| 77 | LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); | ||
| 78 | avcodec_close(av_codec_ctx); | ||
| 79 | return; | ||
| 80 | } | ||
| 81 | initialized = true; | ||
| 82 | is_first_frame = true; | ||
| 83 | } | 85 | } |
| 84 | bool vp9_hidden_frame = false; | ||
| 85 | 86 | ||
| 87 | bool vp9_hidden_frame = false; | ||
| 86 | AVPacket packet{}; | 88 | AVPacket packet{}; |
| 87 | av_init_packet(&packet); | 89 | av_init_packet(&packet); |
| 88 | std::vector<u8> frame_data; | 90 | std::vector<u8> frame_data; |
| @@ -95,7 +97,7 @@ void Codec::Decode() { | |||
| 95 | } | 97 | } |
| 96 | 98 | ||
| 97 | packet.data = frame_data.data(); | 99 | packet.data = frame_data.data(); |
| 98 | packet.size = static_cast<int>(frame_data.size()); | 100 | packet.size = static_cast<s32>(frame_data.size()); |
| 99 | 101 | ||
| 100 | avcodec_send_packet(av_codec_ctx, &packet); | 102 | avcodec_send_packet(av_codec_ctx, &packet); |
| 101 | 103 | ||
| @@ -127,4 +129,21 @@ NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { | |||
| 127 | return current_codec; | 129 | return current_codec; |
| 128 | } | 130 | } |
| 129 | 131 | ||
| 132 | std::string_view Codec::GetCurrentCodecName() const { | ||
| 133 | switch (current_codec) { | ||
| 134 | case NvdecCommon::VideoCodec::None: | ||
| 135 | return "None"; | ||
| 136 | case NvdecCommon::VideoCodec::H264: | ||
| 137 | return "H264"; | ||
| 138 | case NvdecCommon::VideoCodec::Vp8: | ||
| 139 | return "VP8"; | ||
| 140 | case NvdecCommon::VideoCodec::H265: | ||
| 141 | return "H265"; | ||
| 142 | case NvdecCommon::VideoCodec::Vp9: | ||
| 143 | return "VP9"; | ||
| 144 | default: | ||
| 145 | return "Unknown"; | ||
| 146 | } | ||
| 147 | }; | ||
| 148 | |||
| 130 | } // namespace Tegra | 149 | } // namespace Tegra |
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index 3e135a2a6..f2aef1699 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h | |||
| @@ -42,15 +42,15 @@ class VP9; | |||
| 42 | 42 | ||
| 43 | class Codec { | 43 | class Codec { |
| 44 | public: | 44 | public: |
| 45 | explicit Codec(GPU& gpu); | 45 | explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs); |
| 46 | ~Codec(); | 46 | ~Codec(); |
| 47 | 47 | ||
| 48 | /// Initialize the codec, returning success or failure | ||
| 49 | void Initialize(); | ||
| 50 | |||
| 48 | /// Sets NVDEC video stream codec | 51 | /// Sets NVDEC video stream codec |
| 49 | void SetTargetCodec(NvdecCommon::VideoCodec codec); | 52 | void SetTargetCodec(NvdecCommon::VideoCodec codec); |
| 50 | 53 | ||
| 51 | /// Populate NvdecRegisters state with argument value at the provided offset | ||
| 52 | void StateWrite(u32 offset, u64 arguments); | ||
| 53 | |||
| 54 | /// Call decoders to construct headers, decode AVFrame with ffmpeg | 54 | /// Call decoders to construct headers, decode AVFrame with ffmpeg |
| 55 | void Decode(); | 55 | void Decode(); |
| 56 | 56 | ||
| @@ -59,6 +59,8 @@ public: | |||
| 59 | 59 | ||
| 60 | /// Returns the value of current_codec | 60 | /// Returns the value of current_codec |
| 61 | [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; | 61 | [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; |
| 62 | /// Return name of the current codec | ||
| 63 | [[nodiscard]] std::string_view GetCurrentCodecName() const; | ||
| 62 | 64 | ||
| 63 | private: | 65 | private: |
| 64 | bool initialized{}; | 66 | bool initialized{}; |
| @@ -68,10 +70,10 @@ private: | |||
| 68 | AVCodecContext* av_codec_ctx{nullptr}; | 70 | AVCodecContext* av_codec_ctx{nullptr}; |
| 69 | 71 | ||
| 70 | GPU& gpu; | 72 | GPU& gpu; |
| 73 | const NvdecCommon::NvdecRegisters& state; | ||
| 71 | std::unique_ptr<Decoder::H264> h264_decoder; | 74 | std::unique_ptr<Decoder::H264> h264_decoder; |
| 72 | std::unique_ptr<Decoder::VP9> vp9_decoder; | 75 | std::unique_ptr<Decoder::VP9> vp9_decoder; |
| 73 | 76 | ||
| 74 | NvdecCommon::NvdecRegisters state{}; | ||
| 75 | std::queue<AVFramePtr> av_frames{}; | 77 | std::queue<AVFramePtr> av_frames{}; |
| 76 | }; | 78 | }; |
| 77 | 79 | ||
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp index fea6aed98..5fb6d45ee 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/command_classes/codecs/h264.cpp | |||
| @@ -45,134 +45,129 @@ H264::~H264() = default; | |||
| 45 | 45 | ||
| 46 | const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state, | 46 | const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state, |
| 47 | bool is_first_frame) { | 47 | bool is_first_frame) { |
| 48 | H264DecoderContext context{}; | 48 | H264DecoderContext context; |
| 49 | gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); | 49 | gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); |
| 50 | 50 | ||
| 51 | const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff); | 51 | const s64 frame_number = context.h264_parameter_set.frame_number.Value(); |
| 52 | if (!is_first_frame && frame_number != 0) { | 52 | if (!is_first_frame && frame_number != 0) { |
| 53 | frame.resize(context.frame_data_size); | 53 | frame.resize(context.stream_len); |
| 54 | |||
| 55 | gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); | 54 | gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); |
| 56 | } else { | 55 | return frame; |
| 57 | /// Encode header | 56 | } |
| 58 | H264BitWriter writer{}; | ||
| 59 | writer.WriteU(1, 24); | ||
| 60 | writer.WriteU(0, 1); | ||
| 61 | writer.WriteU(3, 2); | ||
| 62 | writer.WriteU(7, 5); | ||
| 63 | writer.WriteU(100, 8); | ||
| 64 | writer.WriteU(0, 8); | ||
| 65 | writer.WriteU(31, 8); | ||
| 66 | writer.WriteUe(0); | ||
| 67 | const auto chroma_format_idc = | ||
| 68 | static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3); | ||
| 69 | writer.WriteUe(chroma_format_idc); | ||
| 70 | if (chroma_format_idc == 3) { | ||
| 71 | writer.WriteBit(false); | ||
| 72 | } | ||
| 73 | |||
| 74 | writer.WriteUe(0); | ||
| 75 | writer.WriteUe(0); | ||
| 76 | writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag | ||
| 77 | writer.WriteBit(false); // Scaling matrix present flag | ||
| 78 | |||
| 79 | const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3); | ||
| 80 | writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf)); | ||
| 81 | writer.WriteUe(order_cnt_type); | ||
| 82 | if (order_cnt_type == 0) { | ||
| 83 | writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt); | ||
| 84 | } else if (order_cnt_type == 1) { | ||
| 85 | writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); | ||
| 86 | |||
| 87 | writer.WriteSe(0); | ||
| 88 | writer.WriteSe(0); | ||
| 89 | writer.WriteUe(0); | ||
| 90 | } | ||
| 91 | |||
| 92 | const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units / | ||
| 93 | (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); | ||
| 94 | 57 | ||
| 95 | writer.WriteUe(16); | 58 | // Encode header |
| 59 | H264BitWriter writer{}; | ||
| 60 | writer.WriteU(1, 24); | ||
| 61 | writer.WriteU(0, 1); | ||
| 62 | writer.WriteU(3, 2); | ||
| 63 | writer.WriteU(7, 5); | ||
| 64 | writer.WriteU(100, 8); | ||
| 65 | writer.WriteU(0, 8); | ||
| 66 | writer.WriteU(31, 8); | ||
| 67 | writer.WriteUe(0); | ||
| 68 | const u32 chroma_format_idc = | ||
| 69 | static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value()); | ||
| 70 | writer.WriteUe(chroma_format_idc); | ||
| 71 | if (chroma_format_idc == 3) { | ||
| 96 | writer.WriteBit(false); | 72 | writer.WriteBit(false); |
| 97 | writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); | 73 | } |
| 98 | writer.WriteUe(pic_height - 1); | ||
| 99 | writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); | ||
| 100 | |||
| 101 | if (!context.h264_parameter_set.frame_mbs_only_flag) { | ||
| 102 | writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0); | ||
| 103 | } | ||
| 104 | 74 | ||
| 105 | writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0); | 75 | writer.WriteUe(0); |
| 106 | writer.WriteBit(false); // Frame cropping flag | 76 | writer.WriteUe(0); |
| 107 | writer.WriteBit(false); // VUI parameter present flag | 77 | writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag |
| 78 | writer.WriteBit(false); // Scaling matrix present flag | ||
| 108 | 79 | ||
| 109 | writer.End(); | 80 | writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value())); |
| 110 | 81 | ||
| 111 | // H264 PPS | 82 | const auto order_cnt_type = |
| 112 | writer.WriteU(1, 24); | 83 | static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value()); |
| 113 | writer.WriteU(0, 1); | 84 | writer.WriteUe(order_cnt_type); |
| 114 | writer.WriteU(3, 2); | 85 | if (order_cnt_type == 0) { |
| 115 | writer.WriteU(8, 5); | 86 | writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4); |
| 87 | } else if (order_cnt_type == 1) { | ||
| 88 | writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); | ||
| 116 | 89 | ||
| 90 | writer.WriteSe(0); | ||
| 91 | writer.WriteSe(0); | ||
| 117 | writer.WriteUe(0); | 92 | writer.WriteUe(0); |
| 118 | writer.WriteUe(0); | 93 | } |
| 119 | 94 | ||
| 120 | writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); | 95 | const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units / |
| 121 | writer.WriteBit(false); | 96 | (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); |
| 122 | writer.WriteUe(0); | 97 | |
| 123 | writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); | 98 | writer.WriteUe(16); |
| 124 | writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); | 99 | writer.WriteBit(false); |
| 125 | writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0); | 100 | writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); |
| 126 | writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2); | 101 | writer.WriteUe(pic_height - 1); |
| 127 | s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f); | 102 | writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); |
| 128 | pic_init_qp = (pic_init_qp << 26) >> 26; | ||
| 129 | writer.WriteSe(pic_init_qp); | ||
| 130 | writer.WriteSe(0); | ||
| 131 | s32 chroma_qp_index_offset = | ||
| 132 | static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f); | ||
| 133 | chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27; | ||
| 134 | 103 | ||
| 135 | writer.WriteSe(chroma_qp_index_offset); | 104 | if (!context.h264_parameter_set.frame_mbs_only_flag) { |
| 136 | writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0); | 105 | writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0); |
| 137 | writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0); | 106 | } |
| 138 | writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0); | ||
| 139 | writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); | ||
| 140 | 107 | ||
| 108 | writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0); | ||
| 109 | writer.WriteBit(false); // Frame cropping flag | ||
| 110 | writer.WriteBit(false); // VUI parameter present flag | ||
| 111 | |||
| 112 | writer.End(); | ||
| 113 | |||
| 114 | // H264 PPS | ||
| 115 | writer.WriteU(1, 24); | ||
| 116 | writer.WriteU(0, 1); | ||
| 117 | writer.WriteU(3, 2); | ||
| 118 | writer.WriteU(8, 5); | ||
| 119 | |||
| 120 | writer.WriteUe(0); | ||
| 121 | writer.WriteUe(0); | ||
| 122 | |||
| 123 | writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); | ||
| 124 | writer.WriteBit(false); | ||
| 125 | writer.WriteUe(0); | ||
| 126 | writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); | ||
| 127 | writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); | ||
| 128 | writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0); | ||
| 129 | writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2); | ||
| 130 | s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value()); | ||
| 131 | writer.WriteSe(pic_init_qp); | ||
| 132 | writer.WriteSe(0); | ||
| 133 | s32 chroma_qp_index_offset = | ||
| 134 | static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value()); | ||
| 135 | |||
| 136 | writer.WriteSe(chroma_qp_index_offset); | ||
| 137 | writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0); | ||
| 138 | writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0); | ||
| 139 | writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0); | ||
| 140 | writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); | ||
| 141 | |||
| 142 | writer.WriteBit(true); | ||
| 143 | |||
| 144 | for (s32 index = 0; index < 6; index++) { | ||
| 141 | writer.WriteBit(true); | 145 | writer.WriteBit(true); |
| 146 | std::span<const u8> matrix{context.weight_scale}; | ||
| 147 | writer.WriteScalingList(matrix, index * 16, 16); | ||
| 148 | } | ||
| 142 | 149 | ||
| 143 | for (s32 index = 0; index < 6; index++) { | 150 | if (context.h264_parameter_set.transform_8x8_mode_flag) { |
| 151 | for (s32 index = 0; index < 2; index++) { | ||
| 144 | writer.WriteBit(true); | 152 | writer.WriteBit(true); |
| 145 | const auto matrix_x4 = | 153 | std::span<const u8> matrix{context.weight_scale_8x8}; |
| 146 | std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end()); | 154 | writer.WriteScalingList(matrix, index * 64, 64); |
| 147 | writer.WriteScalingList(matrix_x4, index * 16, 16); | ||
| 148 | } | ||
| 149 | |||
| 150 | if (context.h264_parameter_set.transform_8x8_mode_flag) { | ||
| 151 | for (s32 index = 0; index < 2; index++) { | ||
| 152 | writer.WriteBit(true); | ||
| 153 | const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(), | ||
| 154 | context.scaling_matrix_8.end()); | ||
| 155 | |||
| 156 | writer.WriteScalingList(matrix_x8, index * 64, 64); | ||
| 157 | } | ||
| 158 | } | 155 | } |
| 156 | } | ||
| 159 | 157 | ||
| 160 | s32 chroma_qp_index_offset2 = | 158 | s32 chroma_qp_index_offset2 = |
| 161 | static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f); | 159 | static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value()); |
| 162 | chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27; | ||
| 163 | 160 | ||
| 164 | writer.WriteSe(chroma_qp_index_offset2); | 161 | writer.WriteSe(chroma_qp_index_offset2); |
| 165 | 162 | ||
| 166 | writer.End(); | 163 | writer.End(); |
| 167 | 164 | ||
| 168 | const auto& encoded_header = writer.GetByteArray(); | 165 | const auto& encoded_header = writer.GetByteArray(); |
| 169 | frame.resize(encoded_header.size() + context.frame_data_size); | 166 | frame.resize(encoded_header.size() + context.stream_len); |
| 170 | std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); | 167 | std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); |
| 171 | 168 | ||
| 172 | gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, | 169 | gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, |
| 173 | frame.data() + encoded_header.size(), | 170 | frame.data() + encoded_header.size(), context.stream_len); |
| 174 | context.frame_data_size); | ||
| 175 | } | ||
| 176 | 171 | ||
| 177 | return frame; | 172 | return frame; |
| 178 | } | 173 | } |
| @@ -202,7 +197,7 @@ void H264BitWriter::WriteBit(bool state) { | |||
| 202 | WriteBits(state ? 1 : 0, 1); | 197 | WriteBits(state ? 1 : 0, 1); |
| 203 | } | 198 | } |
| 204 | 199 | ||
| 205 | void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) { | 200 | void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) { |
| 206 | std::vector<u8> scan(count); | 201 | std::vector<u8> scan(count); |
| 207 | if (count == 16) { | 202 | if (count == 16) { |
| 208 | std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); | 203 | std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); |
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h index 0f3a1d9f3..bfe84a472 100644 --- a/src/video_core/command_classes/codecs/h264.h +++ b/src/video_core/command_classes/codecs/h264.h | |||
| @@ -20,7 +20,9 @@ | |||
| 20 | 20 | ||
| 21 | #pragma once | 21 | #pragma once |
| 22 | 22 | ||
| 23 | #include <span> | ||
| 23 | #include <vector> | 24 | #include <vector> |
| 25 | #include "common/bit_field.h" | ||
| 24 | #include "common/common_funcs.h" | 26 | #include "common/common_funcs.h" |
| 25 | #include "common/common_types.h" | 27 | #include "common/common_types.h" |
| 26 | #include "video_core/command_classes/nvdec_common.h" | 28 | #include "video_core/command_classes/nvdec_common.h" |
| @@ -48,7 +50,7 @@ public: | |||
| 48 | 50 | ||
| 49 | /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification | 51 | /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification |
| 50 | /// Writes the scaling matrices of the sream | 52 | /// Writes the scaling matrices of the sream |
| 51 | void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count); | 53 | void WriteScalingList(std::span<const u8> list, s32 start, s32 count); |
| 52 | 54 | ||
| 53 | /// Return the bitstream as a vector. | 55 | /// Return the bitstream as a vector. |
| 54 | [[nodiscard]] std::vector<u8>& GetByteArray(); | 56 | [[nodiscard]] std::vector<u8>& GetByteArray(); |
| @@ -78,40 +80,110 @@ public: | |||
| 78 | const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false); | 80 | const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false); |
| 79 | 81 | ||
| 80 | private: | 82 | private: |
| 83 | std::vector<u8> frame; | ||
| 84 | GPU& gpu; | ||
| 85 | |||
| 81 | struct H264ParameterSet { | 86 | struct H264ParameterSet { |
| 82 | u32 log2_max_pic_order_cnt{}; | 87 | s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00 |
| 83 | u32 delta_pic_order_always_zero_flag{}; | 88 | s32 delta_pic_order_always_zero_flag; ///< 0x04 |
| 84 | u32 frame_mbs_only_flag{}; | 89 | s32 frame_mbs_only_flag; ///< 0x08 |
| 85 | u32 pic_width_in_mbs{}; | 90 | u32 pic_width_in_mbs; ///< 0x0C |
| 86 | u32 pic_height_in_map_units{}; | 91 | u32 frame_height_in_map_units; ///< 0x10 |
| 87 | INSERT_PADDING_WORDS(1); | 92 | union { ///< 0x14 |
| 88 | u32 entropy_coding_mode_flag{}; | 93 | BitField<0, 2, u32> tile_format; |
| 89 | u32 bottom_field_pic_order_flag{}; | 94 | BitField<2, 3, u32> gob_height; |
| 90 | u32 num_refidx_l0_default_active{}; | 95 | }; |
| 91 | u32 num_refidx_l1_default_active{}; | 96 | u32 entropy_coding_mode_flag; ///< 0x18 |
| 92 | u32 deblocking_filter_control_flag{}; | 97 | s32 pic_order_present_flag; ///< 0x1C |
| 93 | u32 redundant_pic_count_flag{}; | 98 | s32 num_refidx_l0_default_active; ///< 0x20 |
| 94 | u32 transform_8x8_mode_flag{}; | 99 | s32 num_refidx_l1_default_active; ///< 0x24 |
| 95 | INSERT_PADDING_WORDS(9); | 100 | s32 deblocking_filter_control_present_flag; ///< 0x28 |
| 96 | u64 flags{}; | 101 | s32 redundant_pic_cnt_present_flag; ///< 0x2C |
| 97 | u32 frame_number{}; | 102 | u32 transform_8x8_mode_flag; ///< 0x30 |
| 98 | u32 frame_number2{}; | 103 | u32 pitch_luma; ///< 0x34 |
| 104 | u32 pitch_chroma; ///< 0x38 | ||
| 105 | u32 luma_top_offset; ///< 0x3C | ||
| 106 | u32 luma_bot_offset; ///< 0x40 | ||
| 107 | u32 luma_frame_offset; ///< 0x44 | ||
| 108 | u32 chroma_top_offset; ///< 0x48 | ||
| 109 | u32 chroma_bot_offset; ///< 0x4C | ||
| 110 | u32 chroma_frame_offset; ///< 0x50 | ||
| 111 | u32 hist_buffer_size; ///< 0x54 | ||
| 112 | union { ///< 0x58 | ||
| 113 | union { | ||
| 114 | BitField<0, 1, u64> mbaff_frame; | ||
| 115 | BitField<1, 1, u64> direct_8x8_inference; | ||
| 116 | BitField<2, 1, u64> weighted_pred; | ||
| 117 | BitField<3, 1, u64> constrained_intra_pred; | ||
| 118 | BitField<4, 1, u64> ref_pic; | ||
| 119 | BitField<5, 1, u64> field_pic; | ||
| 120 | BitField<6, 1, u64> bottom_field; | ||
| 121 | BitField<7, 1, u64> second_field; | ||
| 122 | } flags; | ||
| 123 | BitField<8, 4, u64> log2_max_frame_num_minus4; | ||
| 124 | BitField<12, 2, u64> chroma_format_idc; | ||
| 125 | BitField<14, 2, u64> pic_order_cnt_type; | ||
| 126 | BitField<16, 6, s64> pic_init_qp_minus26; | ||
| 127 | BitField<22, 5, s64> chroma_qp_index_offset; | ||
| 128 | BitField<27, 5, s64> second_chroma_qp_index_offset; | ||
| 129 | BitField<32, 2, u64> weighted_bipred_idc; | ||
| 130 | BitField<34, 7, u64> curr_pic_idx; | ||
| 131 | BitField<41, 5, u64> curr_col_idx; | ||
| 132 | BitField<46, 16, u64> frame_number; | ||
| 133 | BitField<62, 1, u64> frame_surfaces; | ||
| 134 | BitField<63, 1, u64> output_memory_layout; | ||
| 135 | }; | ||
| 99 | }; | 136 | }; |
| 100 | static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size"); | 137 | static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size"); |
| 101 | 138 | ||
| 102 | struct H264DecoderContext { | 139 | struct H264DecoderContext { |
| 103 | INSERT_PADDING_BYTES(0x48); | 140 | INSERT_PADDING_WORDS_NOINIT(18); ///< 0x0000 |
| 104 | u32 frame_data_size{}; | 141 | u32 stream_len; ///< 0x0048 |
| 105 | INSERT_PADDING_BYTES(0xc); | 142 | INSERT_PADDING_WORDS_NOINIT(3); ///< 0x004C |
| 106 | H264ParameterSet h264_parameter_set{}; | 143 | H264ParameterSet h264_parameter_set; ///< 0x0058 |
| 107 | INSERT_PADDING_BYTES(0x100); | 144 | INSERT_PADDING_WORDS_NOINIT(66); ///< 0x00B8 |
| 108 | std::array<u8, 0x60> scaling_matrix_4; | 145 | std::array<u8, 0x60> weight_scale; ///< 0x01C0 |
| 109 | std::array<u8, 0x80> scaling_matrix_8; | 146 | std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220 |
| 110 | }; | 147 | }; |
| 111 | static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size"); | 148 | static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size"); |
| 112 | 149 | ||
| 113 | std::vector<u8> frame; | 150 | #define ASSERT_POSITION(field_name, position) \ |
| 114 | GPU& gpu; | 151 | static_assert(offsetof(H264ParameterSet, field_name) == position, \ |
| 152 | "Field " #field_name " has invalid position") | ||
| 153 | |||
| 154 | ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00); | ||
| 155 | ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04); | ||
| 156 | ASSERT_POSITION(frame_mbs_only_flag, 0x08); | ||
| 157 | ASSERT_POSITION(pic_width_in_mbs, 0x0C); | ||
| 158 | ASSERT_POSITION(frame_height_in_map_units, 0x10); | ||
| 159 | ASSERT_POSITION(tile_format, 0x14); | ||
| 160 | ASSERT_POSITION(entropy_coding_mode_flag, 0x18); | ||
| 161 | ASSERT_POSITION(pic_order_present_flag, 0x1C); | ||
| 162 | ASSERT_POSITION(num_refidx_l0_default_active, 0x20); | ||
| 163 | ASSERT_POSITION(num_refidx_l1_default_active, 0x24); | ||
| 164 | ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28); | ||
| 165 | ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C); | ||
| 166 | ASSERT_POSITION(transform_8x8_mode_flag, 0x30); | ||
| 167 | ASSERT_POSITION(pitch_luma, 0x34); | ||
| 168 | ASSERT_POSITION(pitch_chroma, 0x38); | ||
| 169 | ASSERT_POSITION(luma_top_offset, 0x3C); | ||
| 170 | ASSERT_POSITION(luma_bot_offset, 0x40); | ||
| 171 | ASSERT_POSITION(luma_frame_offset, 0x44); | ||
| 172 | ASSERT_POSITION(chroma_top_offset, 0x48); | ||
| 173 | ASSERT_POSITION(chroma_bot_offset, 0x4C); | ||
| 174 | ASSERT_POSITION(chroma_frame_offset, 0x50); | ||
| 175 | ASSERT_POSITION(hist_buffer_size, 0x54); | ||
| 176 | ASSERT_POSITION(flags, 0x58); | ||
| 177 | #undef ASSERT_POSITION | ||
| 178 | |||
| 179 | #define ASSERT_POSITION(field_name, position) \ | ||
| 180 | static_assert(offsetof(H264DecoderContext, field_name) == position, \ | ||
| 181 | "Field " #field_name " has invalid position") | ||
| 182 | |||
| 183 | ASSERT_POSITION(stream_len, 0x48); | ||
| 184 | ASSERT_POSITION(h264_parameter_set, 0x58); | ||
| 185 | ASSERT_POSITION(weight_scale, 0x1C0); | ||
| 186 | #undef ASSERT_POSITION | ||
| 115 | }; | 187 | }; |
| 116 | 188 | ||
| 117 | } // namespace Decoder | 189 | } // namespace Decoder |
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 29bb31418..902bc2a98 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp | |||
| @@ -354,7 +354,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_ | |||
| 354 | } | 354 | } |
| 355 | 355 | ||
| 356 | Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { | 356 | Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { |
| 357 | PictureInfo picture_info{}; | 357 | PictureInfo picture_info; |
| 358 | gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); | 358 | gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); |
| 359 | Vp9PictureInfo vp9_info = picture_info.Convert(); | 359 | Vp9PictureInfo vp9_info = picture_info.Convert(); |
| 360 | 360 | ||
| @@ -370,7 +370,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) | |||
| 370 | } | 370 | } |
| 371 | 371 | ||
| 372 | void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { | 372 | void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { |
| 373 | EntropyProbs entropy{}; | 373 | EntropyProbs entropy; |
| 374 | gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); | 374 | gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); |
| 375 | entropy.Convert(dst); | 375 | entropy.Convert(dst); |
| 376 | } | 376 | } |
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index 139501a1c..2da14f3ca 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h | |||
| @@ -15,10 +15,10 @@ class GPU; | |||
| 15 | 15 | ||
| 16 | namespace Decoder { | 16 | namespace Decoder { |
| 17 | struct Vp9FrameDimensions { | 17 | struct Vp9FrameDimensions { |
| 18 | s16 width{}; | 18 | s16 width; |
| 19 | s16 height{}; | 19 | s16 height; |
| 20 | s16 luma_pitch{}; | 20 | s16 luma_pitch; |
| 21 | s16 chroma_pitch{}; | 21 | s16 chroma_pitch; |
| 22 | }; | 22 | }; |
| 23 | static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); | 23 | static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); |
| 24 | 24 | ||
| @@ -49,87 +49,87 @@ enum class TxMode { | |||
| 49 | }; | 49 | }; |
| 50 | 50 | ||
| 51 | struct Segmentation { | 51 | struct Segmentation { |
| 52 | u8 enabled{}; | 52 | u8 enabled; |
| 53 | u8 update_map{}; | 53 | u8 update_map; |
| 54 | u8 temporal_update{}; | 54 | u8 temporal_update; |
| 55 | u8 abs_delta{}; | 55 | u8 abs_delta; |
| 56 | std::array<u32, 8> feature_mask{}; | 56 | std::array<u32, 8> feature_mask; |
| 57 | std::array<std::array<s16, 4>, 8> feature_data{}; | 57 | std::array<std::array<s16, 4>, 8> feature_data; |
| 58 | }; | 58 | }; |
| 59 | static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size"); | 59 | static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size"); |
| 60 | 60 | ||
| 61 | struct LoopFilter { | 61 | struct LoopFilter { |
| 62 | u8 mode_ref_delta_enabled{}; | 62 | u8 mode_ref_delta_enabled; |
| 63 | std::array<s8, 4> ref_deltas{}; | 63 | std::array<s8, 4> ref_deltas; |
| 64 | std::array<s8, 2> mode_deltas{}; | 64 | std::array<s8, 2> mode_deltas; |
| 65 | }; | 65 | }; |
| 66 | static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size"); | 66 | static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size"); |
| 67 | 67 | ||
| 68 | struct Vp9EntropyProbs { | 68 | struct Vp9EntropyProbs { |
| 69 | std::array<u8, 36> y_mode_prob{}; | 69 | std::array<u8, 36> y_mode_prob; ///< 0x0000 |
| 70 | std::array<u8, 64> partition_prob{}; | 70 | std::array<u8, 64> partition_prob; ///< 0x0024 |
| 71 | std::array<u8, 1728> coef_probs{}; | 71 | std::array<u8, 1728> coef_probs; ///< 0x0064 |
| 72 | std::array<u8, 8> switchable_interp_prob{}; | 72 | std::array<u8, 8> switchable_interp_prob; ///< 0x0724 |
| 73 | std::array<u8, 28> inter_mode_prob{}; | 73 | std::array<u8, 28> inter_mode_prob; ///< 0x072C |
| 74 | std::array<u8, 4> intra_inter_prob{}; | 74 | std::array<u8, 4> intra_inter_prob; ///< 0x0748 |
| 75 | std::array<u8, 5> comp_inter_prob{}; | 75 | std::array<u8, 5> comp_inter_prob; ///< 0x074C |
| 76 | std::array<u8, 10> single_ref_prob{}; | 76 | std::array<u8, 10> single_ref_prob; ///< 0x0751 |
| 77 | std::array<u8, 5> comp_ref_prob{}; | 77 | std::array<u8, 5> comp_ref_prob; ///< 0x075B |
| 78 | std::array<u8, 6> tx_32x32_prob{}; | 78 | std::array<u8, 6> tx_32x32_prob; ///< 0x0760 |
| 79 | std::array<u8, 4> tx_16x16_prob{}; | 79 | std::array<u8, 4> tx_16x16_prob; ///< 0x0766 |
| 80 | std::array<u8, 2> tx_8x8_prob{}; | 80 | std::array<u8, 2> tx_8x8_prob; ///< 0x076A |
| 81 | std::array<u8, 3> skip_probs{}; | 81 | std::array<u8, 3> skip_probs; ///< 0x076C |
| 82 | std::array<u8, 3> joints{}; | 82 | std::array<u8, 3> joints; ///< 0x076F |
| 83 | std::array<u8, 2> sign{}; | 83 | std::array<u8, 2> sign; ///< 0x0772 |
| 84 | std::array<u8, 20> classes{}; | 84 | std::array<u8, 20> classes; ///< 0x0774 |
| 85 | std::array<u8, 2> class_0{}; | 85 | std::array<u8, 2> class_0; ///< 0x0788 |
| 86 | std::array<u8, 20> prob_bits{}; | 86 | std::array<u8, 20> prob_bits; ///< 0x078A |
| 87 | std::array<u8, 12> class_0_fr{}; | 87 | std::array<u8, 12> class_0_fr; ///< 0x079E |
| 88 | std::array<u8, 6> fr{}; | 88 | std::array<u8, 6> fr; ///< 0x07AA |
| 89 | std::array<u8, 2> class_0_hp{}; | 89 | std::array<u8, 2> class_0_hp; ///< 0x07B0 |
| 90 | std::array<u8, 2> high_precision{}; | 90 | std::array<u8, 2> high_precision; ///< 0x07B2 |
| 91 | }; | 91 | }; |
| 92 | static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size"); | 92 | static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size"); |
| 93 | 93 | ||
| 94 | struct Vp9PictureInfo { | 94 | struct Vp9PictureInfo { |
| 95 | bool is_key_frame{}; | 95 | bool is_key_frame; |
| 96 | bool intra_only{}; | 96 | bool intra_only; |
| 97 | bool last_frame_was_key{}; | 97 | bool last_frame_was_key; |
| 98 | bool frame_size_changed{}; | 98 | bool frame_size_changed; |
| 99 | bool error_resilient_mode{}; | 99 | bool error_resilient_mode; |
| 100 | bool last_frame_shown{}; | 100 | bool last_frame_shown; |
| 101 | bool show_frame{}; | 101 | bool show_frame; |
| 102 | std::array<s8, 4> ref_frame_sign_bias{}; | 102 | std::array<s8, 4> ref_frame_sign_bias; |
| 103 | s32 base_q_index{}; | 103 | s32 base_q_index; |
| 104 | s32 y_dc_delta_q{}; | 104 | s32 y_dc_delta_q; |
| 105 | s32 uv_dc_delta_q{}; | 105 | s32 uv_dc_delta_q; |
| 106 | s32 uv_ac_delta_q{}; | 106 | s32 uv_ac_delta_q; |
| 107 | bool lossless{}; | 107 | bool lossless; |
| 108 | s32 transform_mode{}; | 108 | s32 transform_mode; |
| 109 | bool allow_high_precision_mv{}; | 109 | bool allow_high_precision_mv; |
| 110 | s32 interp_filter{}; | 110 | s32 interp_filter; |
| 111 | s32 reference_mode{}; | 111 | s32 reference_mode; |
| 112 | s8 comp_fixed_ref{}; | 112 | s8 comp_fixed_ref; |
| 113 | std::array<s8, 2> comp_var_ref{}; | 113 | std::array<s8, 2> comp_var_ref; |
| 114 | s32 log2_tile_cols{}; | 114 | s32 log2_tile_cols; |
| 115 | s32 log2_tile_rows{}; | 115 | s32 log2_tile_rows; |
| 116 | bool segment_enabled{}; | 116 | bool segment_enabled; |
| 117 | bool segment_map_update{}; | 117 | bool segment_map_update; |
| 118 | bool segment_map_temporal_update{}; | 118 | bool segment_map_temporal_update; |
| 119 | s32 segment_abs_delta{}; | 119 | s32 segment_abs_delta; |
| 120 | std::array<u32, 8> segment_feature_enable{}; | 120 | std::array<u32, 8> segment_feature_enable; |
| 121 | std::array<std::array<s16, 4>, 8> segment_feature_data{}; | 121 | std::array<std::array<s16, 4>, 8> segment_feature_data; |
| 122 | bool mode_ref_delta_enabled{}; | 122 | bool mode_ref_delta_enabled; |
| 123 | bool use_prev_in_find_mv_refs{}; | 123 | bool use_prev_in_find_mv_refs; |
| 124 | std::array<s8, 4> ref_deltas{}; | 124 | std::array<s8, 4> ref_deltas; |
| 125 | std::array<s8, 2> mode_deltas{}; | 125 | std::array<s8, 2> mode_deltas; |
| 126 | Vp9EntropyProbs entropy{}; | 126 | Vp9EntropyProbs entropy; |
| 127 | Vp9FrameDimensions frame_size{}; | 127 | Vp9FrameDimensions frame_size; |
| 128 | u8 first_level{}; | 128 | u8 first_level; |
| 129 | u8 sharpness_level{}; | 129 | u8 sharpness_level; |
| 130 | u32 bitstream_size{}; | 130 | u32 bitstream_size; |
| 131 | std::array<u64, 4> frame_offsets{}; | 131 | std::array<u64, 4> frame_offsets; |
| 132 | std::array<bool, 4> refresh_frame{}; | 132 | std::array<bool, 4> refresh_frame; |
| 133 | }; | 133 | }; |
| 134 | 134 | ||
| 135 | struct Vp9FrameContainer { | 135 | struct Vp9FrameContainer { |
| @@ -138,35 +138,35 @@ struct Vp9FrameContainer { | |||
| 138 | }; | 138 | }; |
| 139 | 139 | ||
| 140 | struct PictureInfo { | 140 | struct PictureInfo { |
| 141 | INSERT_PADDING_WORDS(12); | 141 | INSERT_PADDING_WORDS_NOINIT(12); ///< 0x00 |
| 142 | u32 bitstream_size{}; | 142 | u32 bitstream_size; ///< 0x30 |
| 143 | INSERT_PADDING_WORDS(5); | 143 | INSERT_PADDING_WORDS_NOINIT(5); ///< 0x34 |
| 144 | Vp9FrameDimensions last_frame_size{}; | 144 | Vp9FrameDimensions last_frame_size; ///< 0x48 |
| 145 | Vp9FrameDimensions golden_frame_size{}; | 145 | Vp9FrameDimensions golden_frame_size; ///< 0x50 |
| 146 | Vp9FrameDimensions alt_frame_size{}; | 146 | Vp9FrameDimensions alt_frame_size; ///< 0x58 |
| 147 | Vp9FrameDimensions current_frame_size{}; | 147 | Vp9FrameDimensions current_frame_size; ///< 0x60 |
| 148 | u32 vp9_flags{}; | 148 | u32 vp9_flags; ///< 0x68 |
| 149 | std::array<s8, 4> ref_frame_sign_bias{}; | 149 | std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C |
| 150 | u8 first_level{}; | 150 | u8 first_level; ///< 0x70 |
| 151 | u8 sharpness_level{}; | 151 | u8 sharpness_level; ///< 0x71 |
| 152 | u8 base_q_index{}; | 152 | u8 base_q_index; ///< 0x72 |
| 153 | u8 y_dc_delta_q{}; | 153 | u8 y_dc_delta_q; ///< 0x73 |
| 154 | u8 uv_ac_delta_q{}; | 154 | u8 uv_ac_delta_q; ///< 0x74 |
| 155 | u8 uv_dc_delta_q{}; | 155 | u8 uv_dc_delta_q; ///< 0x75 |
| 156 | u8 lossless{}; | 156 | u8 lossless; ///< 0x76 |
| 157 | u8 tx_mode{}; | 157 | u8 tx_mode; ///< 0x77 |
| 158 | u8 allow_high_precision_mv{}; | 158 | u8 allow_high_precision_mv; ///< 0x78 |
| 159 | u8 interp_filter{}; | 159 | u8 interp_filter; ///< 0x79 |
| 160 | u8 reference_mode{}; | 160 | u8 reference_mode; ///< 0x7A |
| 161 | s8 comp_fixed_ref{}; | 161 | s8 comp_fixed_ref; ///< 0x7B |
| 162 | std::array<s8, 2> comp_var_ref{}; | 162 | std::array<s8, 2> comp_var_ref; ///< 0x7C |
| 163 | u8 log2_tile_cols{}; | 163 | u8 log2_tile_cols; ///< 0x7E |
| 164 | u8 log2_tile_rows{}; | 164 | u8 log2_tile_rows; ///< 0x7F |
| 165 | Segmentation segmentation{}; | 165 | Segmentation segmentation; ///< 0x80 |
| 166 | LoopFilter loop_filter{}; | 166 | LoopFilter loop_filter; ///< 0xE4 |
| 167 | INSERT_PADDING_BYTES(5); | 167 | INSERT_PADDING_BYTES_NOINIT(5); ///< 0xEB |
| 168 | u32 surface_params{}; | 168 | u32 surface_params; ///< 0xF0 |
| 169 | INSERT_PADDING_WORDS(3); | 169 | INSERT_PADDING_WORDS_NOINIT(3); ///< 0xF4 |
| 170 | 170 | ||
| 171 | [[nodiscard]] Vp9PictureInfo Convert() const { | 171 | [[nodiscard]] Vp9PictureInfo Convert() const { |
| 172 | return { | 172 | return { |
| @@ -176,6 +176,7 @@ struct PictureInfo { | |||
| 176 | .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, | 176 | .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, |
| 177 | .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0, | 177 | .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0, |
| 178 | .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, | 178 | .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, |
| 179 | .show_frame = false, | ||
| 179 | .ref_frame_sign_bias = ref_frame_sign_bias, | 180 | .ref_frame_sign_bias = ref_frame_sign_bias, |
| 180 | .base_q_index = base_q_index, | 181 | .base_q_index = base_q_index, |
| 181 | .y_dc_delta_q = y_dc_delta_q, | 182 | .y_dc_delta_q = y_dc_delta_q, |
| @@ -204,45 +205,48 @@ struct PictureInfo { | |||
| 204 | !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)), | 205 | !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)), |
| 205 | .ref_deltas = loop_filter.ref_deltas, | 206 | .ref_deltas = loop_filter.ref_deltas, |
| 206 | .mode_deltas = loop_filter.mode_deltas, | 207 | .mode_deltas = loop_filter.mode_deltas, |
| 208 | .entropy{}, | ||
| 207 | .frame_size = current_frame_size, | 209 | .frame_size = current_frame_size, |
| 208 | .first_level = first_level, | 210 | .first_level = first_level, |
| 209 | .sharpness_level = sharpness_level, | 211 | .sharpness_level = sharpness_level, |
| 210 | .bitstream_size = bitstream_size, | 212 | .bitstream_size = bitstream_size, |
| 213 | .frame_offsets{}, | ||
| 214 | .refresh_frame{}, | ||
| 211 | }; | 215 | }; |
| 212 | } | 216 | } |
| 213 | }; | 217 | }; |
| 214 | static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size"); | 218 | static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size"); |
| 215 | 219 | ||
| 216 | struct EntropyProbs { | 220 | struct EntropyProbs { |
| 217 | INSERT_PADDING_BYTES(1024); | 221 | INSERT_PADDING_BYTES_NOINIT(1024); ///< 0x0000 |
| 218 | std::array<u8, 28> inter_mode_prob{}; | 222 | std::array<u8, 28> inter_mode_prob; ///< 0x0400 |
| 219 | std::array<u8, 4> intra_inter_prob{}; | 223 | std::array<u8, 4> intra_inter_prob; ///< 0x041C |
| 220 | INSERT_PADDING_BYTES(80); | 224 | INSERT_PADDING_BYTES_NOINIT(80); ///< 0x0420 |
| 221 | std::array<u8, 2> tx_8x8_prob{}; | 225 | std::array<u8, 2> tx_8x8_prob; ///< 0x0470 |
| 222 | std::array<u8, 4> tx_16x16_prob{}; | 226 | std::array<u8, 4> tx_16x16_prob; ///< 0x0472 |
| 223 | std::array<u8, 6> tx_32x32_prob{}; | 227 | std::array<u8, 6> tx_32x32_prob; ///< 0x0476 |
| 224 | std::array<u8, 4> y_mode_prob_e8{}; | 228 | std::array<u8, 4> y_mode_prob_e8; ///< 0x047C |
| 225 | std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{}; | 229 | std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7; ///< 0x0480 |
| 226 | INSERT_PADDING_BYTES(64); | 230 | INSERT_PADDING_BYTES_NOINIT(64); ///< 0x04A0 |
| 227 | std::array<u8, 64> partition_prob{}; | 231 | std::array<u8, 64> partition_prob; ///< 0x04E0 |
| 228 | INSERT_PADDING_BYTES(10); | 232 | INSERT_PADDING_BYTES_NOINIT(10); ///< 0x0520 |
| 229 | std::array<u8, 8> switchable_interp_prob{}; | 233 | std::array<u8, 8> switchable_interp_prob; ///< 0x052A |
| 230 | std::array<u8, 5> comp_inter_prob{}; | 234 | std::array<u8, 5> comp_inter_prob; ///< 0x0532 |
| 231 | std::array<u8, 3> skip_probs{}; | 235 | std::array<u8, 3> skip_probs; ///< 0x0537 |
| 232 | INSERT_PADDING_BYTES(1); | 236 | INSERT_PADDING_BYTES_NOINIT(1); ///< 0x053A |
| 233 | std::array<u8, 3> joints{}; | 237 | std::array<u8, 3> joints; ///< 0x053B |
| 234 | std::array<u8, 2> sign{}; | 238 | std::array<u8, 2> sign; ///< 0x053E |
| 235 | std::array<u8, 2> class_0{}; | 239 | std::array<u8, 2> class_0; ///< 0x0540 |
| 236 | std::array<u8, 6> fr{}; | 240 | std::array<u8, 6> fr; ///< 0x0542 |
| 237 | std::array<u8, 2> class_0_hp{}; | 241 | std::array<u8, 2> class_0_hp; ///< 0x0548 |
| 238 | std::array<u8, 2> high_precision{}; | 242 | std::array<u8, 2> high_precision; ///< 0x054A |
| 239 | std::array<u8, 20> classes{}; | 243 | std::array<u8, 20> classes; ///< 0x054C |
| 240 | std::array<u8, 12> class_0_fr{}; | 244 | std::array<u8, 12> class_0_fr; ///< 0x0560 |
| 241 | std::array<u8, 20> pred_bits{}; | 245 | std::array<u8, 20> pred_bits; ///< 0x056C |
| 242 | std::array<u8, 10> single_ref_prob{}; | 246 | std::array<u8, 10> single_ref_prob; ///< 0x0580 |
| 243 | std::array<u8, 5> comp_ref_prob{}; | 247 | std::array<u8, 5> comp_ref_prob; ///< 0x058A |
| 244 | INSERT_PADDING_BYTES(17); | 248 | INSERT_PADDING_BYTES_NOINIT(17); ///< 0x058F |
| 245 | std::array<u8, 2304> coef_probs{}; | 249 | std::array<u8, 2304> coef_probs; ///< 0x05A0 |
| 246 | 250 | ||
| 247 | void Convert(Vp9EntropyProbs& fc) { | 251 | void Convert(Vp9EntropyProbs& fc) { |
| 248 | fc.inter_mode_prob = inter_mode_prob; | 252 | fc.inter_mode_prob = inter_mode_prob; |
| @@ -293,10 +297,45 @@ struct RefPoolElement { | |||
| 293 | }; | 297 | }; |
| 294 | 298 | ||
| 295 | struct FrameContexts { | 299 | struct FrameContexts { |
| 296 | s64 from{}; | 300 | s64 from; |
| 297 | bool adapted{}; | 301 | bool adapted; |
| 298 | Vp9EntropyProbs probs{}; | 302 | Vp9EntropyProbs probs; |
| 299 | }; | 303 | }; |
| 300 | 304 | ||
| 305 | #define ASSERT_POSITION(field_name, position) \ | ||
| 306 | static_assert(offsetof(Vp9EntropyProbs, field_name) == position, \ | ||
| 307 | "Field " #field_name " has invalid position") | ||
| 308 | |||
| 309 | ASSERT_POSITION(partition_prob, 0x0024); | ||
| 310 | ASSERT_POSITION(switchable_interp_prob, 0x0724); | ||
| 311 | ASSERT_POSITION(sign, 0x0772); | ||
| 312 | ASSERT_POSITION(class_0_fr, 0x079E); | ||
| 313 | ASSERT_POSITION(high_precision, 0x07B2); | ||
| 314 | #undef ASSERT_POSITION | ||
| 315 | |||
| 316 | #define ASSERT_POSITION(field_name, position) \ | ||
| 317 | static_assert(offsetof(PictureInfo, field_name) == position, \ | ||
| 318 | "Field " #field_name " has invalid position") | ||
| 319 | |||
| 320 | ASSERT_POSITION(bitstream_size, 0x30); | ||
| 321 | ASSERT_POSITION(last_frame_size, 0x48); | ||
| 322 | ASSERT_POSITION(first_level, 0x70); | ||
| 323 | ASSERT_POSITION(segmentation, 0x80); | ||
| 324 | ASSERT_POSITION(loop_filter, 0xE4); | ||
| 325 | ASSERT_POSITION(surface_params, 0xF0); | ||
| 326 | #undef ASSERT_POSITION | ||
| 327 | |||
| 328 | #define ASSERT_POSITION(field_name, position) \ | ||
| 329 | static_assert(offsetof(EntropyProbs, field_name) == position, \ | ||
| 330 | "Field " #field_name " has invalid position") | ||
| 331 | |||
| 332 | ASSERT_POSITION(inter_mode_prob, 0x400); | ||
| 333 | ASSERT_POSITION(tx_8x8_prob, 0x470); | ||
| 334 | ASSERT_POSITION(partition_prob, 0x4E0); | ||
| 335 | ASSERT_POSITION(class_0, 0x540); | ||
| 336 | ASSERT_POSITION(class_0_fr, 0x560); | ||
| 337 | ASSERT_POSITION(coef_probs, 0x5A0); | ||
| 338 | #undef ASSERT_POSITION | ||
| 339 | |||
| 301 | }; // namespace Decoder | 340 | }; // namespace Decoder |
| 302 | }; // namespace Tegra | 341 | }; // namespace Tegra |
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp index e4f919afd..b5e3b70fc 100644 --- a/src/video_core/command_classes/nvdec.cpp +++ b/src/video_core/command_classes/nvdec.cpp | |||
| @@ -8,22 +8,21 @@ | |||
| 8 | 8 | ||
| 9 | namespace Tegra { | 9 | namespace Tegra { |
| 10 | 10 | ||
| 11 | Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {} | 11 | #define NVDEC_REG_INDEX(field_name) \ |
| 12 | (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64)) | ||
| 13 | |||
| 14 | Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), state{}, codec(std::make_unique<Codec>(gpu, state)) {} | ||
| 12 | 15 | ||
| 13 | Nvdec::~Nvdec() = default; | 16 | Nvdec::~Nvdec() = default; |
| 14 | 17 | ||
| 15 | void Nvdec::ProcessMethod(Method method, u32 argument) { | 18 | void Nvdec::ProcessMethod(u32 method, u32 argument) { |
| 16 | if (method == Method::SetVideoCodec) { | 19 | state.reg_array[method] = static_cast<u64>(argument) << 8; |
| 17 | codec->StateWrite(static_cast<u32>(method), argument); | ||
| 18 | } else { | ||
| 19 | codec->StateWrite(static_cast<u32>(method), static_cast<u64>(argument) << 8); | ||
| 20 | } | ||
| 21 | 20 | ||
| 22 | switch (method) { | 21 | switch (method) { |
| 23 | case Method::SetVideoCodec: | 22 | case NVDEC_REG_INDEX(set_codec_id): |
| 24 | codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument)); | 23 | codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument)); |
| 25 | break; | 24 | break; |
| 26 | case Method::Execute: | 25 | case NVDEC_REG_INDEX(execute): |
| 27 | Execute(); | 26 | Execute(); |
| 28 | break; | 27 | break; |
| 29 | } | 28 | } |
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h index e66be80b8..6e1da0b04 100644 --- a/src/video_core/command_classes/nvdec.h +++ b/src/video_core/command_classes/nvdec.h | |||
| @@ -14,16 +14,11 @@ class GPU; | |||
| 14 | 14 | ||
| 15 | class Nvdec { | 15 | class Nvdec { |
| 16 | public: | 16 | public: |
| 17 | enum class Method : u32 { | ||
| 18 | SetVideoCodec = 0x80, | ||
| 19 | Execute = 0xc0, | ||
| 20 | }; | ||
| 21 | |||
| 22 | explicit Nvdec(GPU& gpu); | 17 | explicit Nvdec(GPU& gpu); |
| 23 | ~Nvdec(); | 18 | ~Nvdec(); |
| 24 | 19 | ||
| 25 | /// Writes the method into the state, Invoke Execute() if encountered | 20 | /// Writes the method into the state, Invoke Execute() if encountered |
| 26 | void ProcessMethod(Method method, u32 argument); | 21 | void ProcessMethod(u32 method, u32 argument); |
| 27 | 22 | ||
| 28 | /// Return most recently decoded frame | 23 | /// Return most recently decoded frame |
| 29 | [[nodiscard]] AVFramePtr GetFrame(); | 24 | [[nodiscard]] AVFramePtr GetFrame(); |
| @@ -33,6 +28,7 @@ private: | |||
| 33 | void Execute(); | 28 | void Execute(); |
| 34 | 29 | ||
| 35 | GPU& gpu; | 30 | GPU& gpu; |
| 31 | NvdecCommon::NvdecRegisters state; | ||
| 36 | std::unique_ptr<Codec> codec; | 32 | std::unique_ptr<Codec> codec; |
| 37 | }; | 33 | }; |
| 38 | } // namespace Tegra | 34 | } // namespace Tegra |
diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h index 01b5e086d..6a24e00a0 100644 --- a/src/video_core/command_classes/nvdec_common.h +++ b/src/video_core/command_classes/nvdec_common.h | |||
| @@ -4,40 +4,13 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "common/bit_field.h" | ||
| 7 | #include "common/common_funcs.h" | 8 | #include "common/common_funcs.h" |
| 8 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 9 | 10 | ||
| 10 | namespace Tegra::NvdecCommon { | 11 | namespace Tegra::NvdecCommon { |
| 11 | 12 | ||
| 12 | struct NvdecRegisters { | 13 | enum class VideoCodec : u64 { |
| 13 | INSERT_PADDING_WORDS(256); | ||
| 14 | u64 set_codec_id{}; | ||
| 15 | INSERT_PADDING_WORDS(254); | ||
| 16 | u64 set_platform_id{}; | ||
| 17 | u64 picture_info_offset{}; | ||
| 18 | u64 frame_bitstream_offset{}; | ||
| 19 | u64 frame_number{}; | ||
| 20 | u64 h264_slice_data_offsets{}; | ||
| 21 | u64 h264_mv_dump_offset{}; | ||
| 22 | INSERT_PADDING_WORDS(6); | ||
| 23 | u64 frame_stats_offset{}; | ||
| 24 | u64 h264_last_surface_luma_offset{}; | ||
| 25 | u64 h264_last_surface_chroma_offset{}; | ||
| 26 | std::array<u64, 17> surface_luma_offset{}; | ||
| 27 | std::array<u64, 17> surface_chroma_offset{}; | ||
| 28 | INSERT_PADDING_WORDS(132); | ||
| 29 | u64 vp9_entropy_probs_offset{}; | ||
| 30 | u64 vp9_backward_updates_offset{}; | ||
| 31 | u64 vp9_last_frame_segmap_offset{}; | ||
| 32 | u64 vp9_curr_frame_segmap_offset{}; | ||
| 33 | INSERT_PADDING_WORDS(2); | ||
| 34 | u64 vp9_last_frame_mvs_offset{}; | ||
| 35 | u64 vp9_curr_frame_mvs_offset{}; | ||
| 36 | INSERT_PADDING_WORDS(2); | ||
| 37 | }; | ||
| 38 | static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); | ||
| 39 | |||
| 40 | enum class VideoCodec : u32 { | ||
| 41 | None = 0x0, | 14 | None = 0x0, |
| 42 | H264 = 0x3, | 15 | H264 = 0x3, |
| 43 | Vp8 = 0x5, | 16 | Vp8 = 0x5, |
| @@ -45,4 +18,76 @@ enum class VideoCodec : u32 { | |||
| 45 | Vp9 = 0x9, | 18 | Vp9 = 0x9, |
| 46 | }; | 19 | }; |
| 47 | 20 | ||
| 21 | // NVDEC should use a 32-bit address space, but is mapped to 64-bit, | ||
| 22 | // doubling the sizes here is compensating for that. | ||
| 23 | struct NvdecRegisters { | ||
| 24 | static constexpr std::size_t NUM_REGS = 0x178; | ||
| 25 | |||
| 26 | union { | ||
| 27 | struct { | ||
| 28 | INSERT_PADDING_WORDS_NOINIT(256); ///< 0x0000 | ||
| 29 | VideoCodec set_codec_id; ///< 0x0400 | ||
| 30 | INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0408 | ||
| 31 | u64 execute; ///< 0x0600 | ||
| 32 | INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0608 | ||
| 33 | struct { ///< 0x0800 | ||
| 34 | union { | ||
| 35 | BitField<0, 3, VideoCodec> codec; | ||
| 36 | BitField<4, 1, u64> gp_timer_on; | ||
| 37 | BitField<13, 1, u64> mb_timer_on; | ||
| 38 | BitField<14, 1, u64> intra_frame_pslc; | ||
| 39 | BitField<17, 1, u64> all_intra_frame; | ||
| 40 | }; | ||
| 41 | } control_params; | ||
| 42 | u64 picture_info_offset; ///< 0x0808 | ||
| 43 | u64 frame_bitstream_offset; ///< 0x0810 | ||
| 44 | u64 frame_number; ///< 0x0818 | ||
| 45 | u64 h264_slice_data_offsets; ///< 0x0820 | ||
| 46 | u64 h264_mv_dump_offset; ///< 0x0828 | ||
| 47 | INSERT_PADDING_WORDS_NOINIT(6); ///< 0x0830 | ||
| 48 | u64 frame_stats_offset; ///< 0x0848 | ||
| 49 | u64 h264_last_surface_luma_offset; ///< 0x0850 | ||
| 50 | u64 h264_last_surface_chroma_offset; ///< 0x0858 | ||
| 51 | std::array<u64, 17> surface_luma_offset; ///< 0x0860 | ||
| 52 | std::array<u64, 17> surface_chroma_offset; ///< 0x08E8 | ||
| 53 | INSERT_PADDING_WORDS_NOINIT(132); ///< 0x0970 | ||
| 54 | u64 vp9_entropy_probs_offset; ///< 0x0B80 | ||
| 55 | u64 vp9_backward_updates_offset; ///< 0x0B88 | ||
| 56 | u64 vp9_last_frame_segmap_offset; ///< 0x0B90 | ||
| 57 | u64 vp9_curr_frame_segmap_offset; ///< 0x0B98 | ||
| 58 | INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BA0 | ||
| 59 | u64 vp9_last_frame_mvs_offset; ///< 0x0BA8 | ||
| 60 | u64 vp9_curr_frame_mvs_offset; ///< 0x0BB0 | ||
| 61 | INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BB8 | ||
| 62 | }; | ||
| 63 | std::array<u64, NUM_REGS> reg_array; | ||
| 64 | }; | ||
| 65 | }; | ||
| 66 | static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); | ||
| 67 | |||
| 68 | #define ASSERT_REG_POSITION(field_name, position) \ | ||
| 69 | static_assert(offsetof(NvdecRegisters, field_name) == position * sizeof(u64), \ | ||
| 70 | "Field " #field_name " has invalid position") | ||
| 71 | |||
| 72 | ASSERT_REG_POSITION(set_codec_id, 0x80); | ||
| 73 | ASSERT_REG_POSITION(execute, 0xC0); | ||
| 74 | ASSERT_REG_POSITION(control_params, 0x100); | ||
| 75 | ASSERT_REG_POSITION(picture_info_offset, 0x101); | ||
| 76 | ASSERT_REG_POSITION(frame_bitstream_offset, 0x102); | ||
| 77 | ASSERT_REG_POSITION(frame_number, 0x103); | ||
| 78 | ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104); | ||
| 79 | ASSERT_REG_POSITION(frame_stats_offset, 0x109); | ||
| 80 | ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A); | ||
| 81 | ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B); | ||
| 82 | ASSERT_REG_POSITION(surface_luma_offset, 0x10C); | ||
| 83 | ASSERT_REG_POSITION(surface_chroma_offset, 0x11D); | ||
| 84 | ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170); | ||
| 85 | ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171); | ||
| 86 | ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172); | ||
| 87 | ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173); | ||
| 88 | ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175); | ||
| 89 | ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176); | ||
| 90 | |||
| 91 | #undef ASSERT_REG_POSITION | ||
| 92 | |||
| 48 | } // namespace Tegra::NvdecCommon | 93 | } // namespace Tegra::NvdecCommon |