summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/CMakeLists.txt1
-rwxr-xr-xsrc/input_common/analog_from_button.cpp1
-rw-r--r--src/video_core/cdma_pusher.cpp3
-rw-r--r--src/video_core/command_classes/codecs/codec.cpp85
-rw-r--r--src/video_core/command_classes/codecs/codec.h12
-rw-r--r--src/video_core/command_classes/codecs/h264.cpp207
-rw-r--r--src/video_core/command_classes/codecs/h264.h132
-rw-r--r--src/video_core/command_classes/codecs/vp9.cpp4
-rw-r--r--src/video_core/command_classes/codecs/vp9_types.h307
-rw-r--r--src/video_core/command_classes/nvdec.cpp17
-rw-r--r--src/video_core/command_classes/nvdec.h8
-rw-r--r--src/video_core/command_classes/nvdec_common.h103
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp26
-rw-r--r--src/video_core/texture_cache/texture_cache.h3
-rw-r--r--src/yuzu/debugger/profiler.cpp9
15 files changed, 555 insertions, 363 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f30dd49a3..1c3dde31d 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -49,6 +49,7 @@ if (MSVC)
49 /W3 49 /W3
50 /we4062 # enumerator 'identifier' in a switch of enum 'enumeration' is not handled 50 /we4062 # enumerator 'identifier' in a switch of enum 'enumeration' is not handled
51 /we4101 # 'identifier': unreferenced local variable 51 /we4101 # 'identifier': unreferenced local variable
52 /we4189 # 'identifier': local variable is initialized but not referenced
52 /we4265 # 'class': class has virtual functions, but destructor is not virtual 53 /we4265 # 'class': class has virtual functions, but destructor is not virtual
53 /we4388 # signed/unsigned mismatch 54 /we4388 # signed/unsigned mismatch
54 /we4547 # 'operator' : operator before comma has no effect; expected operator with side-effect 55 /we4547 # 'operator' : operator before comma has no effect; expected operator with side-effect
diff --git a/src/input_common/analog_from_button.cpp b/src/input_common/analog_from_button.cpp
index 100138d11..2fafd077f 100755
--- a/src/input_common/analog_from_button.cpp
+++ b/src/input_common/analog_from_button.cpp
@@ -27,6 +27,7 @@ public:
27 down->SetCallback(callbacks); 27 down->SetCallback(callbacks);
28 left->SetCallback(callbacks); 28 left->SetCallback(callbacks);
29 right->SetCallback(callbacks); 29 right->SetCallback(callbacks);
30 modifier->SetCallback(callbacks);
30 } 31 }
31 32
32 bool IsAngleGreater(float old_angle, float new_angle) const { 33 bool IsAngleGreater(float old_angle, float new_angle) const {
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index a3fda1094..8b86ad050 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -103,8 +103,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
103 case ThiMethod::SetMethod1: 103 case ThiMethod::SetMethod1:
104 LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}", 104 LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",
105 static_cast<u32>(nvdec_thi_state.method_0)); 105 static_cast<u32>(nvdec_thi_state.method_0));
106 nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0), 106 nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data);
107 data);
108 break; 107 break;
109 default: 108 default:
110 break; 109 break;
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index d02dc6260..1b4bbc8ac 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -23,8 +23,8 @@ void AVFrameDeleter(AVFrame* ptr) {
23 av_free(ptr); 23 av_free(ptr);
24} 24}
25 25
26Codec::Codec(GPU& gpu_) 26Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs)
27 : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)), 27 : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)),
28 vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} 28 vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
29 29
30Codec::~Codec() { 30Codec::~Codec() {
@@ -43,46 +43,48 @@ Codec::~Codec() {
43 avcodec_close(av_codec_ctx); 43 avcodec_close(av_codec_ctx);
44} 44}
45 45
46void Codec::Initialize() {
47 AVCodecID codec{AV_CODEC_ID_NONE};
48 switch (current_codec) {
49 case NvdecCommon::VideoCodec::H264:
50 codec = AV_CODEC_ID_H264;
51 break;
52 case NvdecCommon::VideoCodec::Vp9:
53 codec = AV_CODEC_ID_VP9;
54 break;
55 default:
56 return;
57 }
58 av_codec = avcodec_find_decoder(codec);
59 av_codec_ctx = avcodec_alloc_context3(av_codec);
60 av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
61
62 // TODO(ameerj): libavcodec gpu hw acceleration
63
64 const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
65 if (av_error < 0) {
66 LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
67 avcodec_close(av_codec_ctx);
68 return;
69 }
70 initialized = true;
71 return;
72}
73
46void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { 74void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
47 if (current_codec != codec) { 75 if (current_codec != codec) {
48 LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec));
49 current_codec = codec; 76 current_codec = codec;
77 LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
50 } 78 }
51} 79}
52 80
53void Codec::StateWrite(u32 offset, u64 arguments) {
54 u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64);
55 std::memcpy(state_offset, &arguments, sizeof(u64));
56}
57
58void Codec::Decode() { 81void Codec::Decode() {
59 bool is_first_frame = false; 82 const bool is_first_frame = !initialized;
60 if (!initialized) { 83 if (!initialized) {
61 if (current_codec == NvdecCommon::VideoCodec::H264) { 84 Initialize();
62 av_codec = avcodec_find_decoder(AV_CODEC_ID_H264);
63 } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
64 av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9);
65 } else {
66 LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec);
67 return;
68 }
69
70 av_codec_ctx = avcodec_alloc_context3(av_codec);
71 av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
72
73 // TODO(ameerj): libavcodec gpu hw acceleration
74
75 const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
76 if (av_error < 0) {
77 LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
78 avcodec_close(av_codec_ctx);
79 return;
80 }
81 initialized = true;
82 is_first_frame = true;
83 } 85 }
84 bool vp9_hidden_frame = false;
85 86
87 bool vp9_hidden_frame = false;
86 AVPacket packet{}; 88 AVPacket packet{};
87 av_init_packet(&packet); 89 av_init_packet(&packet);
88 std::vector<u8> frame_data; 90 std::vector<u8> frame_data;
@@ -95,7 +97,7 @@ void Codec::Decode() {
95 } 97 }
96 98
97 packet.data = frame_data.data(); 99 packet.data = frame_data.data();
98 packet.size = static_cast<int>(frame_data.size()); 100 packet.size = static_cast<s32>(frame_data.size());
99 101
100 avcodec_send_packet(av_codec_ctx, &packet); 102 avcodec_send_packet(av_codec_ctx, &packet);
101 103
@@ -127,4 +129,21 @@ NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
127 return current_codec; 129 return current_codec;
128} 130}
129 131
132std::string_view Codec::GetCurrentCodecName() const {
133 switch (current_codec) {
134 case NvdecCommon::VideoCodec::None:
135 return "None";
136 case NvdecCommon::VideoCodec::H264:
137 return "H264";
138 case NvdecCommon::VideoCodec::Vp8:
139 return "VP8";
140 case NvdecCommon::VideoCodec::H265:
141 return "H265";
142 case NvdecCommon::VideoCodec::Vp9:
143 return "VP9";
144 default:
145 return "Unknown";
146 }
147};
148
130} // namespace Tegra 149} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
index 3e135a2a6..f2aef1699 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -42,15 +42,15 @@ class VP9;
42 42
43class Codec { 43class Codec {
44public: 44public:
45 explicit Codec(GPU& gpu); 45 explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs);
46 ~Codec(); 46 ~Codec();
47 47
48 /// Initialize the codec, returning success or failure
49 void Initialize();
50
48 /// Sets NVDEC video stream codec 51 /// Sets NVDEC video stream codec
49 void SetTargetCodec(NvdecCommon::VideoCodec codec); 52 void SetTargetCodec(NvdecCommon::VideoCodec codec);
50 53
51 /// Populate NvdecRegisters state with argument value at the provided offset
52 void StateWrite(u32 offset, u64 arguments);
53
54 /// Call decoders to construct headers, decode AVFrame with ffmpeg 54 /// Call decoders to construct headers, decode AVFrame with ffmpeg
55 void Decode(); 55 void Decode();
56 56
@@ -59,6 +59,8 @@ public:
59 59
60 /// Returns the value of current_codec 60 /// Returns the value of current_codec
61 [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; 61 [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
62 /// Return name of the current codec
63 [[nodiscard]] std::string_view GetCurrentCodecName() const;
62 64
63private: 65private:
64 bool initialized{}; 66 bool initialized{};
@@ -68,10 +70,10 @@ private:
68 AVCodecContext* av_codec_ctx{nullptr}; 70 AVCodecContext* av_codec_ctx{nullptr};
69 71
70 GPU& gpu; 72 GPU& gpu;
73 const NvdecCommon::NvdecRegisters& state;
71 std::unique_ptr<Decoder::H264> h264_decoder; 74 std::unique_ptr<Decoder::H264> h264_decoder;
72 std::unique_ptr<Decoder::VP9> vp9_decoder; 75 std::unique_ptr<Decoder::VP9> vp9_decoder;
73 76
74 NvdecCommon::NvdecRegisters state{};
75 std::queue<AVFramePtr> av_frames{}; 77 std::queue<AVFramePtr> av_frames{};
76}; 78};
77 79
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp
index fea6aed98..5fb6d45ee 100644
--- a/src/video_core/command_classes/codecs/h264.cpp
+++ b/src/video_core/command_classes/codecs/h264.cpp
@@ -45,134 +45,129 @@ H264::~H264() = default;
45 45
46const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state, 46const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state,
47 bool is_first_frame) { 47 bool is_first_frame) {
48 H264DecoderContext context{}; 48 H264DecoderContext context;
49 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); 49 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
50 50
51 const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff); 51 const s64 frame_number = context.h264_parameter_set.frame_number.Value();
52 if (!is_first_frame && frame_number != 0) { 52 if (!is_first_frame && frame_number != 0) {
53 frame.resize(context.frame_data_size); 53 frame.resize(context.stream_len);
54
55 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); 54 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
56 } else { 55 return frame;
57 /// Encode header 56 }
58 H264BitWriter writer{};
59 writer.WriteU(1, 24);
60 writer.WriteU(0, 1);
61 writer.WriteU(3, 2);
62 writer.WriteU(7, 5);
63 writer.WriteU(100, 8);
64 writer.WriteU(0, 8);
65 writer.WriteU(31, 8);
66 writer.WriteUe(0);
67 const auto chroma_format_idc =
68 static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3);
69 writer.WriteUe(chroma_format_idc);
70 if (chroma_format_idc == 3) {
71 writer.WriteBit(false);
72 }
73
74 writer.WriteUe(0);
75 writer.WriteUe(0);
76 writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
77 writer.WriteBit(false); // Scaling matrix present flag
78
79 const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3);
80 writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf));
81 writer.WriteUe(order_cnt_type);
82 if (order_cnt_type == 0) {
83 writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt);
84 } else if (order_cnt_type == 1) {
85 writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
86
87 writer.WriteSe(0);
88 writer.WriteSe(0);
89 writer.WriteUe(0);
90 }
91
92 const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units /
93 (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
94 57
95 writer.WriteUe(16); 58 // Encode header
59 H264BitWriter writer{};
60 writer.WriteU(1, 24);
61 writer.WriteU(0, 1);
62 writer.WriteU(3, 2);
63 writer.WriteU(7, 5);
64 writer.WriteU(100, 8);
65 writer.WriteU(0, 8);
66 writer.WriteU(31, 8);
67 writer.WriteUe(0);
68 const u32 chroma_format_idc =
69 static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value());
70 writer.WriteUe(chroma_format_idc);
71 if (chroma_format_idc == 3) {
96 writer.WriteBit(false); 72 writer.WriteBit(false);
97 writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); 73 }
98 writer.WriteUe(pic_height - 1);
99 writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
100
101 if (!context.h264_parameter_set.frame_mbs_only_flag) {
102 writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0);
103 }
104 74
105 writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0); 75 writer.WriteUe(0);
106 writer.WriteBit(false); // Frame cropping flag 76 writer.WriteUe(0);
107 writer.WriteBit(false); // VUI parameter present flag 77 writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
78 writer.WriteBit(false); // Scaling matrix present flag
108 79
109 writer.End(); 80 writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value()));
110 81
111 // H264 PPS 82 const auto order_cnt_type =
112 writer.WriteU(1, 24); 83 static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value());
113 writer.WriteU(0, 1); 84 writer.WriteUe(order_cnt_type);
114 writer.WriteU(3, 2); 85 if (order_cnt_type == 0) {
115 writer.WriteU(8, 5); 86 writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4);
87 } else if (order_cnt_type == 1) {
88 writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
116 89
90 writer.WriteSe(0);
91 writer.WriteSe(0);
117 writer.WriteUe(0); 92 writer.WriteUe(0);
118 writer.WriteUe(0); 93 }
119 94
120 writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); 95 const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units /
121 writer.WriteBit(false); 96 (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
122 writer.WriteUe(0); 97
123 writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); 98 writer.WriteUe(16);
124 writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); 99 writer.WriteBit(false);
125 writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0); 100 writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
126 writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2); 101 writer.WriteUe(pic_height - 1);
127 s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f); 102 writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
128 pic_init_qp = (pic_init_qp << 26) >> 26;
129 writer.WriteSe(pic_init_qp);
130 writer.WriteSe(0);
131 s32 chroma_qp_index_offset =
132 static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f);
133 chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27;
134 103
135 writer.WriteSe(chroma_qp_index_offset); 104 if (!context.h264_parameter_set.frame_mbs_only_flag) {
136 writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0); 105 writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0);
137 writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0); 106 }
138 writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0);
139 writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
140 107
108 writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0);
109 writer.WriteBit(false); // Frame cropping flag
110 writer.WriteBit(false); // VUI parameter present flag
111
112 writer.End();
113
114 // H264 PPS
115 writer.WriteU(1, 24);
116 writer.WriteU(0, 1);
117 writer.WriteU(3, 2);
118 writer.WriteU(8, 5);
119
120 writer.WriteUe(0);
121 writer.WriteUe(0);
122
123 writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
124 writer.WriteBit(false);
125 writer.WriteUe(0);
126 writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
127 writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
128 writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0);
129 writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2);
130 s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value());
131 writer.WriteSe(pic_init_qp);
132 writer.WriteSe(0);
133 s32 chroma_qp_index_offset =
134 static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value());
135
136 writer.WriteSe(chroma_qp_index_offset);
137 writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0);
138 writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0);
139 writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0);
140 writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
141
142 writer.WriteBit(true);
143
144 for (s32 index = 0; index < 6; index++) {
141 writer.WriteBit(true); 145 writer.WriteBit(true);
146 std::span<const u8> matrix{context.weight_scale};
147 writer.WriteScalingList(matrix, index * 16, 16);
148 }
142 149
143 for (s32 index = 0; index < 6; index++) { 150 if (context.h264_parameter_set.transform_8x8_mode_flag) {
151 for (s32 index = 0; index < 2; index++) {
144 writer.WriteBit(true); 152 writer.WriteBit(true);
145 const auto matrix_x4 = 153 std::span<const u8> matrix{context.weight_scale_8x8};
146 std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end()); 154 writer.WriteScalingList(matrix, index * 64, 64);
147 writer.WriteScalingList(matrix_x4, index * 16, 16);
148 }
149
150 if (context.h264_parameter_set.transform_8x8_mode_flag) {
151 for (s32 index = 0; index < 2; index++) {
152 writer.WriteBit(true);
153 const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(),
154 context.scaling_matrix_8.end());
155
156 writer.WriteScalingList(matrix_x8, index * 64, 64);
157 }
158 } 155 }
156 }
159 157
160 s32 chroma_qp_index_offset2 = 158 s32 chroma_qp_index_offset2 =
161 static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f); 159 static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value());
162 chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27;
163 160
164 writer.WriteSe(chroma_qp_index_offset2); 161 writer.WriteSe(chroma_qp_index_offset2);
165 162
166 writer.End(); 163 writer.End();
167 164
168 const auto& encoded_header = writer.GetByteArray(); 165 const auto& encoded_header = writer.GetByteArray();
169 frame.resize(encoded_header.size() + context.frame_data_size); 166 frame.resize(encoded_header.size() + context.stream_len);
170 std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); 167 std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
171 168
172 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, 169 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
173 frame.data() + encoded_header.size(), 170 frame.data() + encoded_header.size(), context.stream_len);
174 context.frame_data_size);
175 }
176 171
177 return frame; 172 return frame;
178} 173}
@@ -202,7 +197,7 @@ void H264BitWriter::WriteBit(bool state) {
202 WriteBits(state ? 1 : 0, 1); 197 WriteBits(state ? 1 : 0, 1);
203} 198}
204 199
205void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) { 200void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {
206 std::vector<u8> scan(count); 201 std::vector<u8> scan(count);
207 if (count == 16) { 202 if (count == 16) {
208 std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); 203 std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h
index 0f3a1d9f3..bfe84a472 100644
--- a/src/video_core/command_classes/codecs/h264.h
+++ b/src/video_core/command_classes/codecs/h264.h
@@ -20,7 +20,9 @@
20 20
21#pragma once 21#pragma once
22 22
23#include <span>
23#include <vector> 24#include <vector>
25#include "common/bit_field.h"
24#include "common/common_funcs.h" 26#include "common/common_funcs.h"
25#include "common/common_types.h" 27#include "common/common_types.h"
26#include "video_core/command_classes/nvdec_common.h" 28#include "video_core/command_classes/nvdec_common.h"
@@ -48,7 +50,7 @@ public:
48 50
49 /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification 51 /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
50 /// Writes the scaling matrices of the sream 52 /// Writes the scaling matrices of the sream
51 void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count); 53 void WriteScalingList(std::span<const u8> list, s32 start, s32 count);
52 54
53 /// Return the bitstream as a vector. 55 /// Return the bitstream as a vector.
54 [[nodiscard]] std::vector<u8>& GetByteArray(); 56 [[nodiscard]] std::vector<u8>& GetByteArray();
@@ -78,40 +80,110 @@ public:
78 const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false); 80 const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);
79 81
80private: 82private:
83 std::vector<u8> frame;
84 GPU& gpu;
85
81 struct H264ParameterSet { 86 struct H264ParameterSet {
82 u32 log2_max_pic_order_cnt{}; 87 s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00
83 u32 delta_pic_order_always_zero_flag{}; 88 s32 delta_pic_order_always_zero_flag; ///< 0x04
84 u32 frame_mbs_only_flag{}; 89 s32 frame_mbs_only_flag; ///< 0x08
85 u32 pic_width_in_mbs{}; 90 u32 pic_width_in_mbs; ///< 0x0C
86 u32 pic_height_in_map_units{}; 91 u32 frame_height_in_map_units; ///< 0x10
87 INSERT_PADDING_WORDS(1); 92 union { ///< 0x14
88 u32 entropy_coding_mode_flag{}; 93 BitField<0, 2, u32> tile_format;
89 u32 bottom_field_pic_order_flag{}; 94 BitField<2, 3, u32> gob_height;
90 u32 num_refidx_l0_default_active{}; 95 };
91 u32 num_refidx_l1_default_active{}; 96 u32 entropy_coding_mode_flag; ///< 0x18
92 u32 deblocking_filter_control_flag{}; 97 s32 pic_order_present_flag; ///< 0x1C
93 u32 redundant_pic_count_flag{}; 98 s32 num_refidx_l0_default_active; ///< 0x20
94 u32 transform_8x8_mode_flag{}; 99 s32 num_refidx_l1_default_active; ///< 0x24
95 INSERT_PADDING_WORDS(9); 100 s32 deblocking_filter_control_present_flag; ///< 0x28
96 u64 flags{}; 101 s32 redundant_pic_cnt_present_flag; ///< 0x2C
97 u32 frame_number{}; 102 u32 transform_8x8_mode_flag; ///< 0x30
98 u32 frame_number2{}; 103 u32 pitch_luma; ///< 0x34
104 u32 pitch_chroma; ///< 0x38
105 u32 luma_top_offset; ///< 0x3C
106 u32 luma_bot_offset; ///< 0x40
107 u32 luma_frame_offset; ///< 0x44
108 u32 chroma_top_offset; ///< 0x48
109 u32 chroma_bot_offset; ///< 0x4C
110 u32 chroma_frame_offset; ///< 0x50
111 u32 hist_buffer_size; ///< 0x54
112 union { ///< 0x58
113 union {
114 BitField<0, 1, u64> mbaff_frame;
115 BitField<1, 1, u64> direct_8x8_inference;
116 BitField<2, 1, u64> weighted_pred;
117 BitField<3, 1, u64> constrained_intra_pred;
118 BitField<4, 1, u64> ref_pic;
119 BitField<5, 1, u64> field_pic;
120 BitField<6, 1, u64> bottom_field;
121 BitField<7, 1, u64> second_field;
122 } flags;
123 BitField<8, 4, u64> log2_max_frame_num_minus4;
124 BitField<12, 2, u64> chroma_format_idc;
125 BitField<14, 2, u64> pic_order_cnt_type;
126 BitField<16, 6, s64> pic_init_qp_minus26;
127 BitField<22, 5, s64> chroma_qp_index_offset;
128 BitField<27, 5, s64> second_chroma_qp_index_offset;
129 BitField<32, 2, u64> weighted_bipred_idc;
130 BitField<34, 7, u64> curr_pic_idx;
131 BitField<41, 5, u64> curr_col_idx;
132 BitField<46, 16, u64> frame_number;
133 BitField<62, 1, u64> frame_surfaces;
134 BitField<63, 1, u64> output_memory_layout;
135 };
99 }; 136 };
100 static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size"); 137 static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size");
101 138
102 struct H264DecoderContext { 139 struct H264DecoderContext {
103 INSERT_PADDING_BYTES(0x48); 140 INSERT_PADDING_WORDS_NOINIT(18); ///< 0x0000
104 u32 frame_data_size{}; 141 u32 stream_len; ///< 0x0048
105 INSERT_PADDING_BYTES(0xc); 142 INSERT_PADDING_WORDS_NOINIT(3); ///< 0x004C
106 H264ParameterSet h264_parameter_set{}; 143 H264ParameterSet h264_parameter_set; ///< 0x0058
107 INSERT_PADDING_BYTES(0x100); 144 INSERT_PADDING_WORDS_NOINIT(66); ///< 0x00B8
108 std::array<u8, 0x60> scaling_matrix_4; 145 std::array<u8, 0x60> weight_scale; ///< 0x01C0
109 std::array<u8, 0x80> scaling_matrix_8; 146 std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220
110 }; 147 };
111 static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size"); 148 static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size");
112 149
113 std::vector<u8> frame; 150#define ASSERT_POSITION(field_name, position) \
114 GPU& gpu; 151 static_assert(offsetof(H264ParameterSet, field_name) == position, \
152 "Field " #field_name " has invalid position")
153
154 ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00);
155 ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04);
156 ASSERT_POSITION(frame_mbs_only_flag, 0x08);
157 ASSERT_POSITION(pic_width_in_mbs, 0x0C);
158 ASSERT_POSITION(frame_height_in_map_units, 0x10);
159 ASSERT_POSITION(tile_format, 0x14);
160 ASSERT_POSITION(entropy_coding_mode_flag, 0x18);
161 ASSERT_POSITION(pic_order_present_flag, 0x1C);
162 ASSERT_POSITION(num_refidx_l0_default_active, 0x20);
163 ASSERT_POSITION(num_refidx_l1_default_active, 0x24);
164 ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28);
165 ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C);
166 ASSERT_POSITION(transform_8x8_mode_flag, 0x30);
167 ASSERT_POSITION(pitch_luma, 0x34);
168 ASSERT_POSITION(pitch_chroma, 0x38);
169 ASSERT_POSITION(luma_top_offset, 0x3C);
170 ASSERT_POSITION(luma_bot_offset, 0x40);
171 ASSERT_POSITION(luma_frame_offset, 0x44);
172 ASSERT_POSITION(chroma_top_offset, 0x48);
173 ASSERT_POSITION(chroma_bot_offset, 0x4C);
174 ASSERT_POSITION(chroma_frame_offset, 0x50);
175 ASSERT_POSITION(hist_buffer_size, 0x54);
176 ASSERT_POSITION(flags, 0x58);
177#undef ASSERT_POSITION
178
179#define ASSERT_POSITION(field_name, position) \
180 static_assert(offsetof(H264DecoderContext, field_name) == position, \
181 "Field " #field_name " has invalid position")
182
183 ASSERT_POSITION(stream_len, 0x48);
184 ASSERT_POSITION(h264_parameter_set, 0x58);
185 ASSERT_POSITION(weight_scale, 0x1C0);
186#undef ASSERT_POSITION
115}; 187};
116 188
117} // namespace Decoder 189} // namespace Decoder
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp
index 29bb31418..902bc2a98 100644
--- a/src/video_core/command_classes/codecs/vp9.cpp
+++ b/src/video_core/command_classes/codecs/vp9.cpp
@@ -354,7 +354,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_
354} 354}
355 355
356Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { 356Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) {
357 PictureInfo picture_info{}; 357 PictureInfo picture_info;
358 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); 358 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
359 Vp9PictureInfo vp9_info = picture_info.Convert(); 359 Vp9PictureInfo vp9_info = picture_info.Convert();
360 360
@@ -370,7 +370,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state)
370} 370}
371 371
372void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { 372void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
373 EntropyProbs entropy{}; 373 EntropyProbs entropy;
374 gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); 374 gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
375 entropy.Convert(dst); 375 entropy.Convert(dst);
376} 376}
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h
index 139501a1c..2da14f3ca 100644
--- a/src/video_core/command_classes/codecs/vp9_types.h
+++ b/src/video_core/command_classes/codecs/vp9_types.h
@@ -15,10 +15,10 @@ class GPU;
15 15
16namespace Decoder { 16namespace Decoder {
17struct Vp9FrameDimensions { 17struct Vp9FrameDimensions {
18 s16 width{}; 18 s16 width;
19 s16 height{}; 19 s16 height;
20 s16 luma_pitch{}; 20 s16 luma_pitch;
21 s16 chroma_pitch{}; 21 s16 chroma_pitch;
22}; 22};
23static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); 23static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");
24 24
@@ -49,87 +49,87 @@ enum class TxMode {
49}; 49};
50 50
51struct Segmentation { 51struct Segmentation {
52 u8 enabled{}; 52 u8 enabled;
53 u8 update_map{}; 53 u8 update_map;
54 u8 temporal_update{}; 54 u8 temporal_update;
55 u8 abs_delta{}; 55 u8 abs_delta;
56 std::array<u32, 8> feature_mask{}; 56 std::array<u32, 8> feature_mask;
57 std::array<std::array<s16, 4>, 8> feature_data{}; 57 std::array<std::array<s16, 4>, 8> feature_data;
58}; 58};
59static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size"); 59static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");
60 60
61struct LoopFilter { 61struct LoopFilter {
62 u8 mode_ref_delta_enabled{}; 62 u8 mode_ref_delta_enabled;
63 std::array<s8, 4> ref_deltas{}; 63 std::array<s8, 4> ref_deltas;
64 std::array<s8, 2> mode_deltas{}; 64 std::array<s8, 2> mode_deltas;
65}; 65};
66static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size"); 66static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size");
67 67
68struct Vp9EntropyProbs { 68struct Vp9EntropyProbs {
69 std::array<u8, 36> y_mode_prob{}; 69 std::array<u8, 36> y_mode_prob; ///< 0x0000
70 std::array<u8, 64> partition_prob{}; 70 std::array<u8, 64> partition_prob; ///< 0x0024
71 std::array<u8, 1728> coef_probs{}; 71 std::array<u8, 1728> coef_probs; ///< 0x0064
72 std::array<u8, 8> switchable_interp_prob{}; 72 std::array<u8, 8> switchable_interp_prob; ///< 0x0724
73 std::array<u8, 28> inter_mode_prob{}; 73 std::array<u8, 28> inter_mode_prob; ///< 0x072C
74 std::array<u8, 4> intra_inter_prob{}; 74 std::array<u8, 4> intra_inter_prob; ///< 0x0748
75 std::array<u8, 5> comp_inter_prob{}; 75 std::array<u8, 5> comp_inter_prob; ///< 0x074C
76 std::array<u8, 10> single_ref_prob{}; 76 std::array<u8, 10> single_ref_prob; ///< 0x0751
77 std::array<u8, 5> comp_ref_prob{}; 77 std::array<u8, 5> comp_ref_prob; ///< 0x075B
78 std::array<u8, 6> tx_32x32_prob{}; 78 std::array<u8, 6> tx_32x32_prob; ///< 0x0760
79 std::array<u8, 4> tx_16x16_prob{}; 79 std::array<u8, 4> tx_16x16_prob; ///< 0x0766
80 std::array<u8, 2> tx_8x8_prob{}; 80 std::array<u8, 2> tx_8x8_prob; ///< 0x076A
81 std::array<u8, 3> skip_probs{}; 81 std::array<u8, 3> skip_probs; ///< 0x076C
82 std::array<u8, 3> joints{}; 82 std::array<u8, 3> joints; ///< 0x076F
83 std::array<u8, 2> sign{}; 83 std::array<u8, 2> sign; ///< 0x0772
84 std::array<u8, 20> classes{}; 84 std::array<u8, 20> classes; ///< 0x0774
85 std::array<u8, 2> class_0{}; 85 std::array<u8, 2> class_0; ///< 0x0788
86 std::array<u8, 20> prob_bits{}; 86 std::array<u8, 20> prob_bits; ///< 0x078A
87 std::array<u8, 12> class_0_fr{}; 87 std::array<u8, 12> class_0_fr; ///< 0x079E
88 std::array<u8, 6> fr{}; 88 std::array<u8, 6> fr; ///< 0x07AA
89 std::array<u8, 2> class_0_hp{}; 89 std::array<u8, 2> class_0_hp; ///< 0x07B0
90 std::array<u8, 2> high_precision{}; 90 std::array<u8, 2> high_precision; ///< 0x07B2
91}; 91};
92static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size"); 92static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size");
93 93
94struct Vp9PictureInfo { 94struct Vp9PictureInfo {
95 bool is_key_frame{}; 95 bool is_key_frame;
96 bool intra_only{}; 96 bool intra_only;
97 bool last_frame_was_key{}; 97 bool last_frame_was_key;
98 bool frame_size_changed{}; 98 bool frame_size_changed;
99 bool error_resilient_mode{}; 99 bool error_resilient_mode;
100 bool last_frame_shown{}; 100 bool last_frame_shown;
101 bool show_frame{}; 101 bool show_frame;
102 std::array<s8, 4> ref_frame_sign_bias{}; 102 std::array<s8, 4> ref_frame_sign_bias;
103 s32 base_q_index{}; 103 s32 base_q_index;
104 s32 y_dc_delta_q{}; 104 s32 y_dc_delta_q;
105 s32 uv_dc_delta_q{}; 105 s32 uv_dc_delta_q;
106 s32 uv_ac_delta_q{}; 106 s32 uv_ac_delta_q;
107 bool lossless{}; 107 bool lossless;
108 s32 transform_mode{}; 108 s32 transform_mode;
109 bool allow_high_precision_mv{}; 109 bool allow_high_precision_mv;
110 s32 interp_filter{}; 110 s32 interp_filter;
111 s32 reference_mode{}; 111 s32 reference_mode;
112 s8 comp_fixed_ref{}; 112 s8 comp_fixed_ref;
113 std::array<s8, 2> comp_var_ref{}; 113 std::array<s8, 2> comp_var_ref;
114 s32 log2_tile_cols{}; 114 s32 log2_tile_cols;
115 s32 log2_tile_rows{}; 115 s32 log2_tile_rows;
116 bool segment_enabled{}; 116 bool segment_enabled;
117 bool segment_map_update{}; 117 bool segment_map_update;
118 bool segment_map_temporal_update{}; 118 bool segment_map_temporal_update;
119 s32 segment_abs_delta{}; 119 s32 segment_abs_delta;
120 std::array<u32, 8> segment_feature_enable{}; 120 std::array<u32, 8> segment_feature_enable;
121 std::array<std::array<s16, 4>, 8> segment_feature_data{}; 121 std::array<std::array<s16, 4>, 8> segment_feature_data;
122 bool mode_ref_delta_enabled{}; 122 bool mode_ref_delta_enabled;
123 bool use_prev_in_find_mv_refs{}; 123 bool use_prev_in_find_mv_refs;
124 std::array<s8, 4> ref_deltas{}; 124 std::array<s8, 4> ref_deltas;
125 std::array<s8, 2> mode_deltas{}; 125 std::array<s8, 2> mode_deltas;
126 Vp9EntropyProbs entropy{}; 126 Vp9EntropyProbs entropy;
127 Vp9FrameDimensions frame_size{}; 127 Vp9FrameDimensions frame_size;
128 u8 first_level{}; 128 u8 first_level;
129 u8 sharpness_level{}; 129 u8 sharpness_level;
130 u32 bitstream_size{}; 130 u32 bitstream_size;
131 std::array<u64, 4> frame_offsets{}; 131 std::array<u64, 4> frame_offsets;
132 std::array<bool, 4> refresh_frame{}; 132 std::array<bool, 4> refresh_frame;
133}; 133};
134 134
135struct Vp9FrameContainer { 135struct Vp9FrameContainer {
@@ -138,35 +138,35 @@ struct Vp9FrameContainer {
138}; 138};
139 139
140struct PictureInfo { 140struct PictureInfo {
141 INSERT_PADDING_WORDS(12); 141 INSERT_PADDING_WORDS_NOINIT(12); ///< 0x00
142 u32 bitstream_size{}; 142 u32 bitstream_size; ///< 0x30
143 INSERT_PADDING_WORDS(5); 143 INSERT_PADDING_WORDS_NOINIT(5); ///< 0x34
144 Vp9FrameDimensions last_frame_size{}; 144 Vp9FrameDimensions last_frame_size; ///< 0x48
145 Vp9FrameDimensions golden_frame_size{}; 145 Vp9FrameDimensions golden_frame_size; ///< 0x50
146 Vp9FrameDimensions alt_frame_size{}; 146 Vp9FrameDimensions alt_frame_size; ///< 0x58
147 Vp9FrameDimensions current_frame_size{}; 147 Vp9FrameDimensions current_frame_size; ///< 0x60
148 u32 vp9_flags{}; 148 u32 vp9_flags; ///< 0x68
149 std::array<s8, 4> ref_frame_sign_bias{}; 149 std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C
150 u8 first_level{}; 150 u8 first_level; ///< 0x70
151 u8 sharpness_level{}; 151 u8 sharpness_level; ///< 0x71
152 u8 base_q_index{}; 152 u8 base_q_index; ///< 0x72
153 u8 y_dc_delta_q{}; 153 u8 y_dc_delta_q; ///< 0x73
154 u8 uv_ac_delta_q{}; 154 u8 uv_ac_delta_q; ///< 0x74
155 u8 uv_dc_delta_q{}; 155 u8 uv_dc_delta_q; ///< 0x75
156 u8 lossless{}; 156 u8 lossless; ///< 0x76
157 u8 tx_mode{}; 157 u8 tx_mode; ///< 0x77
158 u8 allow_high_precision_mv{}; 158 u8 allow_high_precision_mv; ///< 0x78
159 u8 interp_filter{}; 159 u8 interp_filter; ///< 0x79
160 u8 reference_mode{}; 160 u8 reference_mode; ///< 0x7A
161 s8 comp_fixed_ref{}; 161 s8 comp_fixed_ref; ///< 0x7B
162 std::array<s8, 2> comp_var_ref{}; 162 std::array<s8, 2> comp_var_ref; ///< 0x7C
163 u8 log2_tile_cols{}; 163 u8 log2_tile_cols; ///< 0x7E
164 u8 log2_tile_rows{}; 164 u8 log2_tile_rows; ///< 0x7F
165 Segmentation segmentation{}; 165 Segmentation segmentation; ///< 0x80
166 LoopFilter loop_filter{}; 166 LoopFilter loop_filter; ///< 0xE4
167 INSERT_PADDING_BYTES(5); 167 INSERT_PADDING_BYTES_NOINIT(5); ///< 0xEB
168 u32 surface_params{}; 168 u32 surface_params; ///< 0xF0
169 INSERT_PADDING_WORDS(3); 169 INSERT_PADDING_WORDS_NOINIT(3); ///< 0xF4
170 170
171 [[nodiscard]] Vp9PictureInfo Convert() const { 171 [[nodiscard]] Vp9PictureInfo Convert() const {
172 return { 172 return {
@@ -176,6 +176,7 @@ struct PictureInfo {
176 .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, 176 .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,
177 .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0, 177 .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,
178 .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, 178 .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0,
179 .show_frame = false,
179 .ref_frame_sign_bias = ref_frame_sign_bias, 180 .ref_frame_sign_bias = ref_frame_sign_bias,
180 .base_q_index = base_q_index, 181 .base_q_index = base_q_index,
181 .y_dc_delta_q = y_dc_delta_q, 182 .y_dc_delta_q = y_dc_delta_q,
@@ -204,45 +205,48 @@ struct PictureInfo {
204 !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)), 205 !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)),
205 .ref_deltas = loop_filter.ref_deltas, 206 .ref_deltas = loop_filter.ref_deltas,
206 .mode_deltas = loop_filter.mode_deltas, 207 .mode_deltas = loop_filter.mode_deltas,
208 .entropy{},
207 .frame_size = current_frame_size, 209 .frame_size = current_frame_size,
208 .first_level = first_level, 210 .first_level = first_level,
209 .sharpness_level = sharpness_level, 211 .sharpness_level = sharpness_level,
210 .bitstream_size = bitstream_size, 212 .bitstream_size = bitstream_size,
213 .frame_offsets{},
214 .refresh_frame{},
211 }; 215 };
212 } 216 }
213}; 217};
214static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size"); 218static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size");
215 219
216struct EntropyProbs { 220struct EntropyProbs {
217 INSERT_PADDING_BYTES(1024); 221 INSERT_PADDING_BYTES_NOINIT(1024); ///< 0x0000
218 std::array<u8, 28> inter_mode_prob{}; 222 std::array<u8, 28> inter_mode_prob; ///< 0x0400
219 std::array<u8, 4> intra_inter_prob{}; 223 std::array<u8, 4> intra_inter_prob; ///< 0x041C
220 INSERT_PADDING_BYTES(80); 224 INSERT_PADDING_BYTES_NOINIT(80); ///< 0x0420
221 std::array<u8, 2> tx_8x8_prob{}; 225 std::array<u8, 2> tx_8x8_prob; ///< 0x0470
222 std::array<u8, 4> tx_16x16_prob{}; 226 std::array<u8, 4> tx_16x16_prob; ///< 0x0472
223 std::array<u8, 6> tx_32x32_prob{}; 227 std::array<u8, 6> tx_32x32_prob; ///< 0x0476
224 std::array<u8, 4> y_mode_prob_e8{}; 228 std::array<u8, 4> y_mode_prob_e8; ///< 0x047C
225 std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{}; 229 std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7; ///< 0x0480
226 INSERT_PADDING_BYTES(64); 230 INSERT_PADDING_BYTES_NOINIT(64); ///< 0x04A0
227 std::array<u8, 64> partition_prob{}; 231 std::array<u8, 64> partition_prob; ///< 0x04E0
228 INSERT_PADDING_BYTES(10); 232 INSERT_PADDING_BYTES_NOINIT(10); ///< 0x0520
229 std::array<u8, 8> switchable_interp_prob{}; 233 std::array<u8, 8> switchable_interp_prob; ///< 0x052A
230 std::array<u8, 5> comp_inter_prob{}; 234 std::array<u8, 5> comp_inter_prob; ///< 0x0532
231 std::array<u8, 3> skip_probs{}; 235 std::array<u8, 3> skip_probs; ///< 0x0537
232 INSERT_PADDING_BYTES(1); 236 INSERT_PADDING_BYTES_NOINIT(1); ///< 0x053A
233 std::array<u8, 3> joints{}; 237 std::array<u8, 3> joints; ///< 0x053B
234 std::array<u8, 2> sign{}; 238 std::array<u8, 2> sign; ///< 0x053E
235 std::array<u8, 2> class_0{}; 239 std::array<u8, 2> class_0; ///< 0x0540
236 std::array<u8, 6> fr{}; 240 std::array<u8, 6> fr; ///< 0x0542
237 std::array<u8, 2> class_0_hp{}; 241 std::array<u8, 2> class_0_hp; ///< 0x0548
238 std::array<u8, 2> high_precision{}; 242 std::array<u8, 2> high_precision; ///< 0x054A
239 std::array<u8, 20> classes{}; 243 std::array<u8, 20> classes; ///< 0x054C
240 std::array<u8, 12> class_0_fr{}; 244 std::array<u8, 12> class_0_fr; ///< 0x0560
241 std::array<u8, 20> pred_bits{}; 245 std::array<u8, 20> pred_bits; ///< 0x056C
242 std::array<u8, 10> single_ref_prob{}; 246 std::array<u8, 10> single_ref_prob; ///< 0x0580
243 std::array<u8, 5> comp_ref_prob{}; 247 std::array<u8, 5> comp_ref_prob; ///< 0x058A
244 INSERT_PADDING_BYTES(17); 248 INSERT_PADDING_BYTES_NOINIT(17); ///< 0x058F
245 std::array<u8, 2304> coef_probs{}; 249 std::array<u8, 2304> coef_probs; ///< 0x05A0
246 250
247 void Convert(Vp9EntropyProbs& fc) { 251 void Convert(Vp9EntropyProbs& fc) {
248 fc.inter_mode_prob = inter_mode_prob; 252 fc.inter_mode_prob = inter_mode_prob;
@@ -293,10 +297,45 @@ struct RefPoolElement {
293}; 297};
294 298
295struct FrameContexts { 299struct FrameContexts {
296 s64 from{}; 300 s64 from;
297 bool adapted{}; 301 bool adapted;
298 Vp9EntropyProbs probs{}; 302 Vp9EntropyProbs probs;
299}; 303};
300 304
305#define ASSERT_POSITION(field_name, position) \
306 static_assert(offsetof(Vp9EntropyProbs, field_name) == position, \
307 "Field " #field_name " has invalid position")
308
309ASSERT_POSITION(partition_prob, 0x0024);
310ASSERT_POSITION(switchable_interp_prob, 0x0724);
311ASSERT_POSITION(sign, 0x0772);
312ASSERT_POSITION(class_0_fr, 0x079E);
313ASSERT_POSITION(high_precision, 0x07B2);
314#undef ASSERT_POSITION
315
316#define ASSERT_POSITION(field_name, position) \
317 static_assert(offsetof(PictureInfo, field_name) == position, \
318 "Field " #field_name " has invalid position")
319
320ASSERT_POSITION(bitstream_size, 0x30);
321ASSERT_POSITION(last_frame_size, 0x48);
322ASSERT_POSITION(first_level, 0x70);
323ASSERT_POSITION(segmentation, 0x80);
324ASSERT_POSITION(loop_filter, 0xE4);
325ASSERT_POSITION(surface_params, 0xF0);
326#undef ASSERT_POSITION
327
328#define ASSERT_POSITION(field_name, position) \
329 static_assert(offsetof(EntropyProbs, field_name) == position, \
330 "Field " #field_name " has invalid position")
331
332ASSERT_POSITION(inter_mode_prob, 0x400);
333ASSERT_POSITION(tx_8x8_prob, 0x470);
334ASSERT_POSITION(partition_prob, 0x4E0);
335ASSERT_POSITION(class_0, 0x540);
336ASSERT_POSITION(class_0_fr, 0x560);
337ASSERT_POSITION(coef_probs, 0x5A0);
338#undef ASSERT_POSITION
339
301}; // namespace Decoder 340}; // namespace Decoder
302}; // namespace Tegra 341}; // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp
index e4f919afd..b5e3b70fc 100644
--- a/src/video_core/command_classes/nvdec.cpp
+++ b/src/video_core/command_classes/nvdec.cpp
@@ -8,22 +8,21 @@
8 8
9namespace Tegra { 9namespace Tegra {
10 10
11Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {} 11#define NVDEC_REG_INDEX(field_name) \
12 (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64))
13
14Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), state{}, codec(std::make_unique<Codec>(gpu, state)) {}
12 15
13Nvdec::~Nvdec() = default; 16Nvdec::~Nvdec() = default;
14 17
15void Nvdec::ProcessMethod(Method method, u32 argument) { 18void Nvdec::ProcessMethod(u32 method, u32 argument) {
16 if (method == Method::SetVideoCodec) { 19 state.reg_array[method] = static_cast<u64>(argument) << 8;
17 codec->StateWrite(static_cast<u32>(method), argument);
18 } else {
19 codec->StateWrite(static_cast<u32>(method), static_cast<u64>(argument) << 8);
20 }
21 20
22 switch (method) { 21 switch (method) {
23 case Method::SetVideoCodec: 22 case NVDEC_REG_INDEX(set_codec_id):
24 codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument)); 23 codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument));
25 break; 24 break;
26 case Method::Execute: 25 case NVDEC_REG_INDEX(execute):
27 Execute(); 26 Execute();
28 break; 27 break;
29 } 28 }
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h
index e66be80b8..6e1da0b04 100644
--- a/src/video_core/command_classes/nvdec.h
+++ b/src/video_core/command_classes/nvdec.h
@@ -14,16 +14,11 @@ class GPU;
14 14
15class Nvdec { 15class Nvdec {
16public: 16public:
17 enum class Method : u32 {
18 SetVideoCodec = 0x80,
19 Execute = 0xc0,
20 };
21
22 explicit Nvdec(GPU& gpu); 17 explicit Nvdec(GPU& gpu);
23 ~Nvdec(); 18 ~Nvdec();
24 19
25 /// Writes the method into the state, Invoke Execute() if encountered 20 /// Writes the method into the state, Invoke Execute() if encountered
26 void ProcessMethod(Method method, u32 argument); 21 void ProcessMethod(u32 method, u32 argument);
27 22
28 /// Return most recently decoded frame 23 /// Return most recently decoded frame
29 [[nodiscard]] AVFramePtr GetFrame(); 24 [[nodiscard]] AVFramePtr GetFrame();
@@ -33,6 +28,7 @@ private:
33 void Execute(); 28 void Execute();
34 29
35 GPU& gpu; 30 GPU& gpu;
31 NvdecCommon::NvdecRegisters state;
36 std::unique_ptr<Codec> codec; 32 std::unique_ptr<Codec> codec;
37}; 33};
38} // namespace Tegra 34} // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h
index 01b5e086d..6a24e00a0 100644
--- a/src/video_core/command_classes/nvdec_common.h
+++ b/src/video_core/command_classes/nvdec_common.h
@@ -4,40 +4,13 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "common/bit_field.h"
7#include "common/common_funcs.h" 8#include "common/common_funcs.h"
8#include "common/common_types.h" 9#include "common/common_types.h"
9 10
10namespace Tegra::NvdecCommon { 11namespace Tegra::NvdecCommon {
11 12
12struct NvdecRegisters { 13enum class VideoCodec : u64 {
13 INSERT_PADDING_WORDS(256);
14 u64 set_codec_id{};
15 INSERT_PADDING_WORDS(254);
16 u64 set_platform_id{};
17 u64 picture_info_offset{};
18 u64 frame_bitstream_offset{};
19 u64 frame_number{};
20 u64 h264_slice_data_offsets{};
21 u64 h264_mv_dump_offset{};
22 INSERT_PADDING_WORDS(6);
23 u64 frame_stats_offset{};
24 u64 h264_last_surface_luma_offset{};
25 u64 h264_last_surface_chroma_offset{};
26 std::array<u64, 17> surface_luma_offset{};
27 std::array<u64, 17> surface_chroma_offset{};
28 INSERT_PADDING_WORDS(132);
29 u64 vp9_entropy_probs_offset{};
30 u64 vp9_backward_updates_offset{};
31 u64 vp9_last_frame_segmap_offset{};
32 u64 vp9_curr_frame_segmap_offset{};
33 INSERT_PADDING_WORDS(2);
34 u64 vp9_last_frame_mvs_offset{};
35 u64 vp9_curr_frame_mvs_offset{};
36 INSERT_PADDING_WORDS(2);
37};
38static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");
39
40enum class VideoCodec : u32 {
41 None = 0x0, 14 None = 0x0,
42 H264 = 0x3, 15 H264 = 0x3,
43 Vp8 = 0x5, 16 Vp8 = 0x5,
@@ -45,4 +18,76 @@ enum class VideoCodec : u32 {
45 Vp9 = 0x9, 18 Vp9 = 0x9,
46}; 19};
47 20
21// NVDEC should use a 32-bit address space, but is mapped to 64-bit,
22// doubling the sizes here is compensating for that.
23struct NvdecRegisters {
24 static constexpr std::size_t NUM_REGS = 0x178;
25
26 union {
27 struct {
28 INSERT_PADDING_WORDS_NOINIT(256); ///< 0x0000
29 VideoCodec set_codec_id; ///< 0x0400
30 INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0408
31 u64 execute; ///< 0x0600
32 INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0608
33 struct { ///< 0x0800
34 union {
35 BitField<0, 3, VideoCodec> codec;
36 BitField<4, 1, u64> gp_timer_on;
37 BitField<13, 1, u64> mb_timer_on;
38 BitField<14, 1, u64> intra_frame_pslc;
39 BitField<17, 1, u64> all_intra_frame;
40 };
41 } control_params;
42 u64 picture_info_offset; ///< 0x0808
43 u64 frame_bitstream_offset; ///< 0x0810
44 u64 frame_number; ///< 0x0818
45 u64 h264_slice_data_offsets; ///< 0x0820
46 u64 h264_mv_dump_offset; ///< 0x0828
47 INSERT_PADDING_WORDS_NOINIT(6); ///< 0x0830
48 u64 frame_stats_offset; ///< 0x0848
49 u64 h264_last_surface_luma_offset; ///< 0x0850
50 u64 h264_last_surface_chroma_offset; ///< 0x0858
51 std::array<u64, 17> surface_luma_offset; ///< 0x0860
52 std::array<u64, 17> surface_chroma_offset; ///< 0x08E8
53 INSERT_PADDING_WORDS_NOINIT(132); ///< 0x0970
54 u64 vp9_entropy_probs_offset; ///< 0x0B80
55 u64 vp9_backward_updates_offset; ///< 0x0B88
56 u64 vp9_last_frame_segmap_offset; ///< 0x0B90
57 u64 vp9_curr_frame_segmap_offset; ///< 0x0B98
58 INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BA0
59 u64 vp9_last_frame_mvs_offset; ///< 0x0BA8
60 u64 vp9_curr_frame_mvs_offset; ///< 0x0BB0
61 INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BB8
62 };
63 std::array<u64, NUM_REGS> reg_array;
64 };
65};
66static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");
67
68#define ASSERT_REG_POSITION(field_name, position) \
69 static_assert(offsetof(NvdecRegisters, field_name) == position * sizeof(u64), \
70 "Field " #field_name " has invalid position")
71
72ASSERT_REG_POSITION(set_codec_id, 0x80);
73ASSERT_REG_POSITION(execute, 0xC0);
74ASSERT_REG_POSITION(control_params, 0x100);
75ASSERT_REG_POSITION(picture_info_offset, 0x101);
76ASSERT_REG_POSITION(frame_bitstream_offset, 0x102);
77ASSERT_REG_POSITION(frame_number, 0x103);
78ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104);
79ASSERT_REG_POSITION(frame_stats_offset, 0x109);
80ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A);
81ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B);
82ASSERT_REG_POSITION(surface_luma_offset, 0x10C);
83ASSERT_REG_POSITION(surface_chroma_offset, 0x11D);
84ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170);
85ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171);
86ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172);
87ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173);
88ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175);
89ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176);
90
91#undef ASSERT_REG_POSITION
92
48} // namespace Tegra::NvdecCommon 93} // namespace Tegra::NvdecCommon
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 23948feed..a2c1599f7 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -341,6 +341,20 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
341[[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset, 341[[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset,
342 VideoCommon::SubresourceLayers subresource, GLenum target) { 342 VideoCommon::SubresourceLayers subresource, GLenum target) {
343 switch (target) { 343 switch (target) {
344 case GL_TEXTURE_1D:
345 return CopyOrigin{
346 .level = static_cast<GLint>(subresource.base_level),
347 .x = static_cast<GLint>(offset.x),
348 .y = static_cast<GLint>(0),
349 .z = static_cast<GLint>(0),
350 };
351 case GL_TEXTURE_1D_ARRAY:
352 return CopyOrigin{
353 .level = static_cast<GLint>(subresource.base_level),
354 .x = static_cast<GLint>(offset.x),
355 .y = static_cast<GLint>(0),
356 .z = static_cast<GLint>(subresource.base_layer),
357 };
344 case GL_TEXTURE_2D_ARRAY: 358 case GL_TEXTURE_2D_ARRAY:
345 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: 359 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
346 return CopyOrigin{ 360 return CopyOrigin{
@@ -366,6 +380,18 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
366 VideoCommon::SubresourceLayers dst_subresource, 380 VideoCommon::SubresourceLayers dst_subresource,
367 GLenum target) { 381 GLenum target) {
368 switch (target) { 382 switch (target) {
383 case GL_TEXTURE_1D:
384 return CopyRegion{
385 .width = static_cast<GLsizei>(extent.width),
386 .height = static_cast<GLsizei>(1),
387 .depth = static_cast<GLsizei>(1),
388 };
389 case GL_TEXTURE_1D_ARRAY:
390 return CopyRegion{
391 .width = static_cast<GLsizei>(extent.width),
392 .height = static_cast<GLsizei>(1),
393 .depth = static_cast<GLsizei>(dst_subresource.num_layers),
394 };
369 case GL_TEXTURE_2D_ARRAY: 395 case GL_TEXTURE_2D_ARRAY:
370 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: 396 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
371 return CopyRegion{ 397 return CopyRegion{
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c7cfd02b6..d8dbd3824 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1057,9 +1057,6 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1057 std::vector<ImageId> right_aliased_ids; 1057 std::vector<ImageId> right_aliased_ids;
1058 std::vector<ImageId> bad_overlap_ids; 1058 std::vector<ImageId> bad_overlap_ids;
1059 ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { 1059 ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
1060 if (info.type != overlap.info.type) {
1061 return;
1062 }
1063 if (info.type == ImageType::Linear) { 1060 if (info.type == ImageType::Linear) {
1064 if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { 1061 if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
1065 // Alias linear images with the same pitch 1062 // Alias linear images with the same pitch
diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp
index efdc6aa50..7a6f84d96 100644
--- a/src/yuzu/debugger/profiler.cpp
+++ b/src/yuzu/debugger/profiler.cpp
@@ -143,24 +143,25 @@ void MicroProfileWidget::hideEvent(QHideEvent* ev) {
143} 143}
144 144
145void MicroProfileWidget::mouseMoveEvent(QMouseEvent* ev) { 145void MicroProfileWidget::mouseMoveEvent(QMouseEvent* ev) {
146 MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0); 146 MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0);
147 ev->accept(); 147 ev->accept();
148} 148}
149 149
150void MicroProfileWidget::mousePressEvent(QMouseEvent* ev) { 150void MicroProfileWidget::mousePressEvent(QMouseEvent* ev) {
151 MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0); 151 MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0);
152 MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton); 152 MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton);
153 ev->accept(); 153 ev->accept();
154} 154}
155 155
156void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) { 156void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) {
157 MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0); 157 MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0);
158 MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton); 158 MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton);
159 ev->accept(); 159 ev->accept();
160} 160}
161 161
162void MicroProfileWidget::wheelEvent(QWheelEvent* ev) { 162void MicroProfileWidget::wheelEvent(QWheelEvent* ev) {
163 MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, ev->delta() / 120); 163 MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale,
164 ev->angleDelta().y() / 120);
164 ev->accept(); 165 ev->accept();
165} 166}
166 167