diff options
| author | 2022-01-30 10:31:13 +0100 | |
|---|---|---|
| committer | 2022-10-06 21:00:52 +0200 | |
| commit | 668e80a9f42fb4ce0e16f6381d05bcbd286b2da1 (patch) | |
| tree | a1c668d6c3d00eade849b1d31dba4116095e4c12 /src/video_core/host1x | |
| parent | Texture Cache: Fix GC and GPU Modified on Joins. (diff) | |
| download | yuzu-668e80a9f42fb4ce0e16f6381d05bcbd286b2da1.tar.gz yuzu-668e80a9f42fb4ce0e16f6381d05bcbd286b2da1.tar.xz yuzu-668e80a9f42fb4ce0e16f6381d05bcbd286b2da1.zip | |
VideoCore: Refactor syncing.
Diffstat (limited to 'src/video_core/host1x')
21 files changed, 3310 insertions, 0 deletions
diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp new file mode 100644 index 000000000..70c47ae03 --- /dev/null +++ b/src/video_core/host1x/codecs/codec.cpp | |||
| @@ -0,0 +1,310 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <algorithm> | ||
| 5 | #include <fstream> | ||
| 6 | #include <vector> | ||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/settings.h" | ||
| 9 | #include "video_core/gpu.h" | ||
| 10 | #include "video_core/host1x/codecs/codec.h" | ||
| 11 | #include "video_core/host1x/codecs/h264.h" | ||
| 12 | #include "video_core/host1x/codecs/vp8.h" | ||
| 13 | #include "video_core/host1x/codecs/vp9.h" | ||
| 14 | #include "video_core/memory_manager.h" | ||
| 15 | |||
| 16 | extern "C" { | ||
| 17 | #include <libavutil/opt.h> | ||
| 18 | #ifdef LIBVA_FOUND | ||
| 19 | // for querying VAAPI driver information | ||
| 20 | #include <libavutil/hwcontext_vaapi.h> | ||
| 21 | #endif | ||
| 22 | } | ||
| 23 | |||
| 24 | namespace Tegra { | ||
| 25 | namespace { | ||
| 26 | constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12; | ||
| 27 | constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P; | ||
| 28 | constexpr std::array PREFERRED_GPU_DECODERS = { | ||
| 29 | AV_HWDEVICE_TYPE_CUDA, | ||
| 30 | #ifdef _WIN32 | ||
| 31 | AV_HWDEVICE_TYPE_D3D11VA, | ||
| 32 | AV_HWDEVICE_TYPE_DXVA2, | ||
| 33 | #elif defined(__unix__) | ||
| 34 | AV_HWDEVICE_TYPE_VAAPI, | ||
| 35 | AV_HWDEVICE_TYPE_VDPAU, | ||
| 36 | #endif | ||
| 37 | // last resort for Linux Flatpak (w/ NVIDIA) | ||
| 38 | AV_HWDEVICE_TYPE_VULKAN, | ||
| 39 | }; | ||
| 40 | |||
| 41 | void AVPacketDeleter(AVPacket* ptr) { | ||
| 42 | av_packet_free(&ptr); | ||
| 43 | } | ||
| 44 | |||
| 45 | using AVPacketPtr = std::unique_ptr<AVPacket, decltype(&AVPacketDeleter)>; | ||
| 46 | |||
| 47 | AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) { | ||
| 48 | for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) { | ||
| 49 | if (*p == av_codec_ctx->pix_fmt) { | ||
| 50 | return av_codec_ctx->pix_fmt; | ||
| 51 | } | ||
| 52 | } | ||
| 53 | LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU"); | ||
| 54 | av_buffer_unref(&av_codec_ctx->hw_device_ctx); | ||
| 55 | av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT; | ||
| 56 | return PREFERRED_CPU_FMT; | ||
| 57 | } | ||
| 58 | |||
| 59 | // List all the currently available hwcontext in ffmpeg | ||
| 60 | std::vector<AVHWDeviceType> ListSupportedContexts() { | ||
| 61 | std::vector<AVHWDeviceType> contexts{}; | ||
| 62 | AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE; | ||
| 63 | do { | ||
| 64 | current_device_type = av_hwdevice_iterate_types(current_device_type); | ||
| 65 | contexts.push_back(current_device_type); | ||
| 66 | } while (current_device_type != AV_HWDEVICE_TYPE_NONE); | ||
| 67 | return contexts; | ||
| 68 | } | ||
| 69 | |||
| 70 | } // namespace | ||
| 71 | |||
| 72 | void AVFrameDeleter(AVFrame* ptr) { | ||
| 73 | av_frame_free(&ptr); | ||
| 74 | } | ||
| 75 | |||
| 76 | Codec::Codec(GPU& gpu_, const Host1x::NvdecCommon::NvdecRegisters& regs) | ||
| 77 | : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)), | ||
| 78 | vp8_decoder(std::make_unique<Decoder::VP8>(gpu)), | ||
| 79 | vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} | ||
| 80 | |||
| 81 | Codec::~Codec() { | ||
| 82 | if (!initialized) { | ||
| 83 | return; | ||
| 84 | } | ||
| 85 | // Free libav memory | ||
| 86 | avcodec_free_context(&av_codec_ctx); | ||
| 87 | av_buffer_unref(&av_gpu_decoder); | ||
| 88 | } | ||
| 89 | |||
| 90 | bool Codec::CreateGpuAvDevice() { | ||
| 91 | static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX; | ||
| 92 | static const auto supported_contexts = ListSupportedContexts(); | ||
| 93 | for (const auto& type : PREFERRED_GPU_DECODERS) { | ||
| 94 | if (std::none_of(supported_contexts.begin(), supported_contexts.end(), | ||
| 95 | [&type](const auto& context) { return context == type; })) { | ||
| 96 | LOG_DEBUG(Service_NVDRV, "{} explicitly unsupported", av_hwdevice_get_type_name(type)); | ||
| 97 | continue; | ||
| 98 | } | ||
| 99 | // Avoid memory leak from not cleaning up after av_hwdevice_ctx_create | ||
| 100 | av_buffer_unref(&av_gpu_decoder); | ||
| 101 | const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0); | ||
| 102 | if (hwdevice_res < 0) { | ||
| 103 | LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}", | ||
| 104 | av_hwdevice_get_type_name(type), hwdevice_res); | ||
| 105 | continue; | ||
| 106 | } | ||
| 107 | #ifdef LIBVA_FOUND | ||
| 108 | if (type == AV_HWDEVICE_TYPE_VAAPI) { | ||
| 109 | // we need to determine if this is an impersonated VAAPI driver | ||
| 110 | AVHWDeviceContext* hwctx = | ||
| 111 | static_cast<AVHWDeviceContext*>(static_cast<void*>(av_gpu_decoder->data)); | ||
| 112 | AVVAAPIDeviceContext* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx); | ||
| 113 | const char* vendor_name = vaQueryVendorString(vactx->display); | ||
| 114 | if (strstr(vendor_name, "VDPAU backend")) { | ||
| 115 | // VDPAU-impersonated VAAPI implementations are very buggy, so we need to skip them | ||
| 116 | LOG_DEBUG(Service_NVDRV, "Skipping vdapu impersonated VAAPI driver"); | ||
| 117 | continue; | ||
| 118 | } else { | ||
| 119 | // according to some user testing, certain VAAPI drivers (Intel?) could be buggy | ||
| 120 | // so let's log the driver name which may help the developers/supporters | ||
| 121 | LOG_DEBUG(Service_NVDRV, "Using VAAPI driver: {}", vendor_name); | ||
| 122 | } | ||
| 123 | } | ||
| 124 | #endif | ||
| 125 | for (int i = 0;; i++) { | ||
| 126 | const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i); | ||
| 127 | if (!config) { | ||
| 128 | LOG_DEBUG(Service_NVDRV, "{} decoder does not support device type {}.", | ||
| 129 | av_codec->name, av_hwdevice_get_type_name(type)); | ||
| 130 | break; | ||
| 131 | } | ||
| 132 | if ((config->methods & HW_CONFIG_METHOD) != 0 && config->device_type == type) { | ||
| 133 | #if defined(__unix__) | ||
| 134 | // Some linux decoding backends are reported to crash with this config method | ||
| 135 | // TODO(ameerj): Properly support this method | ||
| 136 | if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) != 0) { | ||
| 137 | // skip zero-copy decoders, we don't currently support them | ||
| 138 | LOG_DEBUG(Service_NVDRV, "Skipping decoder {} with unsupported capability {}.", | ||
| 139 | av_hwdevice_get_type_name(type), config->methods); | ||
| 140 | continue; | ||
| 141 | } | ||
| 142 | #endif | ||
| 143 | LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type)); | ||
| 144 | av_codec_ctx->pix_fmt = config->pix_fmt; | ||
| 145 | return true; | ||
| 146 | } | ||
| 147 | } | ||
| 148 | } | ||
| 149 | return false; | ||
| 150 | } | ||
| 151 | |||
| 152 | void Codec::InitializeAvCodecContext() { | ||
| 153 | av_codec_ctx = avcodec_alloc_context3(av_codec); | ||
| 154 | av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); | ||
| 155 | } | ||
| 156 | |||
| 157 | void Codec::InitializeGpuDecoder() { | ||
| 158 | if (!CreateGpuAvDevice()) { | ||
| 159 | av_buffer_unref(&av_gpu_decoder); | ||
| 160 | return; | ||
| 161 | } | ||
| 162 | auto* hw_device_ctx = av_buffer_ref(av_gpu_decoder); | ||
| 163 | ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed"); | ||
| 164 | av_codec_ctx->hw_device_ctx = hw_device_ctx; | ||
| 165 | av_codec_ctx->get_format = GetGpuFormat; | ||
| 166 | } | ||
| 167 | |||
| 168 | void Codec::Initialize() { | ||
| 169 | const AVCodecID codec = [&] { | ||
| 170 | switch (current_codec) { | ||
| 171 | case Host1x::NvdecCommon::VideoCodec::H264: | ||
| 172 | return AV_CODEC_ID_H264; | ||
| 173 | case Host1x::NvdecCommon::VideoCodec::VP8: | ||
| 174 | return AV_CODEC_ID_VP8; | ||
| 175 | case Host1x::NvdecCommon::VideoCodec::VP9: | ||
| 176 | return AV_CODEC_ID_VP9; | ||
| 177 | default: | ||
| 178 | UNIMPLEMENTED_MSG("Unknown codec {}", current_codec); | ||
| 179 | return AV_CODEC_ID_NONE; | ||
| 180 | } | ||
| 181 | }(); | ||
| 182 | av_codec = avcodec_find_decoder(codec); | ||
| 183 | |||
| 184 | InitializeAvCodecContext(); | ||
| 185 | if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::GPU) { | ||
| 186 | InitializeGpuDecoder(); | ||
| 187 | } | ||
| 188 | if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) { | ||
| 189 | LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res); | ||
| 190 | avcodec_free_context(&av_codec_ctx); | ||
| 191 | av_buffer_unref(&av_gpu_decoder); | ||
| 192 | return; | ||
| 193 | } | ||
| 194 | if (!av_codec_ctx->hw_device_ctx) { | ||
| 195 | LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding"); | ||
| 196 | } | ||
| 197 | initialized = true; | ||
| 198 | } | ||
| 199 | |||
| 200 | void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) { | ||
| 201 | if (current_codec != codec) { | ||
| 202 | current_codec = codec; | ||
| 203 | LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName()); | ||
| 204 | } | ||
| 205 | } | ||
| 206 | |||
| 207 | void Codec::Decode() { | ||
| 208 | const bool is_first_frame = !initialized; | ||
| 209 | if (is_first_frame) { | ||
| 210 | Initialize(); | ||
| 211 | } | ||
| 212 | if (!initialized) { | ||
| 213 | return; | ||
| 214 | } | ||
| 215 | bool vp9_hidden_frame = false; | ||
| 216 | const auto& frame_data = [&]() { | ||
| 217 | switch (current_codec) { | ||
| 218 | case Tegra::Host1x::NvdecCommon::VideoCodec::H264: | ||
| 219 | return h264_decoder->ComposeFrame(state, is_first_frame); | ||
| 220 | case Tegra::Host1x::NvdecCommon::VideoCodec::VP8: | ||
| 221 | return vp8_decoder->ComposeFrame(state); | ||
| 222 | case Tegra::Host1x::NvdecCommon::VideoCodec::VP9: | ||
| 223 | vp9_decoder->ComposeFrame(state); | ||
| 224 | vp9_hidden_frame = vp9_decoder->WasFrameHidden(); | ||
| 225 | return vp9_decoder->GetFrameBytes(); | ||
| 226 | default: | ||
| 227 | ASSERT(false); | ||
| 228 | return std::vector<u8>{}; | ||
| 229 | } | ||
| 230 | }(); | ||
| 231 | AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter}; | ||
| 232 | if (!packet) { | ||
| 233 | LOG_ERROR(Service_NVDRV, "av_packet_alloc failed"); | ||
| 234 | return; | ||
| 235 | } | ||
| 236 | packet->data = const_cast<u8*>(frame_data.data()); | ||
| 237 | packet->size = static_cast<s32>(frame_data.size()); | ||
| 238 | if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) { | ||
| 239 | LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res); | ||
| 240 | return; | ||
| 241 | } | ||
| 242 | // Only receive/store visible frames | ||
| 243 | if (vp9_hidden_frame) { | ||
| 244 | return; | ||
| 245 | } | ||
| 246 | AVFramePtr initial_frame{av_frame_alloc(), AVFrameDeleter}; | ||
| 247 | AVFramePtr final_frame{nullptr, AVFrameDeleter}; | ||
| 248 | ASSERT_MSG(initial_frame, "av_frame_alloc initial_frame failed"); | ||
| 249 | if (const int ret = avcodec_receive_frame(av_codec_ctx, initial_frame.get()); ret) { | ||
| 250 | LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret); | ||
| 251 | return; | ||
| 252 | } | ||
| 253 | if (initial_frame->width == 0 || initial_frame->height == 0) { | ||
| 254 | LOG_WARNING(Service_NVDRV, "Zero width or height in frame"); | ||
| 255 | return; | ||
| 256 | } | ||
| 257 | if (av_codec_ctx->hw_device_ctx) { | ||
| 258 | final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter}; | ||
| 259 | ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed"); | ||
| 260 | // Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp | ||
| 261 | // because Intel drivers crash unless using AV_PIX_FMT_NV12 | ||
| 262 | final_frame->format = PREFERRED_GPU_FMT; | ||
| 263 | const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0); | ||
| 264 | ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret); | ||
| 265 | } else { | ||
| 266 | final_frame = std::move(initial_frame); | ||
| 267 | } | ||
| 268 | if (final_frame->format != PREFERRED_CPU_FMT && final_frame->format != PREFERRED_GPU_FMT) { | ||
| 269 | UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format); | ||
| 270 | return; | ||
| 271 | } | ||
| 272 | av_frames.push(std::move(final_frame)); | ||
| 273 | if (av_frames.size() > 10) { | ||
| 274 | LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame"); | ||
| 275 | av_frames.pop(); | ||
| 276 | } | ||
| 277 | } | ||
| 278 | |||
| 279 | AVFramePtr Codec::GetCurrentFrame() { | ||
| 280 | // Sometimes VIC will request more frames than have been decoded. | ||
| 281 | // In this case, return a nullptr and don't overwrite previous frame data | ||
| 282 | if (av_frames.empty()) { | ||
| 283 | return AVFramePtr{nullptr, AVFrameDeleter}; | ||
| 284 | } | ||
| 285 | AVFramePtr frame = std::move(av_frames.front()); | ||
| 286 | av_frames.pop(); | ||
| 287 | return frame; | ||
| 288 | } | ||
| 289 | |||
| 290 | Host1x::NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { | ||
| 291 | return current_codec; | ||
| 292 | } | ||
| 293 | |||
| 294 | std::string_view Codec::GetCurrentCodecName() const { | ||
| 295 | switch (current_codec) { | ||
| 296 | case Host1x::NvdecCommon::VideoCodec::None: | ||
| 297 | return "None"; | ||
| 298 | case Host1x::NvdecCommon::VideoCodec::H264: | ||
| 299 | return "H264"; | ||
| 300 | case Host1x::NvdecCommon::VideoCodec::VP8: | ||
| 301 | return "VP8"; | ||
| 302 | case Host1x::NvdecCommon::VideoCodec::H265: | ||
| 303 | return "H265"; | ||
| 304 | case Host1x::NvdecCommon::VideoCodec::VP9: | ||
| 305 | return "VP9"; | ||
| 306 | default: | ||
| 307 | return "Unknown"; | ||
| 308 | } | ||
| 309 | } | ||
| 310 | } // namespace Tegra | ||
diff --git a/src/video_core/host1x/codecs/codec.h b/src/video_core/host1x/codecs/codec.h new file mode 100644 index 000000000..117cb3ccd --- /dev/null +++ b/src/video_core/host1x/codecs/codec.h | |||
| @@ -0,0 +1,81 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <memory> | ||
| 7 | #include <string_view> | ||
| 8 | #include <queue> | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/host1x/nvdec_common.h" | ||
| 11 | |||
| 12 | extern "C" { | ||
| 13 | #if defined(__GNUC__) || defined(__clang__) | ||
| 14 | #pragma GCC diagnostic push | ||
| 15 | #pragma GCC diagnostic ignored "-Wconversion" | ||
| 16 | #endif | ||
| 17 | #include <libavcodec/avcodec.h> | ||
| 18 | #if defined(__GNUC__) || defined(__clang__) | ||
| 19 | #pragma GCC diagnostic pop | ||
| 20 | #endif | ||
| 21 | } | ||
| 22 | |||
| 23 | namespace Tegra { | ||
| 24 | class GPU; | ||
| 25 | |||
| 26 | void AVFrameDeleter(AVFrame* ptr); | ||
| 27 | using AVFramePtr = std::unique_ptr<AVFrame, decltype(&AVFrameDeleter)>; | ||
| 28 | |||
| 29 | namespace Decoder { | ||
| 30 | class H264; | ||
| 31 | class VP8; | ||
| 32 | class VP9; | ||
| 33 | } // namespace Decoder | ||
| 34 | |||
| 35 | class Codec { | ||
| 36 | public: | ||
| 37 | explicit Codec(GPU& gpu, const Host1x::NvdecCommon::NvdecRegisters& regs); | ||
| 38 | ~Codec(); | ||
| 39 | |||
| 40 | /// Initialize the codec; returns nothing, on failure the codec is left uninitialized | ||
| 41 | void Initialize(); | ||
| 42 | |||
| 43 | /// Sets NVDEC video stream codec | ||
| 44 | void SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec); | ||
| 45 | |||
| 46 | /// Call decoders to construct headers, decode AVFrame with ffmpeg | ||
| 47 | void Decode(); | ||
| 48 | |||
| 49 | /// Returns next decoded frame | ||
| 50 | [[nodiscard]] AVFramePtr GetCurrentFrame(); | ||
| 51 | |||
| 52 | /// Returns the value of current_codec | ||
| 53 | [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const; | ||
| 54 | |||
| 55 | /// Return name of the current codec | ||
| 56 | [[nodiscard]] std::string_view GetCurrentCodecName() const; | ||
| 57 | |||
| 58 | private: | ||
| 59 | void InitializeAvCodecContext(); | ||
| 60 | |||
| 61 | void InitializeGpuDecoder(); | ||
| 62 | |||
| 63 | bool CreateGpuAvDevice(); | ||
| 64 | |||
| 65 | bool initialized{}; | ||
| 66 | Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None}; | ||
| 67 | |||
| 68 | const AVCodec* av_codec{nullptr}; | ||
| 69 | AVCodecContext* av_codec_ctx{nullptr}; | ||
| 70 | AVBufferRef* av_gpu_decoder{nullptr}; | ||
| 71 | |||
| 72 | GPU& gpu; | ||
| 73 | const Host1x::NvdecCommon::NvdecRegisters& state; | ||
| 74 | std::unique_ptr<Decoder::H264> h264_decoder; | ||
| 75 | std::unique_ptr<Decoder::VP8> vp8_decoder; | ||
| 76 | std::unique_ptr<Decoder::VP9> vp9_decoder; | ||
| 77 | |||
| 78 | std::queue<AVFramePtr> av_frames{}; | ||
| 79 | }; | ||
| 80 | |||
| 81 | } // namespace Tegra | ||
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp new file mode 100644 index 000000000..95534bc85 --- /dev/null +++ b/src/video_core/host1x/codecs/h264.cpp | |||
| @@ -0,0 +1,277 @@ | |||
| 1 | // SPDX-FileCopyrightText: Ryujinx Team and Contributors | ||
| 2 | // SPDX-License-Identifier: MIT | ||
| 3 | |||
| 4 | #include <array> | ||
| 5 | #include <bit> | ||
| 6 | |||
| 7 | #include "common/settings.h" | ||
| 8 | #include "video_core/gpu.h" | ||
| 9 | #include "video_core/host1x/codecs/h264.h" | ||
| 10 | #include "video_core/memory_manager.h" | ||
| 11 | |||
| 12 | namespace Tegra::Decoder { | ||
| 13 | namespace { | ||
| 14 | // ZigZag LUTs from libavcodec. | ||
| 15 | constexpr std::array<u8, 64> zig_zag_direct{ | ||
| 16 | 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, | ||
| 17 | 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, | ||
| 18 | 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, | ||
| 19 | }; | ||
| 20 | |||
| 21 | constexpr std::array<u8, 16> zig_zag_scan{ | ||
| 22 | 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4, | ||
| 23 | 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4, | ||
| 24 | }; | ||
| 25 | } // Anonymous namespace | ||
| 26 | |||
| 27 | H264::H264(GPU& gpu_) : gpu(gpu_) {} | ||
| 28 | |||
| 29 | H264::~H264() = default; | ||
| 30 | |||
| 31 | const std::vector<u8>& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, | ||
| 32 | bool is_first_frame) { | ||
| 33 | H264DecoderContext context; | ||
| 34 | gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); | ||
| 35 | |||
| 36 | const s64 frame_number = context.h264_parameter_set.frame_number.Value(); | ||
| 37 | if (!is_first_frame && frame_number != 0) { | ||
| 38 | frame.resize(context.stream_len); | ||
| 39 | gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); | ||
| 40 | return frame; | ||
| 41 | } | ||
| 42 | |||
| 43 | // Encode header | ||
| 44 | H264BitWriter writer{}; | ||
| 45 | writer.WriteU(1, 24); | ||
| 46 | writer.WriteU(0, 1); | ||
| 47 | writer.WriteU(3, 2); | ||
| 48 | writer.WriteU(7, 5); | ||
| 49 | writer.WriteU(100, 8); | ||
| 50 | writer.WriteU(0, 8); | ||
| 51 | writer.WriteU(31, 8); | ||
| 52 | writer.WriteUe(0); | ||
| 53 | const u32 chroma_format_idc = | ||
| 54 | static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value()); | ||
| 55 | writer.WriteUe(chroma_format_idc); | ||
| 56 | if (chroma_format_idc == 3) { | ||
| 57 | writer.WriteBit(false); | ||
| 58 | } | ||
| 59 | |||
| 60 | writer.WriteUe(0); | ||
| 61 | writer.WriteUe(0); | ||
| 62 | writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag | ||
| 63 | writer.WriteBit(false); // Scaling matrix present flag | ||
| 64 | |||
| 65 | writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value())); | ||
| 66 | |||
| 67 | const auto order_cnt_type = | ||
| 68 | static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value()); | ||
| 69 | writer.WriteUe(order_cnt_type); | ||
| 70 | if (order_cnt_type == 0) { | ||
| 71 | writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4); | ||
| 72 | } else if (order_cnt_type == 1) { | ||
| 73 | writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); | ||
| 74 | |||
| 75 | writer.WriteSe(0); | ||
| 76 | writer.WriteSe(0); | ||
| 77 | writer.WriteUe(0); | ||
| 78 | } | ||
| 79 | |||
| 80 | const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units / | ||
| 81 | (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); | ||
| 82 | |||
| 83 | // TODO (ameerj): Where do we get this number, it seems to be particular for each stream | ||
| 84 | const auto nvdec_decoding = Settings::values.nvdec_emulation.GetValue(); | ||
| 85 | const bool uses_gpu_decoding = nvdec_decoding == Settings::NvdecEmulation::GPU; | ||
| 86 | const u32 max_num_ref_frames = uses_gpu_decoding ? 6u : 16u; | ||
| 87 | writer.WriteUe(max_num_ref_frames); | ||
| 88 | writer.WriteBit(false); | ||
| 89 | writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); | ||
| 90 | writer.WriteUe(pic_height - 1); | ||
| 91 | writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); | ||
| 92 | |||
| 93 | if (!context.h264_parameter_set.frame_mbs_only_flag) { | ||
| 94 | writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0); | ||
| 95 | } | ||
| 96 | |||
| 97 | writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0); | ||
| 98 | writer.WriteBit(false); // Frame cropping flag | ||
| 99 | writer.WriteBit(false); // VUI parameter present flag | ||
| 100 | |||
| 101 | writer.End(); | ||
| 102 | |||
| 103 | // H264 PPS | ||
| 104 | writer.WriteU(1, 24); | ||
| 105 | writer.WriteU(0, 1); | ||
| 106 | writer.WriteU(3, 2); | ||
| 107 | writer.WriteU(8, 5); | ||
| 108 | |||
| 109 | writer.WriteUe(0); | ||
| 110 | writer.WriteUe(0); | ||
| 111 | |||
| 112 | writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); | ||
| 113 | writer.WriteBit(false); | ||
| 114 | writer.WriteUe(0); | ||
| 115 | writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); | ||
| 116 | writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); | ||
| 117 | writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0); | ||
| 118 | writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2); | ||
| 119 | s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value()); | ||
| 120 | writer.WriteSe(pic_init_qp); | ||
| 121 | writer.WriteSe(0); | ||
| 122 | s32 chroma_qp_index_offset = | ||
| 123 | static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value()); | ||
| 124 | |||
| 125 | writer.WriteSe(chroma_qp_index_offset); | ||
| 126 | writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0); | ||
| 127 | writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0); | ||
| 128 | writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0); | ||
| 129 | writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); | ||
| 130 | |||
| 131 | writer.WriteBit(true); | ||
| 132 | |||
| 133 | for (s32 index = 0; index < 6; index++) { | ||
| 134 | writer.WriteBit(true); | ||
| 135 | std::span<const u8> matrix{context.weight_scale}; | ||
| 136 | writer.WriteScalingList(matrix, index * 16, 16); | ||
| 137 | } | ||
| 138 | |||
| 139 | if (context.h264_parameter_set.transform_8x8_mode_flag) { | ||
| 140 | for (s32 index = 0; index < 2; index++) { | ||
| 141 | writer.WriteBit(true); | ||
| 142 | std::span<const u8> matrix{context.weight_scale_8x8}; | ||
| 143 | writer.WriteScalingList(matrix, index * 64, 64); | ||
| 144 | } | ||
| 145 | } | ||
| 146 | |||
| 147 | s32 chroma_qp_index_offset2 = | ||
| 148 | static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value()); | ||
| 149 | |||
| 150 | writer.WriteSe(chroma_qp_index_offset2); | ||
| 151 | |||
| 152 | writer.End(); | ||
| 153 | |||
| 154 | const auto& encoded_header = writer.GetByteArray(); | ||
| 155 | frame.resize(encoded_header.size() + context.stream_len); | ||
| 156 | std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); | ||
| 157 | |||
| 158 | gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, | ||
| 159 | frame.data() + encoded_header.size(), context.stream_len); | ||
| 160 | |||
| 161 | return frame; | ||
| 162 | } | ||
| 163 | |||
| 164 | H264BitWriter::H264BitWriter() = default; | ||
| 165 | |||
| 166 | H264BitWriter::~H264BitWriter() = default; | ||
| 167 | |||
| 168 | void H264BitWriter::WriteU(s32 value, s32 value_sz) { | ||
| 169 | WriteBits(value, value_sz); | ||
| 170 | } | ||
| 171 | |||
| 172 | void H264BitWriter::WriteSe(s32 value) { | ||
| 173 | WriteExpGolombCodedInt(value); | ||
| 174 | } | ||
| 175 | |||
| 176 | void H264BitWriter::WriteUe(u32 value) { | ||
| 177 | WriteExpGolombCodedUInt(value); | ||
| 178 | } | ||
| 179 | |||
| 180 | void H264BitWriter::End() { | ||
| 181 | WriteBit(true); | ||
| 182 | Flush(); | ||
| 183 | } | ||
| 184 | |||
| 185 | void H264BitWriter::WriteBit(bool state) { | ||
| 186 | WriteBits(state ? 1 : 0, 1); | ||
| 187 | } | ||
| 188 | |||
| 189 | void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) { | ||
| 190 | std::vector<u8> scan(count); | ||
| 191 | if (count == 16) { | ||
| 192 | std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); | ||
| 193 | } else { | ||
| 194 | std::memcpy(scan.data(), zig_zag_direct.data(), scan.size()); | ||
| 195 | } | ||
| 196 | u8 last_scale = 8; | ||
| 197 | |||
| 198 | for (s32 index = 0; index < count; index++) { | ||
| 199 | const u8 value = list[start + scan[index]]; | ||
| 200 | const s32 delta_scale = static_cast<s32>(value - last_scale); | ||
| 201 | |||
| 202 | WriteSe(delta_scale); | ||
| 203 | |||
| 204 | last_scale = value; | ||
| 205 | } | ||
| 206 | } | ||
| 207 | |||
| 208 | std::vector<u8>& H264BitWriter::GetByteArray() { | ||
| 209 | return byte_array; | ||
| 210 | } | ||
| 211 | |||
| 212 | const std::vector<u8>& H264BitWriter::GetByteArray() const { | ||
| 213 | return byte_array; | ||
| 214 | } | ||
| 215 | |||
| 216 | void H264BitWriter::WriteBits(s32 value, s32 bit_count) { | ||
| 217 | s32 value_pos = 0; | ||
| 218 | |||
| 219 | s32 remaining = bit_count; | ||
| 220 | |||
| 221 | while (remaining > 0) { | ||
| 222 | s32 copy_size = remaining; | ||
| 223 | |||
| 224 | const s32 free_bits = GetFreeBufferBits(); | ||
| 225 | |||
| 226 | if (copy_size > free_bits) { | ||
| 227 | copy_size = free_bits; | ||
| 228 | } | ||
| 229 | |||
| 230 | const s32 mask = (1 << copy_size) - 1; | ||
| 231 | |||
| 232 | const s32 src_shift = (bit_count - value_pos) - copy_size; | ||
| 233 | const s32 dst_shift = (buffer_size - buffer_pos) - copy_size; | ||
| 234 | |||
| 235 | buffer |= ((value >> src_shift) & mask) << dst_shift; | ||
| 236 | |||
| 237 | value_pos += copy_size; | ||
| 238 | buffer_pos += copy_size; | ||
| 239 | remaining -= copy_size; | ||
| 240 | } | ||
| 241 | } | ||
| 242 | |||
| 243 | void H264BitWriter::WriteExpGolombCodedInt(s32 value) { | ||
| 244 | const s32 sign = value <= 0 ? 0 : 1; | ||
| 245 | if (value < 0) { | ||
| 246 | value = -value; | ||
| 247 | } | ||
| 248 | value = (value << 1) - sign; | ||
| 249 | WriteExpGolombCodedUInt(value); | ||
| 250 | } | ||
| 251 | |||
| 252 | void H264BitWriter::WriteExpGolombCodedUInt(u32 value) { | ||
| 253 | const s32 size = 32 - std::countl_zero(value + 1); | ||
| 254 | WriteBits(1, size); | ||
| 255 | |||
| 256 | value -= (1U << (size - 1)) - 1; | ||
| 257 | WriteBits(static_cast<s32>(value), size - 1); | ||
| 258 | } | ||
| 259 | |||
| 260 | s32 H264BitWriter::GetFreeBufferBits() { | ||
| 261 | if (buffer_pos == buffer_size) { | ||
| 262 | Flush(); | ||
| 263 | } | ||
| 264 | |||
| 265 | return buffer_size - buffer_pos; | ||
| 266 | } | ||
| 267 | |||
| 268 | void H264BitWriter::Flush() { | ||
| 269 | if (buffer_pos == 0) { | ||
| 270 | return; | ||
| 271 | } | ||
| 272 | byte_array.push_back(static_cast<u8>(buffer)); | ||
| 273 | |||
| 274 | buffer = 0; | ||
| 275 | buffer_pos = 0; | ||
| 276 | } | ||
| 277 | } // namespace Tegra::Decoder | ||
diff --git a/src/video_core/host1x/codecs/h264.h b/src/video_core/host1x/codecs/h264.h new file mode 100644 index 000000000..a98730474 --- /dev/null +++ b/src/video_core/host1x/codecs/h264.h | |||
| @@ -0,0 +1,173 @@ | |||
| 1 | // SPDX-FileCopyrightText: Ryujinx Team and Contributors | ||
| 2 | // SPDX-License-Identifier: MIT | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <span> | ||
| 7 | #include <vector> | ||
| 8 | #include "common/bit_field.h" | ||
| 9 | #include "common/common_funcs.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/host1x/nvdec_common.h" | ||
| 12 | |||
| 13 | namespace Tegra { | ||
| 14 | class GPU; | ||
| 15 | namespace Decoder { | ||
| 16 | |||
| 17 | class H264BitWriter { | ||
| 18 | public: | ||
| 19 | H264BitWriter(); | ||
| 20 | ~H264BitWriter(); | ||
| 21 | |||
| 22 | /// The following Write methods are based on clause 9.1 in the H.264 specification. | ||
| 23 | /// WriteSe and WriteUe write in the Exp-Golomb-coded syntax | ||
| 24 | void WriteU(s32 value, s32 value_sz); | ||
| 25 | void WriteSe(s32 value); | ||
| 26 | void WriteUe(u32 value); | ||
| 27 | |||
| 28 | /// Finalize the bitstream | ||
| 29 | void End(); | ||
| 30 | |||
| 31 | /// Append a single bit to the stream; the bit's value equals the state parameter | ||
| 32 | void WriteBit(bool state); | ||
| 33 | |||
| 34 | /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification | ||
| 35 | /// Writes the scaling matrices of the stream | ||
| 36 | void WriteScalingList(std::span<const u8> list, s32 start, s32 count); | ||
| 37 | |||
| 38 | /// Return the bitstream as a vector. | ||
| 39 | [[nodiscard]] std::vector<u8>& GetByteArray(); | ||
| 40 | [[nodiscard]] const std::vector<u8>& GetByteArray() const; | ||
| 41 | |||
| 42 | private: | ||
| 43 | void WriteBits(s32 value, s32 bit_count); | ||
| 44 | void WriteExpGolombCodedInt(s32 value); | ||
| 45 | void WriteExpGolombCodedUInt(u32 value); | ||
| 46 | [[nodiscard]] s32 GetFreeBufferBits(); | ||
| 47 | void Flush(); | ||
| 48 | |||
| 49 | s32 buffer_size{8}; | ||
| 50 | |||
| 51 | s32 buffer{}; | ||
| 52 | s32 buffer_pos{}; | ||
| 53 | std::vector<u8> byte_array; | ||
| 54 | }; | ||
| 55 | |||
/// Composes H264 frames for FFmpeg decoding from NVDEC register state.
class H264 {
public:
    explicit H264(GPU& gpu);
    ~H264();

    /// Compose the H264 frame for FFmpeg decoding
    /// @param state          NVDEC register state describing the frame to compose.
    /// @param is_first_frame Optional flag, defaults to false; its effect is defined in the
    ///                       implementation file, not in this header.
    /// @returns A reference to a byte vector holding the composed frame.
    [[nodiscard]] const std::vector<u8>& ComposeFrame(
        const Host1x::NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);

private:
    std::vector<u8> frame;
    GPU& gpu;

    /// Parameter-set layout. The trailing comment on each field is its byte offset, and the
    /// offsets and total size are enforced by the static_asserts further down.
    struct H264ParameterSet {
        s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00
        s32 delta_pic_order_always_zero_flag; ///< 0x04
        s32 frame_mbs_only_flag; ///< 0x08
        u32 pic_width_in_mbs; ///< 0x0C
        u32 frame_height_in_map_units; ///< 0x10
        union { ///< 0x14
            BitField<0, 2, u32> tile_format;
            BitField<2, 3, u32> gob_height;
        };
        u32 entropy_coding_mode_flag; ///< 0x18
        s32 pic_order_present_flag; ///< 0x1C
        s32 num_refidx_l0_default_active; ///< 0x20
        s32 num_refidx_l1_default_active; ///< 0x24
        s32 deblocking_filter_control_present_flag; ///< 0x28
        s32 redundant_pic_cnt_present_flag; ///< 0x2C
        u32 transform_8x8_mode_flag; ///< 0x30
        u32 pitch_luma; ///< 0x34
        u32 pitch_chroma; ///< 0x38
        u32 luma_top_offset; ///< 0x3C
        u32 luma_bot_offset; ///< 0x40
        u32 luma_frame_offset; ///< 0x44
        u32 chroma_top_offset; ///< 0x48
        u32 chroma_bot_offset; ///< 0x4C
        u32 chroma_frame_offset; ///< 0x50
        u32 hist_buffer_size; ///< 0x54
        // 64-bit packed word: the low 8 bits are the individual `flags`, the remaining
        // bit ranges are declared as sibling bit-fields of the same union.
        union { ///< 0x58
            union {
                BitField<0, 1, u64> mbaff_frame;
                BitField<1, 1, u64> direct_8x8_inference;
                BitField<2, 1, u64> weighted_pred;
                BitField<3, 1, u64> constrained_intra_pred;
                BitField<4, 1, u64> ref_pic;
                BitField<5, 1, u64> field_pic;
                BitField<6, 1, u64> bottom_field;
                BitField<7, 1, u64> second_field;
            } flags;
            BitField<8, 4, u64> log2_max_frame_num_minus4;
            BitField<12, 2, u64> chroma_format_idc;
            BitField<14, 2, u64> pic_order_cnt_type;
            BitField<16, 6, s64> pic_init_qp_minus26;
            BitField<22, 5, s64> chroma_qp_index_offset;
            BitField<27, 5, s64> second_chroma_qp_index_offset;
            BitField<32, 2, u64> weighted_bipred_idc;
            BitField<34, 7, u64> curr_pic_idx;
            BitField<41, 5, u64> curr_col_idx;
            BitField<46, 16, u64> frame_number;
            BitField<62, 1, u64> frame_surfaces;
            BitField<63, 1, u64> output_memory_layout;
        };
    };
    static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size");

    /// Decoder context layout embedding the parameter set and the scaling weights.
    /// Padding words keep every named field at the byte offset given in its trailing comment.
    struct H264DecoderContext {
        INSERT_PADDING_WORDS_NOINIT(18); ///< 0x0000
        u32 stream_len; ///< 0x0048
        INSERT_PADDING_WORDS_NOINIT(3); ///< 0x004C
        H264ParameterSet h264_parameter_set; ///< 0x0058
        INSERT_PADDING_WORDS_NOINIT(66); ///< 0x00B8
        std::array<u8, 0x60> weight_scale; ///< 0x01C0
        std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220
    };
    static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size");

// Compile-time checks that each H264ParameterSet field sits at its documented offset.
#define ASSERT_POSITION(field_name, position) \
    static_assert(offsetof(H264ParameterSet, field_name) == position, \
                  "Field " #field_name " has invalid position")

    ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00);
    ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04);
    ASSERT_POSITION(frame_mbs_only_flag, 0x08);
    ASSERT_POSITION(pic_width_in_mbs, 0x0C);
    ASSERT_POSITION(frame_height_in_map_units, 0x10);
    ASSERT_POSITION(tile_format, 0x14);
    ASSERT_POSITION(entropy_coding_mode_flag, 0x18);
    ASSERT_POSITION(pic_order_present_flag, 0x1C);
    ASSERT_POSITION(num_refidx_l0_default_active, 0x20);
    ASSERT_POSITION(num_refidx_l1_default_active, 0x24);
    ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28);
    ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C);
    ASSERT_POSITION(transform_8x8_mode_flag, 0x30);
    ASSERT_POSITION(pitch_luma, 0x34);
    ASSERT_POSITION(pitch_chroma, 0x38);
    ASSERT_POSITION(luma_top_offset, 0x3C);
    ASSERT_POSITION(luma_bot_offset, 0x40);
    ASSERT_POSITION(luma_frame_offset, 0x44);
    ASSERT_POSITION(chroma_top_offset, 0x48);
    ASSERT_POSITION(chroma_bot_offset, 0x4C);
    ASSERT_POSITION(chroma_frame_offset, 0x50);
    ASSERT_POSITION(hist_buffer_size, 0x54);
    ASSERT_POSITION(flags, 0x58);
#undef ASSERT_POSITION

// Same check, re-targeted at H264DecoderContext.
#define ASSERT_POSITION(field_name, position) \
    static_assert(offsetof(H264DecoderContext, field_name) == position, \
                  "Field " #field_name " has invalid position")

    ASSERT_POSITION(stream_len, 0x48);
    ASSERT_POSITION(h264_parameter_set, 0x58);
    ASSERT_POSITION(weight_scale, 0x1C0);
#undef ASSERT_POSITION
};
| 171 | |||
| 172 | } // namespace Decoder | ||
| 173 | } // namespace Tegra | ||
diff --git a/src/video_core/host1x/codecs/vp8.cpp b/src/video_core/host1x/codecs/vp8.cpp new file mode 100644 index 000000000..aac026e17 --- /dev/null +++ b/src/video_core/host1x/codecs/vp8.cpp | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <vector> | ||
| 5 | |||
| 6 | #include "video_core/gpu.h" | ||
| 7 | #include "video_core/host1x/codecs/vp8.h" | ||
| 8 | #include "video_core/memory_manager.h" | ||
| 9 | |||
| 10 | namespace Tegra::Decoder { | ||
| 11 | VP8::VP8(GPU& gpu_) : gpu(gpu_) {} | ||
| 12 | |||
| 13 | VP8::~VP8() = default; | ||
| 14 | |||
| 15 | const std::vector<u8>& VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { | ||
| 16 | VP8PictureInfo info; | ||
| 17 | gpu.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo)); | ||
| 18 | |||
| 19 | const bool is_key_frame = info.key_frame == 1u; | ||
| 20 | const auto bitstream_size = static_cast<size_t>(info.vld_buffer_size); | ||
| 21 | const size_t header_size = is_key_frame ? 10u : 3u; | ||
| 22 | frame.resize(header_size + bitstream_size); | ||
| 23 | |||
| 24 | // Based on page 30 of the VP8 specification. | ||
| 25 | // https://datatracker.ietf.org/doc/rfc6386/ | ||
| 26 | frame[0] = is_key_frame ? 0u : 1u; // 1-bit frame type (0: keyframe, 1: interframes). | ||
| 27 | frame[0] |= static_cast<u8>((info.version & 7u) << 1u); // 3-bit version number | ||
| 28 | frame[0] |= static_cast<u8>(1u << 4u); // 1-bit show_frame flag | ||
| 29 | |||
| 30 | // The next 19-bits are the first partition size | ||
| 31 | frame[0] |= static_cast<u8>((info.first_part_size & 7u) << 5u); | ||
| 32 | frame[1] = static_cast<u8>((info.first_part_size & 0x7f8u) >> 3u); | ||
| 33 | frame[2] = static_cast<u8>((info.first_part_size & 0x7f800u) >> 11u); | ||
| 34 | |||
| 35 | if (is_key_frame) { | ||
| 36 | frame[3] = 0x9du; | ||
| 37 | frame[4] = 0x01u; | ||
| 38 | frame[5] = 0x2au; | ||
| 39 | // TODO(ameerj): Horizontal/Vertical Scale | ||
| 40 | // 16 bits: (2 bits Horizontal Scale << 14) | Width (14 bits) | ||
| 41 | frame[6] = static_cast<u8>(info.frame_width & 0xff); | ||
| 42 | frame[7] = static_cast<u8>(((info.frame_width >> 8) & 0x3f)); | ||
| 43 | // 16 bits:(2 bits Vertical Scale << 14) | Height (14 bits) | ||
| 44 | frame[8] = static_cast<u8>(info.frame_height & 0xff); | ||
| 45 | frame[9] = static_cast<u8>(((info.frame_height >> 8) & 0x3f)); | ||
| 46 | } | ||
| 47 | const u64 bitstream_offset = state.frame_bitstream_offset; | ||
| 48 | gpu.MemoryManager().ReadBlock(bitstream_offset, frame.data() + header_size, bitstream_size); | ||
| 49 | |||
| 50 | return frame; | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace Tegra::Decoder | ||
diff --git a/src/video_core/host1x/codecs/vp8.h b/src/video_core/host1x/codecs/vp8.h new file mode 100644 index 000000000..a1dfa5f03 --- /dev/null +++ b/src/video_core/host1x/codecs/vp8.h | |||
| @@ -0,0 +1,74 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <array> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/common_funcs.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/host1x/nvdec_common.h" | ||
| 12 | |||
| 13 | namespace Tegra { | ||
| 14 | class GPU; | ||
| 15 | namespace Decoder { | ||
| 16 | |||
/// Composes VP8 frames for FFmpeg decoding from NVDEC register state.
class VP8 {
public:
    explicit VP8(GPU& gpu);
    ~VP8();

    /// Compose the VP8 frame for FFmpeg decoding
    /// @param state NVDEC register state describing the frame to compose.
    /// @returns A reference to a byte vector holding the composed frame.
    [[nodiscard]] const std::vector<u8>& ComposeFrame(
        const Host1x::NvdecCommon::NvdecRegisters& state);

private:
    std::vector<u8> frame;
    GPU& gpu;

    /// Picture info block; its total size is enforced by the static_assert below.
    struct VP8PictureInfo {
        INSERT_PADDING_WORDS_NOINIT(14);
        u16 frame_width; // actual frame width
        u16 frame_height; // actual frame height
        u8 key_frame;
        u8 version;
        union {
            u8 raw;
            BitField<0, 2, u8> tile_format;
            BitField<2, 3, u8> gob_height;
            BitField<5, 3, u8> reserverd_surface_format;
        };
        u8 error_conceal_on; // 1: error conceal on; 0: off
        u32 first_part_size; // the size of first partition(frame header and mb header partition)
        u32 hist_buffer_size; // in units of 256
        u32 vld_buffer_size; // in units of 1
        // Current frame buffers
        std::array<u32, 2> frame_stride; // [y_c]
        u32 luma_top_offset; // offset of luma top field in units of 256
        u32 luma_bot_offset; // offset of luma bottom field in units of 256
        u32 luma_frame_offset; // offset of luma frame in units of 256
        u32 chroma_top_offset; // offset of chroma top field in units of 256
        u32 chroma_bot_offset; // offset of chroma bottom field in units of 256
        u32 chroma_frame_offset; // offset of chroma frame in units of 256

        INSERT_PADDING_BYTES_NOINIT(0x1c); // NvdecDisplayParams

        // Decode picture buffer related
        s8 current_output_memory_layout;
        // output NV12/NV24 setting. index 0: golden; 1: altref; 2: last
        std::array<s8, 3> output_memory_layout;

        u8 segmentation_feature_data_update;
        INSERT_PADDING_BYTES_NOINIT(3);

        // ucode return result
        u32 result_value;
        std::array<u32, 8> partition_offset;
        INSERT_PADDING_WORDS_NOINIT(3);
    };
    static_assert(sizeof(VP8PictureInfo) == 0xc0, "PictureInfo is an invalid size");
};
| 72 | |||
| 73 | } // namespace Decoder | ||
| 74 | } // namespace Tegra | ||
diff --git a/src/video_core/host1x/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp new file mode 100644 index 000000000..bc50c6ba4 --- /dev/null +++ b/src/video_core/host1x/codecs/vp9.cpp | |||
| @@ -0,0 +1,946 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <algorithm> // for std::copy | ||
| 5 | #include <numeric> | ||
| 6 | #include "common/assert.h" | ||
| 7 | #include "video_core/gpu.h" | ||
| 8 | #include "video_core/host1x/codecs/vp9.h" | ||
| 9 | #include "video_core/memory_manager.h" | ||
| 10 | |||
| 11 | namespace Tegra::Decoder { | ||
| 12 | namespace { | ||
// Probability passed to the range encoder when writing each "probability changed" flag.
constexpr u32 diff_update_probability = 252;
// NOTE(review): presumably the VP9 frame sync code bytes (0x49, 0x83, 0x42); its use is
// outside this part of the file - confirm.
constexpr u32 frame_sync_code = 0x498342;
| 15 | |||
// Default compressed header probabilities once frame context resets.
// Pure data: one initializer per Vp9EntropyProbs member. Values must not be reordered.
constexpr Vp9EntropyProbs default_probs{
    .y_mode_prob{
        65, 32, 18, 144, 162, 194, 41, 51, 98, 132, 68, 18, 165, 217, 196, 45, 40, 78,
        173, 80, 19, 176, 240, 193, 64, 35, 46, 221, 135, 38, 194, 248, 121, 96, 85, 29,
    },
    .partition_prob{
        199, 122, 141, 0, 147, 63, 159, 0, 148, 133, 118, 0, 121, 104, 114, 0,
        174, 73, 87, 0, 92, 41, 83, 0, 82, 99, 50, 0, 53, 39, 39, 0,
        177, 58, 59, 0, 68, 26, 63, 0, 52, 79, 25, 0, 17, 14, 12, 0,
        222, 34, 30, 0, 72, 16, 44, 0, 58, 32, 12, 0, 10, 7, 6, 0,
    },
    .coef_probs{
        195, 29, 183, 84, 49, 136, 8, 42, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        31, 107, 169, 35, 99, 159, 17, 82, 140, 8, 66, 114, 2, 44, 76, 1, 19, 32,
        40, 132, 201, 29, 114, 187, 13, 91, 157, 7, 75, 127, 3, 58, 95, 1, 28, 47,
        69, 142, 221, 42, 122, 201, 15, 91, 159, 6, 67, 121, 1, 42, 77, 1, 17, 31,
        102, 148, 228, 67, 117, 204, 17, 82, 154, 6, 59, 114, 2, 39, 75, 1, 15, 29,
        156, 57, 233, 119, 57, 212, 58, 48, 163, 29, 40, 124, 12, 30, 81, 3, 12, 31,
        191, 107, 226, 124, 117, 204, 25, 99, 155, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        29, 148, 210, 37, 126, 194, 8, 93, 157, 2, 68, 118, 1, 39, 69, 1, 17, 33,
        41, 151, 213, 27, 123, 193, 3, 82, 144, 1, 58, 105, 1, 32, 60, 1, 13, 26,
        59, 159, 220, 23, 126, 198, 4, 88, 151, 1, 66, 114, 1, 38, 71, 1, 18, 34,
        114, 136, 232, 51, 114, 207, 11, 83, 155, 3, 56, 105, 1, 33, 65, 1, 17, 34,
        149, 65, 234, 121, 57, 215, 61, 49, 166, 28, 36, 114, 12, 25, 76, 3, 16, 42,
        214, 49, 220, 132, 63, 188, 42, 65, 137, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        85, 137, 221, 104, 131, 216, 49, 111, 192, 21, 87, 155, 2, 49, 87, 1, 16, 28,
        89, 163, 230, 90, 137, 220, 29, 100, 183, 10, 70, 135, 2, 42, 81, 1, 17, 33,
        108, 167, 237, 55, 133, 222, 15, 97, 179, 4, 72, 135, 1, 45, 85, 1, 19, 38,
        124, 146, 240, 66, 124, 224, 17, 88, 175, 4, 58, 122, 1, 36, 75, 1, 18, 37,
        141, 79, 241, 126, 70, 227, 66, 58, 182, 30, 44, 136, 12, 34, 96, 2, 20, 47,
        229, 99, 249, 143, 111, 235, 46, 109, 192, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        82, 158, 236, 94, 146, 224, 25, 117, 191, 9, 87, 149, 3, 56, 99, 1, 33, 57,
        83, 167, 237, 68, 145, 222, 10, 103, 177, 2, 72, 131, 1, 41, 79, 1, 20, 39,
        99, 167, 239, 47, 141, 224, 10, 104, 178, 2, 73, 133, 1, 44, 85, 1, 22, 47,
        127, 145, 243, 71, 129, 228, 17, 93, 177, 3, 61, 124, 1, 41, 84, 1, 21, 52,
        157, 78, 244, 140, 72, 231, 69, 58, 184, 31, 44, 137, 14, 38, 105, 8, 23, 61,
        125, 34, 187, 52, 41, 133, 6, 31, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        37, 109, 153, 51, 102, 147, 23, 87, 128, 8, 67, 101, 1, 41, 63, 1, 19, 29,
        31, 154, 185, 17, 127, 175, 6, 96, 145, 2, 73, 114, 1, 51, 82, 1, 28, 45,
        23, 163, 200, 10, 131, 185, 2, 93, 148, 1, 67, 111, 1, 41, 69, 1, 14, 24,
        29, 176, 217, 12, 145, 201, 3, 101, 156, 1, 69, 111, 1, 39, 63, 1, 14, 23,
        57, 192, 233, 25, 154, 215, 6, 109, 167, 3, 78, 118, 1, 48, 69, 1, 21, 29,
        202, 105, 245, 108, 106, 216, 18, 90, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        33, 172, 219, 64, 149, 206, 14, 117, 177, 5, 90, 141, 2, 61, 95, 1, 37, 57,
        33, 179, 220, 11, 140, 198, 1, 89, 148, 1, 60, 104, 1, 33, 57, 1, 12, 21,
        30, 181, 221, 8, 141, 198, 1, 87, 145, 1, 58, 100, 1, 31, 55, 1, 12, 20,
        32, 186, 224, 7, 142, 198, 1, 86, 143, 1, 58, 100, 1, 31, 55, 1, 12, 22,
        57, 192, 227, 20, 143, 204, 3, 96, 154, 1, 68, 112, 1, 42, 69, 1, 19, 32,
        212, 35, 215, 113, 47, 169, 29, 48, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        74, 129, 203, 106, 120, 203, 49, 107, 178, 19, 84, 144, 4, 50, 84, 1, 15, 25,
        71, 172, 217, 44, 141, 209, 15, 102, 173, 6, 76, 133, 2, 51, 89, 1, 24, 42,
        64, 185, 231, 31, 148, 216, 8, 103, 175, 3, 74, 131, 1, 46, 81, 1, 18, 30,
        65, 196, 235, 25, 157, 221, 5, 105, 174, 1, 67, 120, 1, 38, 69, 1, 15, 30,
        65, 204, 238, 30, 156, 224, 7, 107, 177, 2, 70, 124, 1, 42, 73, 1, 18, 34,
        225, 86, 251, 144, 104, 235, 42, 99, 181, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        85, 175, 239, 112, 165, 229, 29, 136, 200, 12, 103, 162, 6, 77, 123, 2, 53, 84,
        75, 183, 239, 30, 155, 221, 3, 106, 171, 1, 74, 128, 1, 44, 76, 1, 17, 28,
        73, 185, 240, 27, 159, 222, 2, 107, 172, 1, 75, 127, 1, 42, 73, 1, 17, 29,
        62, 190, 238, 21, 159, 222, 2, 107, 172, 1, 72, 122, 1, 40, 71, 1, 18, 32,
        61, 199, 240, 27, 161, 226, 4, 113, 180, 1, 76, 129, 1, 46, 80, 1, 23, 41,
        7, 27, 153, 5, 30, 95, 1, 16, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        50, 75, 127, 57, 75, 124, 27, 67, 108, 10, 54, 86, 1, 33, 52, 1, 12, 18,
        43, 125, 151, 26, 108, 148, 7, 83, 122, 2, 59, 89, 1, 38, 60, 1, 17, 27,
        23, 144, 163, 13, 112, 154, 2, 75, 117, 1, 50, 81, 1, 31, 51, 1, 14, 23,
        18, 162, 185, 6, 123, 171, 1, 78, 125, 1, 51, 86, 1, 31, 54, 1, 14, 23,
        15, 199, 227, 3, 150, 204, 1, 91, 146, 1, 55, 95, 1, 30, 53, 1, 11, 20,
        19, 55, 240, 19, 59, 196, 3, 52, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        41, 166, 207, 104, 153, 199, 31, 123, 181, 14, 101, 152, 5, 72, 106, 1, 36, 52,
        35, 176, 211, 12, 131, 190, 2, 88, 144, 1, 60, 101, 1, 36, 60, 1, 16, 28,
        28, 183, 213, 8, 134, 191, 1, 86, 142, 1, 56, 96, 1, 30, 53, 1, 12, 20,
        20, 190, 215, 4, 135, 192, 1, 84, 139, 1, 53, 91, 1, 28, 49, 1, 11, 20,
        13, 196, 216, 2, 137, 192, 1, 86, 143, 1, 57, 99, 1, 32, 56, 1, 13, 24,
        211, 29, 217, 96, 47, 156, 22, 43, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        78, 120, 193, 111, 116, 186, 46, 102, 164, 15, 80, 128, 2, 49, 76, 1, 18, 28,
        71, 161, 203, 42, 132, 192, 10, 98, 150, 3, 69, 109, 1, 44, 70, 1, 18, 29,
        57, 186, 211, 30, 140, 196, 4, 93, 146, 1, 62, 102, 1, 38, 65, 1, 16, 27,
        47, 199, 217, 14, 145, 196, 1, 88, 142, 1, 57, 98, 1, 36, 62, 1, 15, 26,
        26, 219, 229, 5, 155, 207, 1, 94, 151, 1, 60, 104, 1, 36, 62, 1, 16, 28,
        233, 29, 248, 146, 47, 220, 43, 52, 140, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        100, 163, 232, 179, 161, 222, 63, 142, 204, 37, 113, 174, 26, 89, 137, 18, 68, 97,
        85, 181, 230, 32, 146, 209, 7, 100, 164, 3, 71, 121, 1, 45, 77, 1, 18, 30,
        65, 187, 230, 20, 148, 207, 2, 97, 159, 1, 68, 116, 1, 40, 70, 1, 14, 29,
        40, 194, 227, 8, 147, 204, 1, 94, 155, 1, 65, 112, 1, 39, 66, 1, 14, 26,
        16, 208, 228, 3, 151, 207, 1, 98, 160, 1, 67, 117, 1, 41, 74, 1, 17, 31,
        17, 38, 140, 7, 34, 80, 1, 17, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        37, 75, 128, 41, 76, 128, 26, 66, 116, 12, 52, 94, 2, 32, 55, 1, 10, 16,
        50, 127, 154, 37, 109, 152, 16, 82, 121, 5, 59, 85, 1, 35, 54, 1, 13, 20,
        40, 142, 167, 17, 110, 157, 2, 71, 112, 1, 44, 72, 1, 27, 45, 1, 11, 17,
        30, 175, 188, 9, 124, 169, 1, 74, 116, 1, 48, 78, 1, 30, 49, 1, 11, 18,
        10, 222, 223, 2, 150, 194, 1, 83, 128, 1, 48, 79, 1, 27, 45, 1, 11, 17,
        36, 41, 235, 29, 36, 193, 10, 27, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        85, 165, 222, 177, 162, 215, 110, 135, 195, 57, 113, 168, 23, 83, 120, 10, 49, 61,
        85, 190, 223, 36, 139, 200, 5, 90, 146, 1, 60, 103, 1, 38, 65, 1, 18, 30,
        72, 202, 223, 23, 141, 199, 2, 86, 140, 1, 56, 97, 1, 36, 61, 1, 16, 27,
        55, 218, 225, 13, 145, 200, 1, 86, 141, 1, 57, 99, 1, 35, 61, 1, 13, 22,
        15, 235, 212, 1, 132, 184, 1, 84, 139, 1, 57, 97, 1, 34, 56, 1, 14, 23,
        181, 21, 201, 61, 37, 123, 10, 38, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        47, 106, 172, 95, 104, 173, 42, 93, 159, 18, 77, 131, 4, 50, 81, 1, 17, 23,
        62, 147, 199, 44, 130, 189, 28, 102, 154, 18, 75, 115, 2, 44, 65, 1, 12, 19,
        55, 153, 210, 24, 130, 194, 3, 93, 146, 1, 61, 97, 1, 31, 50, 1, 10, 16,
        49, 186, 223, 17, 148, 204, 1, 96, 142, 1, 53, 83, 1, 26, 44, 1, 11, 17,
        13, 217, 212, 2, 136, 180, 1, 78, 124, 1, 50, 83, 1, 29, 49, 1, 14, 23,
        197, 13, 247, 82, 17, 222, 25, 17, 162, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        126, 186, 247, 234, 191, 243, 176, 177, 234, 104, 158, 220, 66, 128, 186, 55, 90, 137,
        111, 197, 242, 46, 158, 219, 9, 104, 171, 2, 65, 125, 1, 44, 80, 1, 17, 91,
        104, 208, 245, 39, 168, 224, 3, 109, 162, 1, 79, 124, 1, 50, 102, 1, 43, 102,
        84, 220, 246, 31, 177, 231, 2, 115, 180, 1, 79, 134, 1, 55, 77, 1, 60, 79,
        43, 243, 240, 8, 180, 217, 1, 115, 166, 1, 84, 121, 1, 51, 67, 1, 16, 6,
    },
    .switchable_interp_prob{235, 162, 36, 255, 34, 3, 149, 144},
    .inter_mode_prob{
        2, 173, 34, 0, 7, 145, 85, 0, 7, 166, 63, 0, 7, 94,
        66, 0, 8, 64, 46, 0, 17, 81, 31, 0, 25, 29, 30, 0,
    },
    .intra_inter_prob{9, 102, 187, 225},
    // NOTE(review): the first four values repeat intra_inter_prob above - confirm intended.
    .comp_inter_prob{9, 102, 187, 225, 0},
    .single_ref_prob{33, 16, 77, 74, 142, 142, 172, 170, 238, 247},
    .comp_ref_prob{50, 126, 123, 221, 226},
    .tx_32x32_prob{3, 136, 37, 5, 52, 13},
    .tx_16x16_prob{20, 152, 15, 101},
    .tx_8x8_prob{100, 66},
    .skip_probs{192, 128, 64},
    .joints{32, 64, 96},
    .sign{128, 128},
    .classes{
        224, 144, 192, 168, 192, 176, 192, 198, 198, 245,
        216, 128, 176, 160, 176, 176, 192, 198, 198, 208,
    },
    .class_0{216, 208},
    .prob_bits{
        136, 140, 148, 160, 176, 192, 224, 234, 234, 240,
        136, 140, 148, 160, 176, 192, 224, 234, 234, 240,
    },
    .class_0_fr{128, 128, 64, 96, 112, 64, 128, 128, 64, 96, 112, 64},
    .fr{64, 96, 64, 64, 96, 64},
    .class_0_hp{160, 160},
    .high_precision{128, 128},
};
| 155 | |||
// 256-entry lookup table. For every index i >= 1 the entry equals the number of leading
// zero bits of i viewed as an 8-bit value (1 -> 7, 2..3 -> 6, ..., 128..255 -> 0); entry 0 is 0.
// NOTE(review): presumably the renormalization shift table for the range encoder; its use
// is outside this part of the file - confirm.
constexpr std::array<u8, 256> norm_lut{
    0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
| 166 | |||
// 254-entry lookup table read by RemapProbability: it is indexed with the recentered
// probability difference minus one, and the looked-up value is what gets encoded as the
// probability delta. Valid indices are 0..253.
constexpr std::array<u8, 254> map_lut{
    20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
    1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50, 51, 52, 53, 54,
    55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
    73, 4, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88, 89,
    90, 91, 92, 93, 94, 95, 96, 97, 6, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
    108, 109, 7, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, 123, 124,
    125, 126, 127, 128, 129, 130, 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142,
    143, 144, 145, 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171, 172, 173, 174, 175, 176, 177,
    178, 179, 180, 181, 13, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 14, 194,
    195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211, 212,
    213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 17,
    230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 18, 242, 243, 244, 245, 246, 247,
    248, 249, 250, 251, 252, 253, 19,
};
| 183 | |||
| 184 | // 6.2.14 Tile size calculation | ||
| 185 | |||
/// Smallest log2 tile-column count such that (64 << result) covers the number of
/// 64-pixel superblock columns in a frame of the given width.
[[nodiscard]] s32 CalcMinLog2TileCols(s32 frame_width) {
    // Width rounded up to whole 64-pixel superblock columns.
    const s32 superblock_columns = (frame_width + 63) / 64;

    s32 log2_cols = 0;
    for (; (64 << log2_cols) < superblock_columns; ++log2_cols) {
    }
    return log2_cols;
}
| 196 | |||
/// Largest log2 tile-column count for which each tile still spans at least four
/// 64-pixel superblock columns (never less than zero).
[[nodiscard]] s32 CalcMaxLog2TileCols(s32 frame_width) {
    // Width rounded up to whole 64-pixel superblock columns.
    const s32 superblock_columns = (frame_width + 63) / 64;

    // Grow while one more halving would still leave at least four columns per tile.
    s32 log2_cols = 0;
    while ((superblock_columns >> (log2_cols + 1)) >= 4) {
        ++log2_cols;
    }
    return log2_cols;
}
| 207 | |||
// Recenters new_prob around old_prob and folds the result into a non-negative value.
// Based on section 6.3.6 of VP9 Specification
[[nodiscard]] s32 RecenterNonNeg(s32 new_prob, s32 old_prob) {
    // Far above the centre: passed through unchanged.
    if (new_prob > old_prob * 2) {
        return new_prob;
    }

    // At or above the centre maps to even values, below the centre to odd values.
    const s32 distance = new_prob - old_prob;
    return distance >= 0 ? distance * 2 : -distance * 2 - 1;
}
| 220 | |||
| 221 | // Adjusts old_prob depending on new_prob. Based on section 6.3.5 of VP9 Specification | ||
| 222 | [[nodiscard]] s32 RemapProbability(s32 new_prob, s32 old_prob) { | ||
| 223 | new_prob--; | ||
| 224 | old_prob--; | ||
| 225 | |||
| 226 | std::size_t index{}; | ||
| 227 | |||
| 228 | if (old_prob * 2 <= 0xff) { | ||
| 229 | index = static_cast<std::size_t>(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1)); | ||
| 230 | } else { | ||
| 231 | index = static_cast<std::size_t>( | ||
| 232 | std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1)); | ||
| 233 | } | ||
| 234 | |||
| 235 | return static_cast<s32>(map_lut[index]); | ||
| 236 | } | ||
| 237 | } // Anonymous namespace | ||
| 238 | |||
| 239 | VP9::VP9(GPU& gpu_) : gpu{gpu_} {} | ||
| 240 | |||
| 241 | VP9::~VP9() = default; | ||
| 242 | |||
| 243 | void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) { | ||
| 244 | const bool update = new_prob != old_prob; | ||
| 245 | |||
| 246 | writer.Write(update, diff_update_probability); | ||
| 247 | |||
| 248 | if (update) { | ||
| 249 | WriteProbabilityDelta(writer, new_prob, old_prob); | ||
| 250 | } | ||
| 251 | } | ||
| 252 | template <typename T, std::size_t N> | ||
| 253 | void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob, | ||
| 254 | const std::array<T, N>& old_prob) { | ||
| 255 | for (std::size_t offset = 0; offset < new_prob.size(); ++offset) { | ||
| 256 | WriteProbabilityUpdate(writer, new_prob[offset], old_prob[offset]); | ||
| 257 | } | ||
| 258 | } | ||
| 259 | |||
| 260 | template <typename T, std::size_t N> | ||
| 261 | void VP9::WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob, | ||
| 262 | const std::array<T, N>& old_prob) { | ||
| 263 | for (std::size_t offset = 0; offset < new_prob.size(); offset += 4) { | ||
| 264 | WriteProbabilityUpdate(writer, new_prob[offset + 0], old_prob[offset + 0]); | ||
| 265 | WriteProbabilityUpdate(writer, new_prob[offset + 1], old_prob[offset + 1]); | ||
| 266 | WriteProbabilityUpdate(writer, new_prob[offset + 2], old_prob[offset + 2]); | ||
| 267 | } | ||
| 268 | } | ||
| 269 | |||
| 270 | void VP9::WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) { | ||
| 271 | const int delta = RemapProbability(new_prob, old_prob); | ||
| 272 | |||
| 273 | EncodeTermSubExp(writer, delta); | ||
| 274 | } | ||
| 275 | |||
| 276 | void VP9::EncodeTermSubExp(VpxRangeEncoder& writer, s32 value) { | ||
| 277 | if (WriteLessThan(writer, value, 16)) { | ||
| 278 | writer.Write(value, 4); | ||
| 279 | } else if (WriteLessThan(writer, value, 32)) { | ||
| 280 | writer.Write(value - 16, 4); | ||
| 281 | } else if (WriteLessThan(writer, value, 64)) { | ||
| 282 | writer.Write(value - 32, 5); | ||
| 283 | } else { | ||
| 284 | value -= 64; | ||
| 285 | |||
| 286 | constexpr s32 size = 8; | ||
| 287 | |||
| 288 | const s32 mask = (1 << size) - 191; | ||
| 289 | |||
| 290 | const s32 delta = value - mask; | ||
| 291 | |||
| 292 | if (delta < 0) { | ||
| 293 | writer.Write(value, size - 1); | ||
| 294 | } else { | ||
| 295 | writer.Write(delta / 2 + mask, size - 1); | ||
| 296 | writer.Write(delta & 1, 1); | ||
| 297 | } | ||
| 298 | } | ||
| 299 | } | ||
| 300 | |||
| 301 | bool VP9::WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test) { | ||
| 302 | const bool is_lt = value < test; | ||
| 303 | writer.Write(!is_lt); | ||
| 304 | return is_lt; | ||
| 305 | } | ||
| 306 | |||
| 307 | void VP9::WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode, | ||
| 308 | const std::array<u8, 1728>& new_prob, | ||
| 309 | const std::array<u8, 1728>& old_prob) { | ||
| 310 | constexpr u32 block_bytes = 2 * 2 * 6 * 6 * 3; | ||
| 311 | |||
| 312 | const auto needs_update = [&](u32 base_index) { | ||
| 313 | return !std::equal(new_prob.begin() + base_index, | ||
| 314 | new_prob.begin() + base_index + block_bytes, | ||
| 315 | old_prob.begin() + base_index); | ||
| 316 | }; | ||
| 317 | |||
| 318 | for (u32 block_index = 0; block_index < 4; block_index++) { | ||
| 319 | const u32 base_index = block_index * block_bytes; | ||
| 320 | const bool update = needs_update(base_index); | ||
| 321 | writer.Write(update); | ||
| 322 | |||
| 323 | if (update) { | ||
| 324 | u32 index = base_index; | ||
| 325 | for (s32 i = 0; i < 2; i++) { | ||
| 326 | for (s32 j = 0; j < 2; j++) { | ||
| 327 | for (s32 k = 0; k < 6; k++) { | ||
| 328 | for (s32 l = 0; l < 6; l++) { | ||
| 329 | if (k != 0 || l < 3) { | ||
| 330 | WriteProbabilityUpdate(writer, new_prob[index + 0], | ||
| 331 | old_prob[index + 0]); | ||
| 332 | WriteProbabilityUpdate(writer, new_prob[index + 1], | ||
| 333 | old_prob[index + 1]); | ||
| 334 | WriteProbabilityUpdate(writer, new_prob[index + 2], | ||
| 335 | old_prob[index + 2]); | ||
| 336 | } | ||
| 337 | index += 3; | ||
| 338 | } | ||
| 339 | } | ||
| 340 | } | ||
| 341 | } | ||
| 342 | } | ||
| 343 | if (block_index == static_cast<u32>(tx_mode)) { | ||
| 344 | break; | ||
| 345 | } | ||
| 346 | } | ||
| 347 | } | ||
| 348 | |||
| 349 | void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) { | ||
| 350 | const bool update = new_prob != old_prob; | ||
| 351 | writer.Write(update, diff_update_probability); | ||
| 352 | |||
| 353 | if (update) { | ||
| 354 | writer.Write(new_prob >> 1, 7); | ||
| 355 | } | ||
| 356 | } | ||
| 357 | |||
| 358 | Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& state) { | ||
| 359 | PictureInfo picture_info; | ||
| 360 | gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); | ||
| 361 | Vp9PictureInfo vp9_info = picture_info.Convert(); | ||
| 362 | |||
| 363 | InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy); | ||
| 364 | |||
| 365 | // surface_luma_offset[0:3] contains the address of the reference frame offsets in the following | ||
| 366 | // order: last, golden, altref, current. | ||
| 367 | std::copy(state.surface_luma_offset.begin(), state.surface_luma_offset.begin() + 4, | ||
| 368 | vp9_info.frame_offsets.begin()); | ||
| 369 | |||
| 370 | return vp9_info; | ||
| 371 | } | ||
| 372 | |||
| 373 | void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { | ||
| 374 | EntropyProbs entropy; | ||
| 375 | gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); | ||
| 376 | entropy.Convert(dst); | ||
| 377 | } | ||
| 378 | |||
| 379 | Vp9FrameContainer VP9::GetCurrentFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { | ||
| 380 | Vp9FrameContainer current_frame{}; | ||
| 381 | { | ||
| 382 | gpu.SyncGuestHost(); | ||
| 383 | current_frame.info = GetVp9PictureInfo(state); | ||
| 384 | current_frame.bit_stream.resize(current_frame.info.bitstream_size); | ||
| 385 | gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(), | ||
| 386 | current_frame.info.bitstream_size); | ||
| 387 | } | ||
| 388 | if (!next_frame.bit_stream.empty()) { | ||
| 389 | Vp9FrameContainer temp{ | ||
| 390 | .info = current_frame.info, | ||
| 391 | .bit_stream = std::move(current_frame.bit_stream), | ||
| 392 | }; | ||
| 393 | next_frame.info.show_frame = current_frame.info.last_frame_shown; | ||
| 394 | current_frame.info = next_frame.info; | ||
| 395 | current_frame.bit_stream = std::move(next_frame.bit_stream); | ||
| 396 | next_frame = std::move(temp); | ||
| 397 | } else { | ||
| 398 | next_frame.info = current_frame.info; | ||
| 399 | next_frame.bit_stream = current_frame.bit_stream; | ||
| 400 | } | ||
| 401 | return current_frame; | ||
| 402 | } | ||
| 403 | |||
| 404 | std::vector<u8> VP9::ComposeCompressedHeader() { | ||
| 405 | VpxRangeEncoder writer{}; | ||
| 406 | const bool update_probs = !current_frame_info.is_key_frame && current_frame_info.show_frame; | ||
| 407 | if (!current_frame_info.lossless) { | ||
| 408 | if (static_cast<u32>(current_frame_info.transform_mode) >= 3) { | ||
| 409 | writer.Write(3, 2); | ||
| 410 | writer.Write(current_frame_info.transform_mode == 4); | ||
| 411 | } else { | ||
| 412 | writer.Write(current_frame_info.transform_mode, 2); | ||
| 413 | } | ||
| 414 | } | ||
| 415 | |||
| 416 | if (current_frame_info.transform_mode == 4) { | ||
| 417 | // tx_mode_probs() in the spec | ||
| 418 | WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_8x8_prob, | ||
| 419 | prev_frame_probs.tx_8x8_prob); | ||
| 420 | WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_16x16_prob, | ||
| 421 | prev_frame_probs.tx_16x16_prob); | ||
| 422 | WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_32x32_prob, | ||
| 423 | prev_frame_probs.tx_32x32_prob); | ||
| 424 | if (update_probs) { | ||
| 425 | prev_frame_probs.tx_8x8_prob = current_frame_info.entropy.tx_8x8_prob; | ||
| 426 | prev_frame_probs.tx_16x16_prob = current_frame_info.entropy.tx_16x16_prob; | ||
| 427 | prev_frame_probs.tx_32x32_prob = current_frame_info.entropy.tx_32x32_prob; | ||
| 428 | } | ||
| 429 | } | ||
| 430 | // read_coef_probs() in the spec | ||
| 431 | WriteCoefProbabilityUpdate(writer, current_frame_info.transform_mode, | ||
| 432 | current_frame_info.entropy.coef_probs, prev_frame_probs.coef_probs); | ||
| 433 | // read_skip_probs() in the spec | ||
| 434 | WriteProbabilityUpdate(writer, current_frame_info.entropy.skip_probs, | ||
| 435 | prev_frame_probs.skip_probs); | ||
| 436 | |||
| 437 | if (update_probs) { | ||
| 438 | prev_frame_probs.coef_probs = current_frame_info.entropy.coef_probs; | ||
| 439 | prev_frame_probs.skip_probs = current_frame_info.entropy.skip_probs; | ||
| 440 | } | ||
| 441 | |||
| 442 | if (!current_frame_info.intra_only) { | ||
| 443 | // read_inter_probs() in the spec | ||
| 444 | WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.inter_mode_prob, | ||
| 445 | prev_frame_probs.inter_mode_prob); | ||
| 446 | |||
| 447 | if (current_frame_info.interp_filter == 4) { | ||
| 448 | // read_interp_filter_probs() in the spec | ||
| 449 | WriteProbabilityUpdate(writer, current_frame_info.entropy.switchable_interp_prob, | ||
| 450 | prev_frame_probs.switchable_interp_prob); | ||
| 451 | if (update_probs) { | ||
| 452 | prev_frame_probs.switchable_interp_prob = | ||
| 453 | current_frame_info.entropy.switchable_interp_prob; | ||
| 454 | } | ||
| 455 | } | ||
| 456 | |||
| 457 | // read_is_inter_probs() in the spec | ||
| 458 | WriteProbabilityUpdate(writer, current_frame_info.entropy.intra_inter_prob, | ||
| 459 | prev_frame_probs.intra_inter_prob); | ||
| 460 | |||
| 461 | // frame_reference_mode() in the spec | ||
| 462 | if ((current_frame_info.ref_frame_sign_bias[1] & 1) != | ||
| 463 | (current_frame_info.ref_frame_sign_bias[2] & 1) || | ||
| 464 | (current_frame_info.ref_frame_sign_bias[1] & 1) != | ||
| 465 | (current_frame_info.ref_frame_sign_bias[3] & 1)) { | ||
| 466 | if (current_frame_info.reference_mode >= 1) { | ||
| 467 | writer.Write(1, 1); | ||
| 468 | writer.Write(current_frame_info.reference_mode == 2); | ||
| 469 | } else { | ||
| 470 | writer.Write(0, 1); | ||
| 471 | } | ||
| 472 | } | ||
| 473 | |||
| 474 | // frame_reference_mode_probs() in the spec | ||
| 475 | if (current_frame_info.reference_mode == 2) { | ||
| 476 | WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_inter_prob, | ||
| 477 | prev_frame_probs.comp_inter_prob); | ||
| 478 | if (update_probs) { | ||
| 479 | prev_frame_probs.comp_inter_prob = current_frame_info.entropy.comp_inter_prob; | ||
| 480 | } | ||
| 481 | } | ||
| 482 | |||
| 483 | if (current_frame_info.reference_mode != 1) { | ||
| 484 | WriteProbabilityUpdate(writer, current_frame_info.entropy.single_ref_prob, | ||
| 485 | prev_frame_probs.single_ref_prob); | ||
| 486 | if (update_probs) { | ||
| 487 | prev_frame_probs.single_ref_prob = current_frame_info.entropy.single_ref_prob; | ||
| 488 | } | ||
| 489 | } | ||
| 490 | |||
| 491 | if (current_frame_info.reference_mode != 0) { | ||
| 492 | WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_ref_prob, | ||
| 493 | prev_frame_probs.comp_ref_prob); | ||
| 494 | if (update_probs) { | ||
| 495 | prev_frame_probs.comp_ref_prob = current_frame_info.entropy.comp_ref_prob; | ||
| 496 | } | ||
| 497 | } | ||
| 498 | |||
| 499 | // read_y_mode_probs | ||
| 500 | for (std::size_t index = 0; index < current_frame_info.entropy.y_mode_prob.size(); | ||
| 501 | ++index) { | ||
| 502 | WriteProbabilityUpdate(writer, current_frame_info.entropy.y_mode_prob[index], | ||
| 503 | prev_frame_probs.y_mode_prob[index]); | ||
| 504 | } | ||
| 505 | |||
| 506 | // read_partition_probs | ||
| 507 | WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.partition_prob, | ||
| 508 | prev_frame_probs.partition_prob); | ||
| 509 | |||
| 510 | // mv_probs | ||
| 511 | for (s32 i = 0; i < 3; i++) { | ||
| 512 | WriteMvProbabilityUpdate(writer, current_frame_info.entropy.joints[i], | ||
| 513 | prev_frame_probs.joints[i]); | ||
| 514 | } | ||
| 515 | if (update_probs) { | ||
| 516 | prev_frame_probs.inter_mode_prob = current_frame_info.entropy.inter_mode_prob; | ||
| 517 | prev_frame_probs.intra_inter_prob = current_frame_info.entropy.intra_inter_prob; | ||
| 518 | prev_frame_probs.y_mode_prob = current_frame_info.entropy.y_mode_prob; | ||
| 519 | prev_frame_probs.partition_prob = current_frame_info.entropy.partition_prob; | ||
| 520 | prev_frame_probs.joints = current_frame_info.entropy.joints; | ||
| 521 | } | ||
| 522 | |||
| 523 | for (s32 i = 0; i < 2; i++) { | ||
| 524 | WriteMvProbabilityUpdate(writer, current_frame_info.entropy.sign[i], | ||
| 525 | prev_frame_probs.sign[i]); | ||
| 526 | for (s32 j = 0; j < 10; j++) { | ||
| 527 | const int index = i * 10 + j; | ||
| 528 | WriteMvProbabilityUpdate(writer, current_frame_info.entropy.classes[index], | ||
| 529 | prev_frame_probs.classes[index]); | ||
| 530 | } | ||
| 531 | WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0[i], | ||
| 532 | prev_frame_probs.class_0[i]); | ||
| 533 | |||
| 534 | for (s32 j = 0; j < 10; j++) { | ||
| 535 | const int index = i * 10 + j; | ||
| 536 | WriteMvProbabilityUpdate(writer, current_frame_info.entropy.prob_bits[index], | ||
| 537 | prev_frame_probs.prob_bits[index]); | ||
| 538 | } | ||
| 539 | } | ||
| 540 | |||
| 541 | for (s32 i = 0; i < 2; i++) { | ||
| 542 | for (s32 j = 0; j < 2; j++) { | ||
| 543 | for (s32 k = 0; k < 3; k++) { | ||
| 544 | const int index = i * 2 * 3 + j * 3 + k; | ||
| 545 | WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_fr[index], | ||
| 546 | prev_frame_probs.class_0_fr[index]); | ||
| 547 | } | ||
| 548 | } | ||
| 549 | |||
| 550 | for (s32 j = 0; j < 3; j++) { | ||
| 551 | const int index = i * 3 + j; | ||
| 552 | WriteMvProbabilityUpdate(writer, current_frame_info.entropy.fr[index], | ||
| 553 | prev_frame_probs.fr[index]); | ||
| 554 | } | ||
| 555 | } | ||
| 556 | |||
| 557 | if (current_frame_info.allow_high_precision_mv) { | ||
| 558 | for (s32 index = 0; index < 2; index++) { | ||
| 559 | WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_hp[index], | ||
| 560 | prev_frame_probs.class_0_hp[index]); | ||
| 561 | WriteMvProbabilityUpdate(writer, current_frame_info.entropy.high_precision[index], | ||
| 562 | prev_frame_probs.high_precision[index]); | ||
| 563 | } | ||
| 564 | } | ||
| 565 | |||
| 566 | // save previous probs | ||
| 567 | if (update_probs) { | ||
| 568 | prev_frame_probs.sign = current_frame_info.entropy.sign; | ||
| 569 | prev_frame_probs.classes = current_frame_info.entropy.classes; | ||
| 570 | prev_frame_probs.class_0 = current_frame_info.entropy.class_0; | ||
| 571 | prev_frame_probs.prob_bits = current_frame_info.entropy.prob_bits; | ||
| 572 | prev_frame_probs.class_0_fr = current_frame_info.entropy.class_0_fr; | ||
| 573 | prev_frame_probs.fr = current_frame_info.entropy.fr; | ||
| 574 | prev_frame_probs.class_0_hp = current_frame_info.entropy.class_0_hp; | ||
| 575 | prev_frame_probs.high_precision = current_frame_info.entropy.high_precision; | ||
| 576 | } | ||
| 577 | } | ||
| 578 | writer.End(); | ||
| 579 | return writer.GetBuffer(); | ||
| 580 | } | ||
| 581 | |||
| 582 | VpxBitStreamWriter VP9::ComposeUncompressedHeader() { | ||
| 583 | VpxBitStreamWriter uncomp_writer{}; | ||
| 584 | |||
| 585 | uncomp_writer.WriteU(2, 2); // Frame marker. | ||
| 586 | uncomp_writer.WriteU(0, 2); // Profile. | ||
| 587 | uncomp_writer.WriteBit(false); // Show existing frame. | ||
| 588 | uncomp_writer.WriteBit(!current_frame_info.is_key_frame); // is key frame? | ||
| 589 | uncomp_writer.WriteBit(current_frame_info.show_frame); // show frame? | ||
| 590 | uncomp_writer.WriteBit(current_frame_info.error_resilient_mode); // error reslience | ||
| 591 | |||
| 592 | if (current_frame_info.is_key_frame) { | ||
| 593 | uncomp_writer.WriteU(frame_sync_code, 24); | ||
| 594 | uncomp_writer.WriteU(0, 3); // Color space. | ||
| 595 | uncomp_writer.WriteU(0, 1); // Color range. | ||
| 596 | uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16); | ||
| 597 | uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16); | ||
| 598 | uncomp_writer.WriteBit(false); // Render and frame size different. | ||
| 599 | |||
| 600 | // Reset context | ||
| 601 | prev_frame_probs = default_probs; | ||
| 602 | swap_ref_indices = false; | ||
| 603 | loop_filter_ref_deltas.fill(0); | ||
| 604 | loop_filter_mode_deltas.fill(0); | ||
| 605 | frame_ctxs.fill(default_probs); | ||
| 606 | |||
| 607 | // intra only, meaning the frame can be recreated with no other references | ||
| 608 | current_frame_info.intra_only = true; | ||
| 609 | } else { | ||
| 610 | if (!current_frame_info.show_frame) { | ||
| 611 | uncomp_writer.WriteBit(current_frame_info.intra_only); | ||
| 612 | } else { | ||
| 613 | current_frame_info.intra_only = false; | ||
| 614 | } | ||
| 615 | if (!current_frame_info.error_resilient_mode) { | ||
| 616 | uncomp_writer.WriteU(0, 2); // Reset frame context. | ||
| 617 | } | ||
| 618 | const auto& curr_offsets = current_frame_info.frame_offsets; | ||
| 619 | const auto& next_offsets = next_frame.info.frame_offsets; | ||
| 620 | const bool ref_frames_different = curr_offsets[1] != curr_offsets[2]; | ||
| 621 | const bool next_references_swap = | ||
| 622 | (next_offsets[1] == curr_offsets[2]) || (next_offsets[2] == curr_offsets[1]); | ||
| 623 | const bool needs_ref_swap = ref_frames_different && next_references_swap; | ||
| 624 | if (needs_ref_swap) { | ||
| 625 | swap_ref_indices = !swap_ref_indices; | ||
| 626 | } | ||
| 627 | union { | ||
| 628 | u32 raw; | ||
| 629 | BitField<0, 1, u32> refresh_last; | ||
| 630 | BitField<1, 2, u32> refresh_golden; | ||
| 631 | BitField<2, 1, u32> refresh_alt; | ||
| 632 | } refresh_frame_flags; | ||
| 633 | |||
| 634 | refresh_frame_flags.raw = 0; | ||
| 635 | for (u32 index = 0; index < 3; ++index) { | ||
| 636 | // Refresh indices that use the current frame as an index | ||
| 637 | if (curr_offsets[3] == next_offsets[index]) { | ||
| 638 | refresh_frame_flags.raw |= 1u << index; | ||
| 639 | } | ||
| 640 | } | ||
| 641 | if (swap_ref_indices) { | ||
| 642 | const u32 temp = refresh_frame_flags.refresh_golden; | ||
| 643 | refresh_frame_flags.refresh_golden.Assign(refresh_frame_flags.refresh_alt.Value()); | ||
| 644 | refresh_frame_flags.refresh_alt.Assign(temp); | ||
| 645 | } | ||
| 646 | if (current_frame_info.intra_only) { | ||
| 647 | uncomp_writer.WriteU(frame_sync_code, 24); | ||
| 648 | uncomp_writer.WriteU(refresh_frame_flags.raw, 8); | ||
| 649 | uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16); | ||
| 650 | uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16); | ||
| 651 | uncomp_writer.WriteBit(false); // Render and frame size different. | ||
| 652 | } else { | ||
| 653 | const bool swap_indices = needs_ref_swap ^ swap_ref_indices; | ||
| 654 | const auto ref_frame_index = swap_indices ? std::array{0, 2, 1} : std::array{0, 1, 2}; | ||
| 655 | uncomp_writer.WriteU(refresh_frame_flags.raw, 8); | ||
| 656 | for (size_t index = 1; index < 4; index++) { | ||
| 657 | uncomp_writer.WriteU(ref_frame_index[index - 1], 3); | ||
| 658 | uncomp_writer.WriteU(current_frame_info.ref_frame_sign_bias[index], 1); | ||
| 659 | } | ||
| 660 | uncomp_writer.WriteBit(true); // Frame size with refs. | ||
| 661 | uncomp_writer.WriteBit(false); // Render and frame size different. | ||
| 662 | uncomp_writer.WriteBit(current_frame_info.allow_high_precision_mv); | ||
| 663 | uncomp_writer.WriteBit(current_frame_info.interp_filter == 4); | ||
| 664 | |||
| 665 | if (current_frame_info.interp_filter != 4) { | ||
| 666 | uncomp_writer.WriteU(current_frame_info.interp_filter, 2); | ||
| 667 | } | ||
| 668 | } | ||
| 669 | } | ||
| 670 | |||
| 671 | if (!current_frame_info.error_resilient_mode) { | ||
| 672 | uncomp_writer.WriteBit(true); // Refresh frame context. where do i get this info from? | ||
| 673 | uncomp_writer.WriteBit(true); // Frame parallel decoding mode. | ||
| 674 | } | ||
| 675 | |||
| 676 | int frame_ctx_idx = 0; | ||
| 677 | if (!current_frame_info.show_frame) { | ||
| 678 | frame_ctx_idx = 1; | ||
| 679 | } | ||
| 680 | |||
| 681 | uncomp_writer.WriteU(frame_ctx_idx, 2); // Frame context index. | ||
| 682 | prev_frame_probs = frame_ctxs[frame_ctx_idx]; // reference probabilities for compressed header | ||
| 683 | frame_ctxs[frame_ctx_idx] = current_frame_info.entropy; | ||
| 684 | |||
| 685 | uncomp_writer.WriteU(current_frame_info.first_level, 6); | ||
| 686 | uncomp_writer.WriteU(current_frame_info.sharpness_level, 3); | ||
| 687 | uncomp_writer.WriteBit(current_frame_info.mode_ref_delta_enabled); | ||
| 688 | |||
| 689 | if (current_frame_info.mode_ref_delta_enabled) { | ||
| 690 | // check if ref deltas are different, update accordingly | ||
| 691 | std::array<bool, 4> update_loop_filter_ref_deltas; | ||
| 692 | std::array<bool, 2> update_loop_filter_mode_deltas; | ||
| 693 | |||
| 694 | bool loop_filter_delta_update = false; | ||
| 695 | |||
| 696 | for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) { | ||
| 697 | const s8 old_deltas = loop_filter_ref_deltas[index]; | ||
| 698 | const s8 new_deltas = current_frame_info.ref_deltas[index]; | ||
| 699 | const bool differing_delta = old_deltas != new_deltas; | ||
| 700 | |||
| 701 | update_loop_filter_ref_deltas[index] = differing_delta; | ||
| 702 | loop_filter_delta_update |= differing_delta; | ||
| 703 | } | ||
| 704 | |||
| 705 | for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) { | ||
| 706 | const s8 old_deltas = loop_filter_mode_deltas[index]; | ||
| 707 | const s8 new_deltas = current_frame_info.mode_deltas[index]; | ||
| 708 | const bool differing_delta = old_deltas != new_deltas; | ||
| 709 | |||
| 710 | update_loop_filter_mode_deltas[index] = differing_delta; | ||
| 711 | loop_filter_delta_update |= differing_delta; | ||
| 712 | } | ||
| 713 | |||
| 714 | uncomp_writer.WriteBit(loop_filter_delta_update); | ||
| 715 | |||
| 716 | if (loop_filter_delta_update) { | ||
| 717 | for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) { | ||
| 718 | uncomp_writer.WriteBit(update_loop_filter_ref_deltas[index]); | ||
| 719 | |||
| 720 | if (update_loop_filter_ref_deltas[index]) { | ||
| 721 | uncomp_writer.WriteS(current_frame_info.ref_deltas[index], 6); | ||
| 722 | } | ||
| 723 | } | ||
| 724 | |||
| 725 | for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) { | ||
| 726 | uncomp_writer.WriteBit(update_loop_filter_mode_deltas[index]); | ||
| 727 | |||
| 728 | if (update_loop_filter_mode_deltas[index]) { | ||
| 729 | uncomp_writer.WriteS(current_frame_info.mode_deltas[index], 6); | ||
| 730 | } | ||
| 731 | } | ||
| 732 | // save new deltas | ||
| 733 | loop_filter_ref_deltas = current_frame_info.ref_deltas; | ||
| 734 | loop_filter_mode_deltas = current_frame_info.mode_deltas; | ||
| 735 | } | ||
| 736 | } | ||
| 737 | |||
| 738 | uncomp_writer.WriteU(current_frame_info.base_q_index, 8); | ||
| 739 | |||
| 740 | uncomp_writer.WriteDeltaQ(current_frame_info.y_dc_delta_q); | ||
| 741 | uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q); | ||
| 742 | uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q); | ||
| 743 | |||
| 744 | ASSERT(!current_frame_info.segment_enabled); | ||
| 745 | uncomp_writer.WriteBit(false); // Segmentation enabled (TODO). | ||
| 746 | |||
| 747 | const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width); | ||
| 748 | const s32 max_tile_cols_log2 = CalcMaxLog2TileCols(current_frame_info.frame_size.width); | ||
| 749 | |||
| 750 | const s32 tile_cols_log2_diff = current_frame_info.log2_tile_cols - min_tile_cols_log2; | ||
| 751 | const s32 tile_cols_log2_inc_mask = (1 << tile_cols_log2_diff) - 1; | ||
| 752 | |||
| 753 | // If it's less than the maximum, we need to add an extra 0 on the bitstream | ||
| 754 | // to indicate that it should stop reading. | ||
| 755 | if (current_frame_info.log2_tile_cols < max_tile_cols_log2) { | ||
| 756 | uncomp_writer.WriteU(tile_cols_log2_inc_mask << 1, tile_cols_log2_diff + 1); | ||
| 757 | } else { | ||
| 758 | uncomp_writer.WriteU(tile_cols_log2_inc_mask, tile_cols_log2_diff); | ||
| 759 | } | ||
| 760 | |||
| 761 | const bool tile_rows_log2_is_nonzero = current_frame_info.log2_tile_rows != 0; | ||
| 762 | |||
| 763 | uncomp_writer.WriteBit(tile_rows_log2_is_nonzero); | ||
| 764 | |||
| 765 | if (tile_rows_log2_is_nonzero) { | ||
| 766 | uncomp_writer.WriteBit(current_frame_info.log2_tile_rows > 1); | ||
| 767 | } | ||
| 768 | |||
| 769 | return uncomp_writer; | ||
| 770 | } | ||
| 771 | |||
| 772 | void VP9::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { | ||
| 773 | std::vector<u8> bitstream; | ||
| 774 | { | ||
| 775 | Vp9FrameContainer curr_frame = GetCurrentFrame(state); | ||
| 776 | current_frame_info = curr_frame.info; | ||
| 777 | bitstream = std::move(curr_frame.bit_stream); | ||
| 778 | } | ||
| 779 | // The uncompressed header routine sets PrevProb parameters needed for the compressed header | ||
| 780 | auto uncomp_writer = ComposeUncompressedHeader(); | ||
| 781 | std::vector<u8> compressed_header = ComposeCompressedHeader(); | ||
| 782 | |||
| 783 | uncomp_writer.WriteU(static_cast<s32>(compressed_header.size()), 16); | ||
| 784 | uncomp_writer.Flush(); | ||
| 785 | std::vector<u8> uncompressed_header = uncomp_writer.GetByteArray(); | ||
| 786 | |||
| 787 | // Write headers and frame to buffer | ||
| 788 | frame.resize(uncompressed_header.size() + compressed_header.size() + bitstream.size()); | ||
| 789 | std::copy(uncompressed_header.begin(), uncompressed_header.end(), frame.begin()); | ||
| 790 | std::copy(compressed_header.begin(), compressed_header.end(), | ||
| 791 | frame.begin() + uncompressed_header.size()); | ||
| 792 | std::copy(bitstream.begin(), bitstream.end(), | ||
| 793 | frame.begin() + uncompressed_header.size() + compressed_header.size()); | ||
| 794 | } | ||
| 795 | |||
| 796 | VpxRangeEncoder::VpxRangeEncoder() { | ||
| 797 | Write(false); | ||
| 798 | } | ||
| 799 | |||
| 800 | VpxRangeEncoder::~VpxRangeEncoder() = default; | ||
| 801 | |||
| 802 | void VpxRangeEncoder::Write(s32 value, s32 value_size) { | ||
| 803 | for (s32 bit = value_size - 1; bit >= 0; bit--) { | ||
| 804 | Write(((value >> bit) & 1) != 0); | ||
| 805 | } | ||
| 806 | } | ||
| 807 | |||
| 808 | void VpxRangeEncoder::Write(bool bit) { | ||
| 809 | Write(bit, half_probability); | ||
| 810 | } | ||
| 811 | |||
| 812 | void VpxRangeEncoder::Write(bool bit, s32 probability) { | ||
| 813 | u32 local_range = range; | ||
| 814 | const u32 split = 1 + (((local_range - 1) * static_cast<u32>(probability)) >> 8); | ||
| 815 | local_range = split; | ||
| 816 | |||
| 817 | if (bit) { | ||
| 818 | low_value += split; | ||
| 819 | local_range = range - split; | ||
| 820 | } | ||
| 821 | |||
| 822 | s32 shift = static_cast<s32>(norm_lut[local_range]); | ||
| 823 | local_range <<= shift; | ||
| 824 | count += shift; | ||
| 825 | |||
| 826 | if (count >= 0) { | ||
| 827 | const s32 offset = shift - count; | ||
| 828 | |||
| 829 | if (((low_value << (offset - 1)) >> 31) != 0) { | ||
| 830 | const s32 current_pos = static_cast<s32>(base_stream.GetPosition()); | ||
| 831 | base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos); | ||
| 832 | while (PeekByte() == 0xff) { | ||
| 833 | base_stream.WriteByte(0); | ||
| 834 | |||
| 835 | base_stream.Seek(-2, Common::SeekOrigin::FromCurrentPos); | ||
| 836 | } | ||
| 837 | base_stream.WriteByte(static_cast<u8>((PeekByte() + 1))); | ||
| 838 | base_stream.Seek(current_pos, Common::SeekOrigin::SetOrigin); | ||
| 839 | } | ||
| 840 | base_stream.WriteByte(static_cast<u8>((low_value >> (24 - offset)))); | ||
| 841 | |||
| 842 | low_value <<= offset; | ||
| 843 | shift = count; | ||
| 844 | low_value &= 0xffffff; | ||
| 845 | count -= 8; | ||
| 846 | } | ||
| 847 | |||
| 848 | low_value <<= shift; | ||
| 849 | range = local_range; | ||
| 850 | } | ||
| 851 | |||
| 852 | void VpxRangeEncoder::End() { | ||
| 853 | for (std::size_t index = 0; index < 32; ++index) { | ||
| 854 | Write(false); | ||
| 855 | } | ||
| 856 | } | ||
| 857 | |||
| 858 | u8 VpxRangeEncoder::PeekByte() { | ||
| 859 | const u8 value = base_stream.ReadByte(); | ||
| 860 | base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos); | ||
| 861 | |||
| 862 | return value; | ||
| 863 | } | ||
| 864 | |||
| 865 | VpxBitStreamWriter::VpxBitStreamWriter() = default; | ||
| 866 | |||
| 867 | VpxBitStreamWriter::~VpxBitStreamWriter() = default; | ||
| 868 | |||
| 869 | void VpxBitStreamWriter::WriteU(u32 value, u32 value_size) { | ||
| 870 | WriteBits(value, value_size); | ||
| 871 | } | ||
| 872 | |||
| 873 | void VpxBitStreamWriter::WriteS(s32 value, u32 value_size) { | ||
| 874 | const bool sign = value < 0; | ||
| 875 | if (sign) { | ||
| 876 | value = -value; | ||
| 877 | } | ||
| 878 | |||
| 879 | WriteBits(static_cast<u32>(value << 1) | (sign ? 1 : 0), value_size + 1); | ||
| 880 | } | ||
| 881 | |||
| 882 | void VpxBitStreamWriter::WriteDeltaQ(u32 value) { | ||
| 883 | const bool delta_coded = value != 0; | ||
| 884 | WriteBit(delta_coded); | ||
| 885 | |||
| 886 | if (delta_coded) { | ||
| 887 | WriteBits(value, 4); | ||
| 888 | } | ||
| 889 | } | ||
| 890 | |||
| 891 | void VpxBitStreamWriter::WriteBits(u32 value, u32 bit_count) { | ||
| 892 | s32 value_pos = 0; | ||
| 893 | s32 remaining = bit_count; | ||
| 894 | |||
| 895 | while (remaining > 0) { | ||
| 896 | s32 copy_size = remaining; | ||
| 897 | |||
| 898 | const s32 free = GetFreeBufferBits(); | ||
| 899 | |||
| 900 | if (copy_size > free) { | ||
| 901 | copy_size = free; | ||
| 902 | } | ||
| 903 | |||
| 904 | const s32 mask = (1 << copy_size) - 1; | ||
| 905 | |||
| 906 | const s32 src_shift = (bit_count - value_pos) - copy_size; | ||
| 907 | const s32 dst_shift = (buffer_size - buffer_pos) - copy_size; | ||
| 908 | |||
| 909 | buffer |= ((value >> src_shift) & mask) << dst_shift; | ||
| 910 | |||
| 911 | value_pos += copy_size; | ||
| 912 | buffer_pos += copy_size; | ||
| 913 | remaining -= copy_size; | ||
| 914 | } | ||
| 915 | } | ||
| 916 | |||
| 917 | void VpxBitStreamWriter::WriteBit(bool state) { | ||
| 918 | WriteBits(state ? 1 : 0, 1); | ||
| 919 | } | ||
| 920 | |||
| 921 | s32 VpxBitStreamWriter::GetFreeBufferBits() { | ||
| 922 | if (buffer_pos == buffer_size) { | ||
| 923 | Flush(); | ||
| 924 | } | ||
| 925 | |||
| 926 | return buffer_size - buffer_pos; | ||
| 927 | } | ||
| 928 | |||
| 929 | void VpxBitStreamWriter::Flush() { | ||
| 930 | if (buffer_pos == 0) { | ||
| 931 | return; | ||
| 932 | } | ||
| 933 | byte_array.push_back(static_cast<u8>(buffer)); | ||
| 934 | buffer = 0; | ||
| 935 | buffer_pos = 0; | ||
| 936 | } | ||
| 937 | |||
| 938 | std::vector<u8>& VpxBitStreamWriter::GetByteArray() { | ||
| 939 | return byte_array; | ||
| 940 | } | ||
| 941 | |||
| 942 | const std::vector<u8>& VpxBitStreamWriter::GetByteArray() const { | ||
| 943 | return byte_array; | ||
| 944 | } | ||
| 945 | |||
| 946 | } // namespace Tegra::Decoder | ||
diff --git a/src/video_core/host1x/codecs/vp9.h b/src/video_core/host1x/codecs/vp9.h new file mode 100644 index 000000000..a425c0fa4 --- /dev/null +++ b/src/video_core/host1x/codecs/vp9.h | |||
| @@ -0,0 +1,194 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <array> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "common/stream.h" | ||
| 11 | #include "video_core/host1x/codecs/vp9_types.h" | ||
| 12 | #include "video_core/host1x/nvdec_common.h" | ||
| 13 | |||
| 14 | namespace Tegra { | ||
| 15 | class GPU; | ||
| 16 | namespace Decoder { | ||
| 17 | |||
| 18 | /// The VpxRangeEncoder, and VpxBitStreamWriter classes are used to compose the | ||
| 19 | /// VP9 header bitstreams. | ||
| 20 | |||
| 21 | class VpxRangeEncoder { | ||
| 22 | public: | ||
| 23 | VpxRangeEncoder(); | ||
| 24 | ~VpxRangeEncoder(); | ||
| 25 | |||
| 26 | VpxRangeEncoder(const VpxRangeEncoder&) = delete; | ||
| 27 | VpxRangeEncoder& operator=(const VpxRangeEncoder&) = delete; | ||
| 28 | |||
| 29 | VpxRangeEncoder(VpxRangeEncoder&&) = default; | ||
| 30 | VpxRangeEncoder& operator=(VpxRangeEncoder&&) = default; | ||
| 31 | |||
| 32 | /// Writes the rightmost value_size bits from value into the stream | ||
| 33 | void Write(s32 value, s32 value_size); | ||
| 34 | |||
| 35 | /// Writes a single bit with half probability | ||
| 36 | void Write(bool bit); | ||
| 37 | |||
| 38 | /// Writes a bit to the base_stream encoded with probability | ||
| 39 | void Write(bool bit, s32 probability); | ||
| 40 | |||
| 41 | /// Signal the end of the bitstream | ||
| 42 | void End(); | ||
| 43 | |||
| 44 | [[nodiscard]] std::vector<u8>& GetBuffer() { | ||
| 45 | return base_stream.GetBuffer(); | ||
| 46 | } | ||
| 47 | |||
| 48 | [[nodiscard]] const std::vector<u8>& GetBuffer() const { | ||
| 49 | return base_stream.GetBuffer(); | ||
| 50 | } | ||
| 51 | |||
| 52 | private: | ||
| 53 | u8 PeekByte(); | ||
| 54 | Common::Stream base_stream{}; | ||
| 55 | u32 low_value{}; | ||
| 56 | u32 range{0xff}; | ||
| 57 | s32 count{-24}; | ||
| 58 | s32 half_probability{128}; | ||
| 59 | }; | ||
| 60 | |||
| 61 | class VpxBitStreamWriter { | ||
| 62 | public: | ||
| 63 | VpxBitStreamWriter(); | ||
| 64 | ~VpxBitStreamWriter(); | ||
| 65 | |||
| 66 | VpxBitStreamWriter(const VpxBitStreamWriter&) = delete; | ||
| 67 | VpxBitStreamWriter& operator=(const VpxBitStreamWriter&) = delete; | ||
| 68 | |||
| 69 | VpxBitStreamWriter(VpxBitStreamWriter&&) = default; | ||
| 70 | VpxBitStreamWriter& operator=(VpxBitStreamWriter&&) = default; | ||
| 71 | |||
| 72 | /// Write an unsigned integer value | ||
| 73 | void WriteU(u32 value, u32 value_size); | ||
| 74 | |||
| 75 | /// Write a signed integer value | ||
| 76 | void WriteS(s32 value, u32 value_size); | ||
| 77 | |||
| 78 | /// Based on 6.2.10 of VP9 Spec, writes a delta coded value | ||
| 79 | void WriteDeltaQ(u32 value); | ||
| 80 | |||
| 81 | /// Write a single bit. | ||
| 82 | void WriteBit(bool state); | ||
| 83 | |||
| 84 | /// Pushes current buffer into buffer_array, resets buffer | ||
| 85 | void Flush(); | ||
| 86 | |||
| 87 | /// Returns byte_array | ||
| 88 | [[nodiscard]] std::vector<u8>& GetByteArray(); | ||
| 89 | |||
| 90 | /// Returns const byte_array | ||
| 91 | [[nodiscard]] const std::vector<u8>& GetByteArray() const; | ||
| 92 | |||
| 93 | private: | ||
| 94 | /// Write bit_count bits from value into buffer | ||
| 95 | void WriteBits(u32 value, u32 bit_count); | ||
| 96 | |||
| 97 | /// Gets next available position in buffer, invokes Flush() if buffer is full | ||
| 98 | s32 GetFreeBufferBits(); | ||
| 99 | |||
| 100 | s32 buffer_size{8}; | ||
| 101 | |||
| 102 | s32 buffer{}; | ||
| 103 | s32 buffer_pos{}; | ||
| 104 | std::vector<u8> byte_array; | ||
| 105 | }; | ||
| 106 | |||
| 107 | class VP9 { | ||
| 108 | public: | ||
| 109 | explicit VP9(GPU& gpu_); | ||
| 110 | ~VP9(); | ||
| 111 | |||
| 112 | VP9(const VP9&) = delete; | ||
| 113 | VP9& operator=(const VP9&) = delete; | ||
| 114 | |||
| 115 | VP9(VP9&&) = default; | ||
| 116 | VP9& operator=(VP9&&) = delete; | ||
| 117 | |||
| 118 | /// Composes the VP9 frame from the GPU state information. | ||
| 119 | /// Based on the official VP9 spec documentation | ||
| 120 | void ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state); | ||
| 121 | |||
| 122 | /// Returns true if the most recent frame was a hidden frame. | ||
| 123 | [[nodiscard]] bool WasFrameHidden() const { | ||
| 124 | return !current_frame_info.show_frame; | ||
| 125 | } | ||
| 126 | |||
| 127 | /// Returns a const reference to the composed frame data. | ||
| 128 | [[nodiscard]] const std::vector<u8>& GetFrameBytes() const { | ||
| 129 | return frame; | ||
| 130 | } | ||
| 131 | |||
| 132 | private: | ||
| 133 | /// Generates compressed header probability updates in the bitstream writer | ||
| 134 | template <typename T, std::size_t N> | ||
| 135 | void WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob, | ||
| 136 | const std::array<T, N>& old_prob); | ||
| 137 | |||
| 138 | /// Generates compressed header probability updates in the bitstream writer | ||
| 139 | /// If probs are not equal, WriteProbabilityDelta is invoked | ||
| 140 | void WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); | ||
| 141 | |||
| 142 | /// Generates compressed header probability deltas in the bitstream writer | ||
| 143 | void WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); | ||
| 144 | |||
| 145 | /// Inverse of 6.3.4 Decode term subexp | ||
| 146 | void EncodeTermSubExp(VpxRangeEncoder& writer, s32 value); | ||
| 147 | |||
| 148 | /// Writes if the value is less than the test value | ||
| 149 | bool WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test); | ||
| 150 | |||
| 151 | /// Writes probability updates for the Coef probabilities | ||
| 152 | void WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode, | ||
| 153 | const std::array<u8, 1728>& new_prob, | ||
| 154 | const std::array<u8, 1728>& old_prob); | ||
| 155 | |||
| 156 | /// Write probabilities for 4-byte aligned structures | ||
| 157 | template <typename T, std::size_t N> | ||
| 158 | void WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob, | ||
| 159 | const std::array<T, N>& old_prob); | ||
| 160 | |||
| 161 | /// Write motion vector probability updates. 6.3.17 in the spec | ||
| 162 | void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); | ||
| 163 | |||
| 164 | /// Returns VP9 information from NVDEC provided offset and size | ||
| 165 | [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo( | ||
| 166 | const Host1x::NvdecCommon::NvdecRegisters& state); | ||
| 167 | |||
| 168 | /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct | ||
| 169 | void InsertEntropy(u64 offset, Vp9EntropyProbs& dst); | ||
| 170 | |||
| 171 | /// Returns frame to be decoded after buffering | ||
| 172 | [[nodiscard]] Vp9FrameContainer GetCurrentFrame( | ||
| 173 | const Host1x::NvdecCommon::NvdecRegisters& state); | ||
| 174 | |||
| 175 | /// Use NVDEC provided information to compose the headers for the current frame | ||
| 176 | [[nodiscard]] std::vector<u8> ComposeCompressedHeader(); | ||
| 177 | [[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader(); | ||
| 178 | |||
| 179 | GPU& gpu; | ||
| 180 | std::vector<u8> frame; | ||
| 181 | |||
| 182 | std::array<s8, 4> loop_filter_ref_deltas{}; | ||
| 183 | std::array<s8, 2> loop_filter_mode_deltas{}; | ||
| 184 | |||
| 185 | Vp9FrameContainer next_frame{}; | ||
| 186 | std::array<Vp9EntropyProbs, 4> frame_ctxs{}; | ||
| 187 | bool swap_ref_indices{}; | ||
| 188 | |||
| 189 | Vp9PictureInfo current_frame_info{}; | ||
| 190 | Vp9EntropyProbs prev_frame_probs{}; | ||
| 191 | }; | ||
| 192 | |||
| 193 | } // namespace Decoder | ||
| 194 | } // namespace Tegra | ||
diff --git a/src/video_core/host1x/codecs/vp9_types.h b/src/video_core/host1x/codecs/vp9_types.h new file mode 100644 index 000000000..bb3d8df6e --- /dev/null +++ b/src/video_core/host1x/codecs/vp9_types.h | |||
| @@ -0,0 +1,306 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <array> | ||
| 7 | #include <vector> | ||
| 8 | #include "common/common_funcs.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | |||
| 11 | namespace Tegra { | ||
| 12 | class GPU; | ||
| 13 | |||
| 14 | namespace Decoder { | ||
| 15 | struct Vp9FrameDimensions { | ||
| 16 | s16 width; | ||
| 17 | s16 height; | ||
| 18 | s16 luma_pitch; | ||
| 19 | s16 chroma_pitch; | ||
| 20 | }; | ||
| 21 | static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); | ||
| 22 | |||
| 23 | enum class FrameFlags : u32 { | ||
| 24 | IsKeyFrame = 1 << 0, | ||
| 25 | LastFrameIsKeyFrame = 1 << 1, | ||
| 26 | FrameSizeChanged = 1 << 2, | ||
| 27 | ErrorResilientMode = 1 << 3, | ||
| 28 | LastShowFrame = 1 << 4, | ||
| 29 | IntraOnly = 1 << 5, | ||
| 30 | }; | ||
| 31 | DECLARE_ENUM_FLAG_OPERATORS(FrameFlags) | ||
| 32 | |||
/// Transform block sizes. TxSizes is the number of real entries, not a size itself.
enum class TxSize {
    Tx4x4 = 0,   ///< 4x4 transform
    Tx8x8 = 1,   ///< 8x8 transform
    Tx16x16 = 2, ///< 16x16 transform
    Tx32x32 = 3, ///< 32x32 transform
    TxSizes = 4, ///< Count of the entries above
};
| 40 | |||
/// Transform size selection modes. TxModes is the number of real entries, not a mode itself.
enum class TxMode {
    Only4X4 = 0,      ///< Only 4x4 transform used
    Allow8X8 = 1,     ///< Allow block transform size up to 8x8
    Allow16X16 = 2,   ///< Allow block transform size up to 16x16
    Allow32X32 = 3,   ///< Allow block transform size up to 32x32
    TxModeSelect = 4, ///< Transform specified for each block
    TxModes = 5,      ///< Count of the entries above
};
| 49 | |||
| 50 | struct Segmentation { | ||
| 51 | u8 enabled; | ||
| 52 | u8 update_map; | ||
| 53 | u8 temporal_update; | ||
| 54 | u8 abs_delta; | ||
| 55 | std::array<u32, 8> feature_mask; | ||
| 56 | std::array<std::array<s16, 4>, 8> feature_data; | ||
| 57 | }; | ||
| 58 | static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size"); | ||
| 59 | |||
| 60 | struct LoopFilter { | ||
| 61 | u8 mode_ref_delta_enabled; | ||
| 62 | std::array<s8, 4> ref_deltas; | ||
| 63 | std::array<s8, 2> mode_deltas; | ||
| 64 | }; | ||
| 65 | static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size"); | ||
| 66 | |||
| 67 | struct Vp9EntropyProbs { | ||
| 68 | std::array<u8, 36> y_mode_prob; ///< 0x0000 | ||
| 69 | std::array<u8, 64> partition_prob; ///< 0x0024 | ||
| 70 | std::array<u8, 1728> coef_probs; ///< 0x0064 | ||
| 71 | std::array<u8, 8> switchable_interp_prob; ///< 0x0724 | ||
| 72 | std::array<u8, 28> inter_mode_prob; ///< 0x072C | ||
| 73 | std::array<u8, 4> intra_inter_prob; ///< 0x0748 | ||
| 74 | std::array<u8, 5> comp_inter_prob; ///< 0x074C | ||
| 75 | std::array<u8, 10> single_ref_prob; ///< 0x0751 | ||
| 76 | std::array<u8, 5> comp_ref_prob; ///< 0x075B | ||
| 77 | std::array<u8, 6> tx_32x32_prob; ///< 0x0760 | ||
| 78 | std::array<u8, 4> tx_16x16_prob; ///< 0x0766 | ||
| 79 | std::array<u8, 2> tx_8x8_prob; ///< 0x076A | ||
| 80 | std::array<u8, 3> skip_probs; ///< 0x076C | ||
| 81 | std::array<u8, 3> joints; ///< 0x076F | ||
| 82 | std::array<u8, 2> sign; ///< 0x0772 | ||
| 83 | std::array<u8, 20> classes; ///< 0x0774 | ||
| 84 | std::array<u8, 2> class_0; ///< 0x0788 | ||
| 85 | std::array<u8, 20> prob_bits; ///< 0x078A | ||
| 86 | std::array<u8, 12> class_0_fr; ///< 0x079E | ||
| 87 | std::array<u8, 6> fr; ///< 0x07AA | ||
| 88 | std::array<u8, 2> class_0_hp; ///< 0x07B0 | ||
| 89 | std::array<u8, 2> high_precision; ///< 0x07B2 | ||
| 90 | }; | ||
| 91 | static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size"); | ||
| 92 | |||
| 93 | struct Vp9PictureInfo { | ||
| 94 | u32 bitstream_size; | ||
| 95 | std::array<u64, 4> frame_offsets; | ||
| 96 | std::array<s8, 4> ref_frame_sign_bias; | ||
| 97 | s32 base_q_index; | ||
| 98 | s32 y_dc_delta_q; | ||
| 99 | s32 uv_dc_delta_q; | ||
| 100 | s32 uv_ac_delta_q; | ||
| 101 | s32 transform_mode; | ||
| 102 | s32 interp_filter; | ||
| 103 | s32 reference_mode; | ||
| 104 | s32 log2_tile_cols; | ||
| 105 | s32 log2_tile_rows; | ||
| 106 | std::array<s8, 4> ref_deltas; | ||
| 107 | std::array<s8, 2> mode_deltas; | ||
| 108 | Vp9EntropyProbs entropy; | ||
| 109 | Vp9FrameDimensions frame_size; | ||
| 110 | u8 first_level; | ||
| 111 | u8 sharpness_level; | ||
| 112 | bool is_key_frame; | ||
| 113 | bool intra_only; | ||
| 114 | bool last_frame_was_key; | ||
| 115 | bool error_resilient_mode; | ||
| 116 | bool last_frame_shown; | ||
| 117 | bool show_frame; | ||
| 118 | bool lossless; | ||
| 119 | bool allow_high_precision_mv; | ||
| 120 | bool segment_enabled; | ||
| 121 | bool mode_ref_delta_enabled; | ||
| 122 | }; | ||
| 123 | |||
| 124 | struct Vp9FrameContainer { | ||
| 125 | Vp9PictureInfo info{}; | ||
| 126 | std::vector<u8> bit_stream; | ||
| 127 | }; | ||
| 128 | |||
| 129 | struct PictureInfo { | ||
| 130 | INSERT_PADDING_WORDS_NOINIT(12); ///< 0x00 | ||
| 131 | u32 bitstream_size; ///< 0x30 | ||
| 132 | INSERT_PADDING_WORDS_NOINIT(5); ///< 0x34 | ||
| 133 | Vp9FrameDimensions last_frame_size; ///< 0x48 | ||
| 134 | Vp9FrameDimensions golden_frame_size; ///< 0x50 | ||
| 135 | Vp9FrameDimensions alt_frame_size; ///< 0x58 | ||
| 136 | Vp9FrameDimensions current_frame_size; ///< 0x60 | ||
| 137 | FrameFlags vp9_flags; ///< 0x68 | ||
| 138 | std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C | ||
| 139 | u8 first_level; ///< 0x70 | ||
| 140 | u8 sharpness_level; ///< 0x71 | ||
| 141 | u8 base_q_index; ///< 0x72 | ||
| 142 | u8 y_dc_delta_q; ///< 0x73 | ||
| 143 | u8 uv_ac_delta_q; ///< 0x74 | ||
| 144 | u8 uv_dc_delta_q; ///< 0x75 | ||
| 145 | u8 lossless; ///< 0x76 | ||
| 146 | u8 tx_mode; ///< 0x77 | ||
| 147 | u8 allow_high_precision_mv; ///< 0x78 | ||
| 148 | u8 interp_filter; ///< 0x79 | ||
| 149 | u8 reference_mode; ///< 0x7A | ||
| 150 | INSERT_PADDING_BYTES_NOINIT(3); ///< 0x7B | ||
| 151 | u8 log2_tile_cols; ///< 0x7E | ||
| 152 | u8 log2_tile_rows; ///< 0x7F | ||
| 153 | Segmentation segmentation; ///< 0x80 | ||
| 154 | LoopFilter loop_filter; ///< 0xE4 | ||
| 155 | INSERT_PADDING_BYTES_NOINIT(21); ///< 0xEB | ||
| 156 | |||
| 157 | [[nodiscard]] Vp9PictureInfo Convert() const { | ||
| 158 | return { | ||
| 159 | .bitstream_size = bitstream_size, | ||
| 160 | .frame_offsets{}, | ||
| 161 | .ref_frame_sign_bias = ref_frame_sign_bias, | ||
| 162 | .base_q_index = base_q_index, | ||
| 163 | .y_dc_delta_q = y_dc_delta_q, | ||
| 164 | .uv_dc_delta_q = uv_dc_delta_q, | ||
| 165 | .uv_ac_delta_q = uv_ac_delta_q, | ||
| 166 | .transform_mode = tx_mode, | ||
| 167 | .interp_filter = interp_filter, | ||
| 168 | .reference_mode = reference_mode, | ||
| 169 | .log2_tile_cols = log2_tile_cols, | ||
| 170 | .log2_tile_rows = log2_tile_rows, | ||
| 171 | .ref_deltas = loop_filter.ref_deltas, | ||
| 172 | .mode_deltas = loop_filter.mode_deltas, | ||
| 173 | .entropy{}, | ||
| 174 | .frame_size = current_frame_size, | ||
| 175 | .first_level = first_level, | ||
| 176 | .sharpness_level = sharpness_level, | ||
| 177 | .is_key_frame = True(vp9_flags & FrameFlags::IsKeyFrame), | ||
| 178 | .intra_only = True(vp9_flags & FrameFlags::IntraOnly), | ||
| 179 | .last_frame_was_key = True(vp9_flags & FrameFlags::LastFrameIsKeyFrame), | ||
| 180 | .error_resilient_mode = True(vp9_flags & FrameFlags::ErrorResilientMode), | ||
| 181 | .last_frame_shown = True(vp9_flags & FrameFlags::LastShowFrame), | ||
| 182 | .show_frame = true, | ||
| 183 | .lossless = lossless != 0, | ||
| 184 | .allow_high_precision_mv = allow_high_precision_mv != 0, | ||
| 185 | .segment_enabled = segmentation.enabled != 0, | ||
| 186 | .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0, | ||
| 187 | }; | ||
| 188 | } | ||
| 189 | }; | ||
| 190 | static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size"); | ||
| 191 | |||
| 192 | struct EntropyProbs { | ||
| 193 | INSERT_PADDING_BYTES_NOINIT(1024); ///< 0x0000 | ||
| 194 | std::array<u8, 28> inter_mode_prob; ///< 0x0400 | ||
| 195 | std::array<u8, 4> intra_inter_prob; ///< 0x041C | ||
| 196 | INSERT_PADDING_BYTES_NOINIT(80); ///< 0x0420 | ||
| 197 | std::array<u8, 2> tx_8x8_prob; ///< 0x0470 | ||
| 198 | std::array<u8, 4> tx_16x16_prob; ///< 0x0472 | ||
| 199 | std::array<u8, 6> tx_32x32_prob; ///< 0x0476 | ||
| 200 | std::array<u8, 4> y_mode_prob_e8; ///< 0x047C | ||
| 201 | std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7; ///< 0x0480 | ||
| 202 | INSERT_PADDING_BYTES_NOINIT(64); ///< 0x04A0 | ||
| 203 | std::array<u8, 64> partition_prob; ///< 0x04E0 | ||
| 204 | INSERT_PADDING_BYTES_NOINIT(10); ///< 0x0520 | ||
| 205 | std::array<u8, 8> switchable_interp_prob; ///< 0x052A | ||
| 206 | std::array<u8, 5> comp_inter_prob; ///< 0x0532 | ||
| 207 | std::array<u8, 3> skip_probs; ///< 0x0537 | ||
| 208 | INSERT_PADDING_BYTES_NOINIT(1); ///< 0x053A | ||
| 209 | std::array<u8, 3> joints; ///< 0x053B | ||
| 210 | std::array<u8, 2> sign; ///< 0x053E | ||
| 211 | std::array<u8, 2> class_0; ///< 0x0540 | ||
| 212 | std::array<u8, 6> fr; ///< 0x0542 | ||
| 213 | std::array<u8, 2> class_0_hp; ///< 0x0548 | ||
| 214 | std::array<u8, 2> high_precision; ///< 0x054A | ||
| 215 | std::array<u8, 20> classes; ///< 0x054C | ||
| 216 | std::array<u8, 12> class_0_fr; ///< 0x0560 | ||
| 217 | std::array<u8, 20> pred_bits; ///< 0x056C | ||
| 218 | std::array<u8, 10> single_ref_prob; ///< 0x0580 | ||
| 219 | std::array<u8, 5> comp_ref_prob; ///< 0x058A | ||
| 220 | INSERT_PADDING_BYTES_NOINIT(17); ///< 0x058F | ||
| 221 | std::array<u8, 2304> coef_probs; ///< 0x05A0 | ||
| 222 | |||
| 223 | void Convert(Vp9EntropyProbs& fc) { | ||
| 224 | fc.inter_mode_prob = inter_mode_prob; | ||
| 225 | fc.intra_inter_prob = intra_inter_prob; | ||
| 226 | fc.tx_8x8_prob = tx_8x8_prob; | ||
| 227 | fc.tx_16x16_prob = tx_16x16_prob; | ||
| 228 | fc.tx_32x32_prob = tx_32x32_prob; | ||
| 229 | |||
| 230 | for (std::size_t i = 0; i < 4; i++) { | ||
| 231 | for (std::size_t j = 0; j < 9; j++) { | ||
| 232 | fc.y_mode_prob[j + 9 * i] = j < 8 ? y_mode_prob_e0e7[i][j] : y_mode_prob_e8[i]; | ||
| 233 | } | ||
| 234 | } | ||
| 235 | |||
| 236 | fc.partition_prob = partition_prob; | ||
| 237 | fc.switchable_interp_prob = switchable_interp_prob; | ||
| 238 | fc.comp_inter_prob = comp_inter_prob; | ||
| 239 | fc.skip_probs = skip_probs; | ||
| 240 | fc.joints = joints; | ||
| 241 | fc.sign = sign; | ||
| 242 | fc.class_0 = class_0; | ||
| 243 | fc.fr = fr; | ||
| 244 | fc.class_0_hp = class_0_hp; | ||
| 245 | fc.high_precision = high_precision; | ||
| 246 | fc.classes = classes; | ||
| 247 | fc.class_0_fr = class_0_fr; | ||
| 248 | fc.prob_bits = pred_bits; | ||
| 249 | fc.single_ref_prob = single_ref_prob; | ||
| 250 | fc.comp_ref_prob = comp_ref_prob; | ||
| 251 | |||
| 252 | // Skip the 4th element as it goes unused | ||
| 253 | for (std::size_t i = 0; i < coef_probs.size(); i += 4) { | ||
| 254 | const std::size_t j = i - i / 4; | ||
| 255 | fc.coef_probs[j] = coef_probs[i]; | ||
| 256 | fc.coef_probs[j + 1] = coef_probs[i + 1]; | ||
| 257 | fc.coef_probs[j + 2] = coef_probs[i + 2]; | ||
| 258 | } | ||
| 259 | } | ||
| 260 | }; | ||
| 261 | static_assert(sizeof(EntropyProbs) == 0xEA0, "EntropyProbs is an invalid size"); | ||
| 262 | |||
/// Identifies one of the three reference slots; the values are 0, 1 and 2 in this order.
enum class Ref {
    Last,
    Golden,
    AltRef,
};
| 264 | |||
| 265 | struct RefPoolElement { | ||
| 266 | s64 frame{}; | ||
| 267 | Ref ref{}; | ||
| 268 | bool refresh{}; | ||
| 269 | }; | ||
| 270 | |||
| 271 | #define ASSERT_POSITION(field_name, position) \ | ||
| 272 | static_assert(offsetof(Vp9EntropyProbs, field_name) == position, \ | ||
| 273 | "Field " #field_name " has invalid position") | ||
| 274 | |||
| 275 | ASSERT_POSITION(partition_prob, 0x0024); | ||
| 276 | ASSERT_POSITION(switchable_interp_prob, 0x0724); | ||
| 277 | ASSERT_POSITION(sign, 0x0772); | ||
| 278 | ASSERT_POSITION(class_0_fr, 0x079E); | ||
| 279 | ASSERT_POSITION(high_precision, 0x07B2); | ||
| 280 | #undef ASSERT_POSITION | ||
| 281 | |||
| 282 | #define ASSERT_POSITION(field_name, position) \ | ||
| 283 | static_assert(offsetof(PictureInfo, field_name) == position, \ | ||
| 284 | "Field " #field_name " has invalid position") | ||
| 285 | |||
| 286 | ASSERT_POSITION(bitstream_size, 0x30); | ||
| 287 | ASSERT_POSITION(last_frame_size, 0x48); | ||
| 288 | ASSERT_POSITION(first_level, 0x70); | ||
| 289 | ASSERT_POSITION(segmentation, 0x80); | ||
| 290 | ASSERT_POSITION(loop_filter, 0xE4); | ||
| 291 | #undef ASSERT_POSITION | ||
| 292 | |||
| 293 | #define ASSERT_POSITION(field_name, position) \ | ||
| 294 | static_assert(offsetof(EntropyProbs, field_name) == position, \ | ||
| 295 | "Field " #field_name " has invalid position") | ||
| 296 | |||
| 297 | ASSERT_POSITION(inter_mode_prob, 0x400); | ||
| 298 | ASSERT_POSITION(tx_8x8_prob, 0x470); | ||
| 299 | ASSERT_POSITION(partition_prob, 0x4E0); | ||
| 300 | ASSERT_POSITION(class_0, 0x540); | ||
| 301 | ASSERT_POSITION(class_0_fr, 0x560); | ||
| 302 | ASSERT_POSITION(coef_probs, 0x5A0); | ||
| 303 | #undef ASSERT_POSITION | ||
| 304 | |||
| 305 | }; // namespace Decoder | ||
| 306 | }; // namespace Tegra | ||
diff --git a/src/video_core/host1x/control.cpp b/src/video_core/host1x/control.cpp new file mode 100644 index 000000000..b72b01aa3 --- /dev/null +++ b/src/video_core/host1x/control.cpp | |||
| @@ -0,0 +1,35 @@ | |||
| 1 | // Copyright 2022 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv3 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "video_core/gpu.h" | ||
| 7 | #include "video_core/host1x/control.h" | ||
| 8 | #include "video_core/host1x/host1x.h" | ||
| 9 | |||
| 10 | namespace Tegra::Host1x { | ||
| 11 | |||
| 12 | Control::Control(GPU& gpu_) : gpu(gpu_) {} | ||
| 13 | |||
| 14 | Control::~Control() = default; | ||
| 15 | |||
| 16 | void Control::ProcessMethod(Method method, u32 argument) { | ||
| 17 | switch (method) { | ||
| 18 | case Method::LoadSyncptPayload32: | ||
| 19 | syncpoint_value = argument; | ||
| 20 | break; | ||
| 21 | case Method::WaitSyncpt: | ||
| 22 | case Method::WaitSyncpt32: | ||
| 23 | Execute(argument); | ||
| 24 | break; | ||
| 25 | default: | ||
| 26 | UNIMPLEMENTED_MSG("Control method 0x{:X}", static_cast<u32>(method)); | ||
| 27 | break; | ||
| 28 | } | ||
| 29 | } | ||
| 30 | |||
| 31 | void Control::Execute(u32 data) { | ||
| 32 | gpu.Host1x().GetSyncpointManager().WaitHost(data, syncpoint_value); | ||
| 33 | } | ||
| 34 | |||
| 35 | } // namespace Tegra::Host1x | ||
diff --git a/src/video_core/host1x/control.h b/src/video_core/host1x/control.h new file mode 100644 index 000000000..04dac7d51 --- /dev/null +++ b/src/video_core/host1x/control.h | |||
| @@ -0,0 +1,41 @@ | |||
| 1 | // SPDX-FileCopyrightText: 2021 yuzu emulator team and Skyline Team and Contributors | ||
| 2 | // (https://github.com/skyline-emu/) | ||
| 3 | // SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 | ||
| 4 | // or any later version Refer to the license.txt file included. | ||
| 5 | |||
| 6 | #pragma once | ||
| 7 | |||
| 8 | #include "common/common_types.h" | ||
| 9 | |||
| 10 | namespace Tegra { | ||
| 11 | class GPU; | ||
| 12 | |||
| 13 | namespace Host1x { | ||
| 14 | |||
| 15 | class Nvdec; | ||
| 16 | |||
| 17 | class Control { | ||
| 18 | public: | ||
| 19 | enum class Method : u32 { | ||
| 20 | WaitSyncpt = 0x8, | ||
| 21 | LoadSyncptPayload32 = 0x4e, | ||
| 22 | WaitSyncpt32 = 0x50, | ||
| 23 | }; | ||
| 24 | |||
| 25 | explicit Control(GPU& gpu); | ||
| 26 | ~Control(); | ||
| 27 | |||
| 28 | /// Writes the method into the state, Invoke Execute() if encountered | ||
| 29 | void ProcessMethod(Method method, u32 argument); | ||
| 30 | |||
| 31 | private: | ||
| 32 | /// For Host1x, execute is waiting on a syncpoint previously written into the state | ||
| 33 | void Execute(u32 data); | ||
| 34 | |||
| 35 | u32 syncpoint_value{}; | ||
| 36 | GPU& gpu; | ||
| 37 | }; | ||
| 38 | |||
| 39 | } // namespace Host1x | ||
| 40 | |||
| 41 | } // namespace Tegra | ||
diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h new file mode 100644 index 000000000..2971be286 --- /dev/null +++ b/src/video_core/host1x/host1x.h | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | // Copyright 2022 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv3 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | #include "video_core/host1x/syncpoint_manager.h" | ||
| 10 | |||
| 11 | namespace Tegra { | ||
| 12 | |||
| 13 | namespace Host1x { | ||
| 14 | |||
| 15 | class Host1x { | ||
| 16 | public: | ||
| 17 | Host1x() : syncpoint_manager{} {} | ||
| 18 | |||
| 19 | SyncpointManager& GetSyncpointManager() { | ||
| 20 | return syncpoint_manager; | ||
| 21 | } | ||
| 22 | |||
| 23 | const SyncpointManager& GetSyncpointManager() const { | ||
| 24 | return syncpoint_manager; | ||
| 25 | } | ||
| 26 | |||
| 27 | private: | ||
| 28 | SyncpointManager syncpoint_manager; | ||
| 29 | }; | ||
| 30 | |||
| 31 | } // namespace Host1x | ||
| 32 | |||
| 33 | } // namespace Tegra | ||
diff --git a/src/video_core/host1x/nvdec.cpp b/src/video_core/host1x/nvdec.cpp new file mode 100644 index 000000000..5f6decd0d --- /dev/null +++ b/src/video_core/host1x/nvdec.cpp | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include "common/assert.h" | ||
| 5 | #include "video_core/gpu.h" | ||
| 6 | #include "video_core/host1x/nvdec.h" | ||
| 7 | |||
| 8 | namespace Tegra::Host1x { | ||
| 9 | |||
| 10 | #define NVDEC_REG_INDEX(field_name) \ | ||
| 11 | (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64)) | ||
| 12 | |||
| 13 | Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), state{}, codec(std::make_unique<Codec>(gpu, state)) {} | ||
| 14 | |||
| 15 | Nvdec::~Nvdec() = default; | ||
| 16 | |||
| 17 | void Nvdec::ProcessMethod(u32 method, u32 argument) { | ||
| 18 | state.reg_array[method] = static_cast<u64>(argument) << 8; | ||
| 19 | |||
| 20 | switch (method) { | ||
| 21 | case NVDEC_REG_INDEX(set_codec_id): | ||
| 22 | codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument)); | ||
| 23 | break; | ||
| 24 | case NVDEC_REG_INDEX(execute): | ||
| 25 | Execute(); | ||
| 26 | break; | ||
| 27 | } | ||
| 28 | } | ||
| 29 | |||
| 30 | AVFramePtr Nvdec::GetFrame() { | ||
| 31 | return codec->GetCurrentFrame(); | ||
| 32 | } | ||
| 33 | |||
| 34 | void Nvdec::Execute() { | ||
| 35 | switch (codec->GetCurrentCodec()) { | ||
| 36 | case NvdecCommon::VideoCodec::H264: | ||
| 37 | case NvdecCommon::VideoCodec::VP8: | ||
| 38 | case NvdecCommon::VideoCodec::VP9: | ||
| 39 | codec->Decode(); | ||
| 40 | break; | ||
| 41 | default: | ||
| 42 | UNIMPLEMENTED_MSG("Codec {}", codec->GetCurrentCodecName()); | ||
| 43 | break; | ||
| 44 | } | ||
| 45 | } | ||
| 46 | |||
| 47 | } // namespace Tegra::Host1x | ||
diff --git a/src/video_core/host1x/nvdec.h b/src/video_core/host1x/nvdec.h new file mode 100644 index 000000000..41ba1f7a0 --- /dev/null +++ b/src/video_core/host1x/nvdec.h | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <memory> | ||
| 7 | #include <vector> | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/host1x/codecs/codec.h" | ||
| 10 | |||
| 11 | namespace Tegra { | ||
| 12 | class GPU; | ||
| 13 | |||
| 14 | namespace Host1x { | ||
| 15 | |||
| 16 | class Nvdec { | ||
| 17 | public: | ||
| 18 | explicit Nvdec(GPU& gpu); | ||
| 19 | ~Nvdec(); | ||
| 20 | |||
| 21 | /// Writes the method into the state, Invoke Execute() if encountered | ||
| 22 | void ProcessMethod(u32 method, u32 argument); | ||
| 23 | |||
| 24 | /// Return most recently decoded frame | ||
| 25 | [[nodiscard]] AVFramePtr GetFrame(); | ||
| 26 | |||
| 27 | private: | ||
| 28 | /// Invoke codec to decode a frame | ||
| 29 | void Execute(); | ||
| 30 | |||
| 31 | GPU& gpu; | ||
| 32 | NvdecCommon::NvdecRegisters state; | ||
| 33 | std::unique_ptr<Codec> codec; | ||
| 34 | }; | ||
| 35 | |||
| 36 | } // namespace Host1x | ||
| 37 | |||
| 38 | } // namespace Tegra | ||
diff --git a/src/video_core/host1x/nvdec_common.h b/src/video_core/host1x/nvdec_common.h new file mode 100644 index 000000000..49d67ebbe --- /dev/null +++ b/src/video_core/host1x/nvdec_common.h | |||
| @@ -0,0 +1,97 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include "common/bit_field.h" | ||
| 7 | #include "common/common_funcs.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | |||
| 10 | namespace Tegra::Host1x::NvdecCommon { | ||
| 11 | |||
| 12 | enum class VideoCodec : u64 { | ||
| 13 | None = 0x0, | ||
| 14 | H264 = 0x3, | ||
| 15 | VP8 = 0x5, | ||
| 16 | H265 = 0x7, | ||
| 17 | VP9 = 0x9, | ||
| 18 | }; | ||
| 19 | |||
| 20 | // NVDEC should use a 32-bit address space, but is mapped to 64-bit, | ||
| 21 | // doubling the sizes here is compensating for that. | ||
| 22 | struct NvdecRegisters { | ||
| 23 | static constexpr std::size_t NUM_REGS = 0x178; | ||
| 24 | |||
| 25 | union { | ||
| 26 | struct { | ||
| 27 | INSERT_PADDING_WORDS_NOINIT(256); ///< 0x0000 | ||
| 28 | VideoCodec set_codec_id; ///< 0x0400 | ||
| 29 | INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0408 | ||
| 30 | u64 execute; ///< 0x0600 | ||
| 31 | INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0608 | ||
| 32 | struct { ///< 0x0800 | ||
| 33 | union { | ||
| 34 | BitField<0, 3, VideoCodec> codec; | ||
| 35 | BitField<4, 1, u64> gp_timer_on; | ||
| 36 | BitField<13, 1, u64> mb_timer_on; | ||
| 37 | BitField<14, 1, u64> intra_frame_pslc; | ||
| 38 | BitField<17, 1, u64> all_intra_frame; | ||
| 39 | }; | ||
| 40 | } control_params; | ||
| 41 | u64 picture_info_offset; ///< 0x0808 | ||
| 42 | u64 frame_bitstream_offset; ///< 0x0810 | ||
| 43 | u64 frame_number; ///< 0x0818 | ||
| 44 | u64 h264_slice_data_offsets; ///< 0x0820 | ||
| 45 | u64 h264_mv_dump_offset; ///< 0x0828 | ||
| 46 | INSERT_PADDING_WORDS_NOINIT(6); ///< 0x0830 | ||
| 47 | u64 frame_stats_offset; ///< 0x0848 | ||
| 48 | u64 h264_last_surface_luma_offset; ///< 0x0850 | ||
| 49 | u64 h264_last_surface_chroma_offset; ///< 0x0858 | ||
| 50 | std::array<u64, 17> surface_luma_offset; ///< 0x0860 | ||
| 51 | std::array<u64, 17> surface_chroma_offset; ///< 0x08E8 | ||
| 52 | INSERT_PADDING_WORDS_NOINIT(68); ///< 0x0970 | ||
| 53 | u64 vp8_prob_data_offset; ///< 0x0A80 | ||
| 54 | u64 vp8_header_partition_buf_offset; ///< 0x0A88 | ||
| 55 | INSERT_PADDING_WORDS_NOINIT(60); ///< 0x0A90 | ||
| 56 | u64 vp9_entropy_probs_offset; ///< 0x0B80 | ||
| 57 | u64 vp9_backward_updates_offset; ///< 0x0B88 | ||
| 58 | u64 vp9_last_frame_segmap_offset; ///< 0x0B90 | ||
| 59 | u64 vp9_curr_frame_segmap_offset; ///< 0x0B98 | ||
| 60 | INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BA0 | ||
| 61 | u64 vp9_last_frame_mvs_offset; ///< 0x0BA8 | ||
| 62 | u64 vp9_curr_frame_mvs_offset; ///< 0x0BB0 | ||
| 63 | INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BB8 | ||
| 64 | }; | ||
| 65 | std::array<u64, NUM_REGS> reg_array; | ||
| 66 | }; | ||
| 67 | }; | ||
| 68 | static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); | ||
| 69 | |||
| 70 | #define ASSERT_REG_POSITION(field_name, position) \ | ||
| 71 | static_assert(offsetof(NvdecRegisters, field_name) == position * sizeof(u64), \ | ||
| 72 | "Field " #field_name " has invalid position") | ||
| 73 | |||
| 74 | ASSERT_REG_POSITION(set_codec_id, 0x80); | ||
| 75 | ASSERT_REG_POSITION(execute, 0xC0); | ||
| 76 | ASSERT_REG_POSITION(control_params, 0x100); | ||
| 77 | ASSERT_REG_POSITION(picture_info_offset, 0x101); | ||
| 78 | ASSERT_REG_POSITION(frame_bitstream_offset, 0x102); | ||
| 79 | ASSERT_REG_POSITION(frame_number, 0x103); | ||
| 80 | ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104); | ||
| 81 | ASSERT_REG_POSITION(frame_stats_offset, 0x109); | ||
| 82 | ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A); | ||
| 83 | ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B); | ||
| 84 | ASSERT_REG_POSITION(surface_luma_offset, 0x10C); | ||
| 85 | ASSERT_REG_POSITION(surface_chroma_offset, 0x11D); | ||
| 86 | ASSERT_REG_POSITION(vp8_prob_data_offset, 0x150); | ||
| 87 | ASSERT_REG_POSITION(vp8_header_partition_buf_offset, 0x151); | ||
| 88 | ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170); | ||
| 89 | ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171); | ||
| 90 | ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172); | ||
| 91 | ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173); | ||
| 92 | ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175); | ||
| 93 | ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176); | ||
| 94 | |||
| 95 | #undef ASSERT_REG_POSITION | ||
| 96 | |||
| 97 | } // namespace Tegra::Host1x::NvdecCommon | ||
diff --git a/src/video_core/host1x/sync_manager.cpp b/src/video_core/host1x/sync_manager.cpp new file mode 100644 index 000000000..8694f77e2 --- /dev/null +++ b/src/video_core/host1x/sync_manager.cpp | |||
| @@ -0,0 +1,51 @@ | |||
| 1 | // SPDX-FileCopyrightText: Ryujinx Team and Contributors | ||
| 2 | // SPDX-License-Identifier: MIT | ||
| 3 | |||
| 4 | #include <algorithm> | ||
| 5 | #include "sync_manager.h" | ||
| 6 | #include "video_core/gpu.h" | ||
| 7 | #include "video_core/host1x/host1x.h" | ||
| 8 | #include "video_core/host1x/syncpoint_manager.h" | ||
| 9 | |||
| 10 | namespace Tegra { | ||
| 11 | namespace Host1x { | ||
| 12 | |||
| 13 | SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {} | ||
| 14 | SyncptIncrManager::~SyncptIncrManager() = default; | ||
| 15 | |||
| 16 | void SyncptIncrManager::Increment(u32 id) { | ||
| 17 | increments.emplace_back(0, 0, id, true); | ||
| 18 | IncrementAllDone(); | ||
| 19 | } | ||
| 20 | |||
| 21 | u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) { | ||
| 22 | const u32 handle = current_id++; | ||
| 23 | increments.emplace_back(handle, class_id, id); | ||
| 24 | return handle; | ||
| 25 | } | ||
| 26 | |||
| 27 | void SyncptIncrManager::SignalDone(u32 handle) { | ||
| 28 | const auto done_incr = | ||
| 29 | std::find_if(increments.begin(), increments.end(), | ||
| 30 | [handle](const SyncptIncr& incr) { return incr.id == handle; }); | ||
| 31 | if (done_incr != increments.cend()) { | ||
| 32 | done_incr->complete = true; | ||
| 33 | } | ||
| 34 | IncrementAllDone(); | ||
| 35 | } | ||
| 36 | |||
| 37 | void SyncptIncrManager::IncrementAllDone() { | ||
| 38 | std::size_t done_count = 0; | ||
| 39 | for (; done_count < increments.size(); ++done_count) { | ||
| 40 | if (!increments[done_count].complete) { | ||
| 41 | break; | ||
| 42 | } | ||
| 43 | auto& syncpoint_manager = gpu.Host1x().GetSyncpointManager(); | ||
| 44 | syncpoint_manager.IncrementGuest(increments[done_count].syncpt_id); | ||
| 45 | syncpoint_manager.IncrementHost(increments[done_count].syncpt_id); | ||
| 46 | } | ||
| 47 | increments.erase(increments.begin(), increments.begin() + done_count); | ||
| 48 | } | ||
| 49 | |||
| 50 | } // namespace Host1x | ||
| 51 | } // namespace Tegra | ||
diff --git a/src/video_core/host1x/sync_manager.h b/src/video_core/host1x/sync_manager.h new file mode 100644 index 000000000..aba72d5c5 --- /dev/null +++ b/src/video_core/host1x/sync_manager.h | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | // SPDX-FileCopyrightText: Ryujinx Team and Contributors | ||
| 2 | // SPDX-License-Identifier: MIT | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <mutex> | ||
| 7 | #include <vector> | ||
| 8 | #include "common/common_types.h" | ||
| 9 | |||
| 10 | namespace Tegra { | ||
| 11 | |||
| 12 | class GPU; | ||
| 13 | |||
| 14 | namespace Host1x { | ||
| 15 | |||
| 16 | struct SyncptIncr { | ||
| 17 | u32 id; | ||
| 18 | u32 class_id; | ||
| 19 | u32 syncpt_id; | ||
| 20 | bool complete; | ||
| 21 | |||
| 22 | SyncptIncr(u32 id_, u32 class_id_, u32 syncpt_id_, bool done = false) | ||
| 23 | : id(id_), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {} | ||
| 24 | }; | ||
| 25 | |||
| 26 | class SyncptIncrManager { | ||
| 27 | public: | ||
| 28 | explicit SyncptIncrManager(GPU& gpu); | ||
| 29 | ~SyncptIncrManager(); | ||
| 30 | |||
| 31 | /// Add syncpoint id and increment all | ||
| 32 | void Increment(u32 id); | ||
| 33 | |||
| 34 | /// Returns a handle to increment later | ||
| 35 | u32 IncrementWhenDone(u32 class_id, u32 id); | ||
| 36 | |||
| 37 | /// IncrememntAllDone, including handle | ||
| 38 | void SignalDone(u32 handle); | ||
| 39 | |||
| 40 | /// Increment all sequential pending increments that are already done. | ||
| 41 | void IncrementAllDone(); | ||
| 42 | |||
| 43 | private: | ||
| 44 | std::vector<SyncptIncr> increments; | ||
| 45 | std::mutex increment_lock; | ||
| 46 | u32 current_id{}; | ||
| 47 | |||
| 48 | GPU& gpu; | ||
| 49 | }; | ||
| 50 | |||
| 51 | } // namespace Host1x | ||
| 52 | |||
| 53 | } // namespace Tegra | ||
diff --git a/src/video_core/host1x/syncpoint_manager.cpp b/src/video_core/host1x/syncpoint_manager.cpp new file mode 100644 index 000000000..c606b8bd0 --- /dev/null +++ b/src/video_core/host1x/syncpoint_manager.cpp | |||
| @@ -0,0 +1,93 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv3 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "video_core/host1x/syncpoint_manager.h" | ||
| 6 | |||
| 7 | namespace Tegra { | ||
| 8 | |||
| 9 | namespace Host1x { | ||
| 10 | |||
| 11 | SyncpointManager::ActionHandle SyncpointManager::RegisterAction( | ||
| 12 | std::atomic<u32>& syncpoint, std::list<RegisteredAction>& action_storage, u32 expected_value, | ||
| 13 | std::function<void(void)>& action) { | ||
| 14 | if (syncpoint.load(std::memory_order_acquire) >= expected_value) { | ||
| 15 | action(); | ||
| 16 | return {}; | ||
| 17 | } | ||
| 18 | |||
| 19 | std::unique_lock<std::mutex> lk(guard); | ||
| 20 | if (syncpoint.load(std::memory_order_relaxed) >= expected_value) { | ||
| 21 | action(); | ||
| 22 | return {}; | ||
| 23 | } | ||
| 24 | auto it = action_storage.begin(); | ||
| 25 | while (it != action_storage.end()) { | ||
| 26 | if (it->expected_value >= expected_value) { | ||
| 27 | break; | ||
| 28 | } | ||
| 29 | ++it; | ||
| 30 | } | ||
| 31 | return action_storage.emplace(it, expected_value, action); | ||
| 32 | } | ||
| 33 | |||
| 34 | void SyncpointManager::DeregisterAction(std::list<RegisteredAction>& action_storage, | ||
| 35 | ActionHandle& handle) { | ||
| 36 | std::unique_lock<std::mutex> lk(guard); | ||
| 37 | action_storage.erase(handle); | ||
| 38 | } | ||
| 39 | |||
| 40 | void SyncpointManager::DeregisterGuestAction(u32 syncpoint_id, ActionHandle& handle) { | ||
| 41 | DeregisterAction(guest_action_storage[syncpoint_id], handle); | ||
| 42 | } | ||
| 43 | |||
| 44 | void SyncpointManager::DeregisterHostAction(u32 syncpoint_id, ActionHandle& handle) { | ||
| 45 | DeregisterAction(host_action_storage[syncpoint_id], handle); | ||
| 46 | } | ||
| 47 | |||
| 48 | void SyncpointManager::IncrementGuest(u32 syncpoint_id) { | ||
| 49 | Increment(syncpoints_guest[syncpoint_id], wait_guest_cv, guest_action_storage[syncpoint_id]); | ||
| 50 | } | ||
| 51 | |||
| 52 | void SyncpointManager::IncrementHost(u32 syncpoint_id) { | ||
| 53 | Increment(syncpoints_host[syncpoint_id], wait_host_cv, host_action_storage[syncpoint_id]); | ||
| 54 | } | ||
| 55 | |||
| 56 | void SyncpointManager::WaitGuest(u32 syncpoint_id, u32 expected_value) { | ||
| 57 | Wait(syncpoints_guest[syncpoint_id], wait_guest_cv, expected_value); | ||
| 58 | } | ||
| 59 | |||
| 60 | void SyncpointManager::WaitHost(u32 syncpoint_id, u32 expected_value) { | ||
| 61 | Wait(syncpoints_host[syncpoint_id], wait_host_cv, expected_value); | ||
| 62 | } | ||
| 63 | |||
| 64 | void SyncpointManager::Increment(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv, | ||
| 65 | std::list<RegisteredAction>& action_storage) { | ||
| 66 | auto new_value{syncpoint.fetch_add(1, std::memory_order_acq_rel) + 1}; | ||
| 67 | |||
| 68 | std::unique_lock<std::mutex> lk(guard); | ||
| 69 | auto it = action_storage.begin(); | ||
| 70 | while (it != action_storage.end()) { | ||
| 71 | if (it->expected_value > new_value) { | ||
| 72 | break; | ||
| 73 | } | ||
| 74 | it->action(); | ||
| 75 | it = action_storage.erase(it); | ||
| 76 | } | ||
| 77 | wait_cv.notify_all(); | ||
| 78 | } | ||
| 79 | |||
| 80 | void SyncpointManager::Wait(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv, | ||
| 81 | u32 expected_value) { | ||
| 82 | const auto pred = [&]() { return syncpoint.load(std::memory_order_acquire) >= expected_value; }; | ||
| 83 | if (pred()) { | ||
| 84 | return; | ||
| 85 | } | ||
| 86 | |||
| 87 | std::unique_lock<std::mutex> lk(guard); | ||
| 88 | wait_cv.wait(lk, pred); | ||
| 89 | } | ||
| 90 | |||
| 91 | } // namespace Host1x | ||
| 92 | |||
| 93 | } // namespace Tegra | ||
diff --git a/src/video_core/host1x/syncpoint_manager.h b/src/video_core/host1x/syncpoint_manager.h new file mode 100644 index 000000000..0ecc040ab --- /dev/null +++ b/src/video_core/host1x/syncpoint_manager.h | |||
| @@ -0,0 +1,99 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv3 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
#include <array>
#include <atomic>
#include <condition_variable>
#include <functional>
#include <list>
#include <mutex>
#include <utility>
| 13 | |||
| 14 | #include "common/common_types.h" | ||
| 15 | |||
| 16 | namespace Tegra { | ||
| 17 | |||
| 18 | namespace Host1x { | ||
| 19 | |||
| 20 | class SyncpointManager { | ||
| 21 | public: | ||
| 22 | u32 GetGuestSyncpointValue(u32 id) { | ||
| 23 | return syncpoints_guest[id].load(std::memory_order_acquire); | ||
| 24 | } | ||
| 25 | |||
| 26 | u32 GetHostSyncpointValue(u32 id) { | ||
| 27 | return syncpoints_host[id].load(std::memory_order_acquire); | ||
| 28 | } | ||
| 29 | |||
| 30 | struct RegisteredAction { | ||
| 31 | RegisteredAction(u32 expected_value_, std::function<void(void)>& action_) | ||
| 32 | : expected_value{expected_value_}, action{action_} {} | ||
| 33 | u32 expected_value; | ||
| 34 | std::function<void(void)> action; | ||
| 35 | }; | ||
| 36 | using ActionHandle = std::list<RegisteredAction>::iterator; | ||
| 37 | |||
| 38 | template <typename Func> | ||
| 39 | ActionHandle RegisterGuestAction(u32 syncpoint_id, u32 expected_value, Func&& action) { | ||
| 40 | std::function<void(void)> func(action); | ||
| 41 | return RegisterAction(syncpoints_guest[syncpoint_id], guest_action_storage[syncpoint_id], | ||
| 42 | expected_value, func); | ||
| 43 | } | ||
| 44 | |||
| 45 | template <typename Func> | ||
| 46 | ActionHandle RegisterHostAction(u32 syncpoint_id, u32 expected_value, Func&& action) { | ||
| 47 | std::function<void(void)> func(action); | ||
| 48 | return RegisterAction(syncpoints_host[syncpoint_id], host_action_storage[syncpoint_id], | ||
| 49 | expected_value, func); | ||
| 50 | } | ||
| 51 | |||
| 52 | void DeregisterGuestAction(u32 syncpoint_id,ActionHandle& handle); | ||
| 53 | |||
| 54 | void DeregisterHostAction(u32 syncpoint_id,ActionHandle& handle); | ||
| 55 | |||
| 56 | void IncrementGuest(u32 syncpoint_id); | ||
| 57 | |||
| 58 | void IncrementHost(u32 syncpoint_id); | ||
| 59 | |||
| 60 | void WaitGuest(u32 syncpoint_id, u32 expected_value); | ||
| 61 | |||
| 62 | void WaitHost(u32 syncpoint_id, u32 expected_value); | ||
| 63 | |||
| 64 | bool IsReadyGuest(u32 syncpoint_id, u32 expected_value) { | ||
| 65 | return syncpoints_guest[syncpoint_id].load(std::memory_order_acquire) >= expected_value; | ||
| 66 | } | ||
| 67 | |||
| 68 | bool IsReadyHost(u32 syncpoint_id, u32 expected_value) { | ||
| 69 | return syncpoints_host[syncpoint_id].load(std::memory_order_acquire) >= expected_value; | ||
| 70 | } | ||
| 71 | |||
| 72 | private: | ||
| 73 | void Increment(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv, | ||
| 74 | std::list<RegisteredAction>& action_storage); | ||
| 75 | |||
| 76 | ActionHandle RegisterAction(std::atomic<u32>& syncpoint, | ||
| 77 | std::list<RegisteredAction>& action_storage, u32 expected_value, | ||
| 78 | std::function<void(void)>& action); | ||
| 79 | |||
| 80 | void DeregisterAction(std::list<RegisteredAction>& action_storage, ActionHandle& handle); | ||
| 81 | |||
| 82 | void Wait(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv, u32 expected_value); | ||
| 83 | |||
| 84 | static constexpr size_t NUM_MAX_SYNCPOINTS = 192; | ||
| 85 | |||
| 86 | std::array<std::atomic<u32>, NUM_MAX_SYNCPOINTS> syncpoints_guest{}; | ||
| 87 | std::array<std::atomic<u32>, NUM_MAX_SYNCPOINTS> syncpoints_host{}; | ||
| 88 | |||
| 89 | std::array<std::list<RegisteredAction>, NUM_MAX_SYNCPOINTS> guest_action_storage; | ||
| 90 | std::array<std::list<RegisteredAction>, NUM_MAX_SYNCPOINTS> host_action_storage; | ||
| 91 | |||
| 92 | std::mutex guard; | ||
| 93 | std::condition_variable wait_guest_cv; | ||
| 94 | std::condition_variable wait_host_cv; | ||
| 95 | }; | ||
| 96 | |||
| 97 | } // namespace Host1x | ||
| 98 | |||
| 99 | } // namespace Tegra | ||
diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp new file mode 100644 index 000000000..a9422670a --- /dev/null +++ b/src/video_core/host1x/vic.cpp | |||
| @@ -0,0 +1,243 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <array> | ||
| 5 | |||
| 6 | extern "C" { | ||
| 7 | #if defined(__GNUC__) || defined(__clang__) | ||
| 8 | #pragma GCC diagnostic push | ||
| 9 | #pragma GCC diagnostic ignored "-Wconversion" | ||
| 10 | #endif | ||
| 11 | #include <libswscale/swscale.h> | ||
| 12 | #if defined(__GNUC__) || defined(__clang__) | ||
| 13 | #pragma GCC diagnostic pop | ||
| 14 | #endif | ||
| 15 | } | ||
| 16 | |||
| 17 | #include "common/assert.h" | ||
| 18 | #include "common/bit_field.h" | ||
| 19 | #include "common/logging/log.h" | ||
| 20 | |||
| 21 | #include "video_core/engines/maxwell_3d.h" | ||
| 22 | #include "video_core/gpu.h" | ||
| 23 | #include "video_core/host1x/nvdec.h" | ||
| 24 | #include "video_core/host1x/vic.h" | ||
| 25 | #include "video_core/memory_manager.h" | ||
| 26 | #include "video_core/textures/decoders.h" | ||
| 27 | |||
| 28 | namespace Tegra { | ||
| 29 | |||
| 30 | namespace Host1x { | ||
| 31 | |||
| 32 | namespace { | ||
| 33 | enum class VideoPixelFormat : u64_le { | ||
| 34 | RGBA8 = 0x1f, | ||
| 35 | BGRA8 = 0x20, | ||
| 36 | RGBX8 = 0x23, | ||
| 37 | YUV420 = 0x44, | ||
| 38 | }; | ||
| 39 | } // Anonymous namespace | ||
| 40 | |||
| 41 | union VicConfig { | ||
| 42 | u64_le raw{}; | ||
| 43 | BitField<0, 7, VideoPixelFormat> pixel_format; | ||
| 44 | BitField<7, 2, u64_le> chroma_loc_horiz; | ||
| 45 | BitField<9, 2, u64_le> chroma_loc_vert; | ||
| 46 | BitField<11, 4, u64_le> block_linear_kind; | ||
| 47 | BitField<15, 4, u64_le> block_linear_height_log2; | ||
| 48 | BitField<32, 14, u64_le> surface_width_minus1; | ||
| 49 | BitField<46, 14, u64_le> surface_height_minus1; | ||
| 50 | }; | ||
| 51 | |||
| 52 | Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_) | ||
| 53 | : gpu(gpu_), | ||
| 54 | nvdec_processor(std::move(nvdec_processor_)), converted_frame_buffer{nullptr, av_free} {} | ||
| 55 | |||
| 56 | Vic::~Vic() = default; | ||
| 57 | |||
| 58 | void Vic::ProcessMethod(Method method, u32 argument) { | ||
| 59 | LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast<u32>(method)); | ||
| 60 | const u64 arg = static_cast<u64>(argument) << 8; | ||
| 61 | switch (method) { | ||
| 62 | case Method::Execute: | ||
| 63 | Execute(); | ||
| 64 | break; | ||
| 65 | case Method::SetConfigStructOffset: | ||
| 66 | config_struct_address = arg; | ||
| 67 | break; | ||
| 68 | case Method::SetOutputSurfaceLumaOffset: | ||
| 69 | output_surface_luma_address = arg; | ||
| 70 | break; | ||
| 71 | case Method::SetOutputSurfaceChromaOffset: | ||
| 72 | output_surface_chroma_address = arg; | ||
| 73 | break; | ||
| 74 | default: | ||
| 75 | break; | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 79 | void Vic::Execute() { | ||
| 80 | if (output_surface_luma_address == 0) { | ||
| 81 | LOG_ERROR(Service_NVDRV, "VIC Luma address not set."); | ||
| 82 | return; | ||
| 83 | } | ||
| 84 | const VicConfig config{gpu.MemoryManager().Read<u64>(config_struct_address + 0x20)}; | ||
| 85 | const AVFramePtr frame_ptr = nvdec_processor->GetFrame(); | ||
| 86 | const auto* frame = frame_ptr.get(); | ||
| 87 | if (!frame) { | ||
| 88 | return; | ||
| 89 | } | ||
| 90 | const u64 surface_width = config.surface_width_minus1 + 1; | ||
| 91 | const u64 surface_height = config.surface_height_minus1 + 1; | ||
| 92 | if (static_cast<u64>(frame->width) != surface_width || | ||
| 93 | static_cast<u64>(frame->height) != surface_height) { | ||
| 94 | // TODO: Properly support multiple video streams with differing frame dimensions | ||
| 95 | LOG_WARNING(Service_NVDRV, "Frame dimensions {}x{} don't match surface dimensions {}x{}", | ||
| 96 | frame->width, frame->height, surface_width, surface_height); | ||
| 97 | } | ||
| 98 | switch (config.pixel_format) { | ||
| 99 | case VideoPixelFormat::RGBA8: | ||
| 100 | case VideoPixelFormat::BGRA8: | ||
| 101 | case VideoPixelFormat::RGBX8: | ||
| 102 | WriteRGBFrame(frame, config); | ||
| 103 | break; | ||
| 104 | case VideoPixelFormat::YUV420: | ||
| 105 | WriteYUVFrame(frame, config); | ||
| 106 | break; | ||
| 107 | default: | ||
| 108 | UNIMPLEMENTED_MSG("Unknown video pixel format {:X}", config.pixel_format.Value()); | ||
| 109 | break; | ||
| 110 | } | ||
| 111 | } | ||
| 112 | |||
| 113 | void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) { | ||
| 114 | LOG_TRACE(Service_NVDRV, "Writing RGB Frame"); | ||
| 115 | |||
| 116 | if (!scaler_ctx || frame->width != scaler_width || frame->height != scaler_height) { | ||
| 117 | const AVPixelFormat target_format = [pixel_format = config.pixel_format]() { | ||
| 118 | switch (pixel_format) { | ||
| 119 | case VideoPixelFormat::RGBA8: | ||
| 120 | return AV_PIX_FMT_RGBA; | ||
| 121 | case VideoPixelFormat::BGRA8: | ||
| 122 | return AV_PIX_FMT_BGRA; | ||
| 123 | case VideoPixelFormat::RGBX8: | ||
| 124 | return AV_PIX_FMT_RGB0; | ||
| 125 | default: | ||
| 126 | return AV_PIX_FMT_RGBA; | ||
| 127 | } | ||
| 128 | }(); | ||
| 129 | |||
| 130 | sws_freeContext(scaler_ctx); | ||
| 131 | // Frames are decoded into either YUV420 or NV12 formats. Convert to desired RGB format | ||
| 132 | scaler_ctx = sws_getContext(frame->width, frame->height, | ||
| 133 | static_cast<AVPixelFormat>(frame->format), frame->width, | ||
| 134 | frame->height, target_format, 0, nullptr, nullptr, nullptr); | ||
| 135 | scaler_width = frame->width; | ||
| 136 | scaler_height = frame->height; | ||
| 137 | converted_frame_buffer.reset(); | ||
| 138 | } | ||
| 139 | if (!converted_frame_buffer) { | ||
| 140 | const size_t frame_size = frame->width * frame->height * 4; | ||
| 141 | converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(frame_size)), av_free}; | ||
| 142 | } | ||
| 143 | const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0}; | ||
| 144 | u8* const converted_frame_buf_addr{converted_frame_buffer.get()}; | ||
| 145 | sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, &converted_frame_buf_addr, | ||
| 146 | converted_stride.data()); | ||
| 147 | |||
| 148 | // Use the minimum of surface/frame dimensions to avoid buffer overflow. | ||
| 149 | const u32 surface_width = static_cast<u32>(config.surface_width_minus1) + 1; | ||
| 150 | const u32 surface_height = static_cast<u32>(config.surface_height_minus1) + 1; | ||
| 151 | const u32 width = std::min(surface_width, static_cast<u32>(frame->width)); | ||
| 152 | const u32 height = std::min(surface_height, static_cast<u32>(frame->height)); | ||
| 153 | const u32 blk_kind = static_cast<u32>(config.block_linear_kind); | ||
| 154 | if (blk_kind != 0) { | ||
| 155 | // swizzle pitch linear to block linear | ||
| 156 | const u32 block_height = static_cast<u32>(config.block_linear_height_log2); | ||
| 157 | const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0); | ||
| 158 | luma_buffer.resize(size); | ||
| 159 | Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(), | ||
| 160 | converted_frame_buf_addr, block_height, 0, 0); | ||
| 161 | |||
| 162 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size); | ||
| 163 | } else { | ||
| 164 | // send pitch linear frame | ||
| 165 | const size_t linear_size = width * height * 4; | ||
| 166 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, | ||
| 167 | linear_size); | ||
| 168 | } | ||
| 169 | } | ||
| 170 | |||
| 171 | void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) { | ||
| 172 | LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame"); | ||
| 173 | |||
| 174 | const std::size_t surface_width = config.surface_width_minus1 + 1; | ||
| 175 | const std::size_t surface_height = config.surface_height_minus1 + 1; | ||
| 176 | const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL; | ||
| 177 | // Use the minimum of surface/frame dimensions to avoid buffer overflow. | ||
| 178 | const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width)); | ||
| 179 | const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height)); | ||
| 180 | |||
| 181 | const auto stride = static_cast<size_t>(frame->linesize[0]); | ||
| 182 | |||
| 183 | luma_buffer.resize(aligned_width * surface_height); | ||
| 184 | chroma_buffer.resize(aligned_width * surface_height / 2); | ||
| 185 | |||
| 186 | // Populate luma buffer | ||
| 187 | const u8* luma_src = frame->data[0]; | ||
| 188 | for (std::size_t y = 0; y < frame_height; ++y) { | ||
| 189 | const std::size_t src = y * stride; | ||
| 190 | const std::size_t dst = y * aligned_width; | ||
| 191 | for (std::size_t x = 0; x < frame_width; ++x) { | ||
| 192 | luma_buffer[dst + x] = luma_src[src + x]; | ||
| 193 | } | ||
| 194 | } | ||
| 195 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), | ||
| 196 | luma_buffer.size()); | ||
| 197 | |||
| 198 | // Chroma | ||
| 199 | const std::size_t half_height = frame_height / 2; | ||
| 200 | const auto half_stride = static_cast<size_t>(frame->linesize[1]); | ||
| 201 | |||
| 202 | switch (frame->format) { | ||
| 203 | case AV_PIX_FMT_YUV420P: { | ||
| 204 | // Frame from FFmpeg software | ||
| 205 | // Populate chroma buffer from both channels with interleaving. | ||
| 206 | const std::size_t half_width = frame_width / 2; | ||
| 207 | const u8* chroma_b_src = frame->data[1]; | ||
| 208 | const u8* chroma_r_src = frame->data[2]; | ||
| 209 | for (std::size_t y = 0; y < half_height; ++y) { | ||
| 210 | const std::size_t src = y * half_stride; | ||
| 211 | const std::size_t dst = y * aligned_width; | ||
| 212 | |||
| 213 | for (std::size_t x = 0; x < half_width; ++x) { | ||
| 214 | chroma_buffer[dst + x * 2] = chroma_b_src[src + x]; | ||
| 215 | chroma_buffer[dst + x * 2 + 1] = chroma_r_src[src + x]; | ||
| 216 | } | ||
| 217 | } | ||
| 218 | break; | ||
| 219 | } | ||
| 220 | case AV_PIX_FMT_NV12: { | ||
| 221 | // Frame from VA-API hardware | ||
| 222 | // This is already interleaved so just copy | ||
| 223 | const u8* chroma_src = frame->data[1]; | ||
| 224 | for (std::size_t y = 0; y < half_height; ++y) { | ||
| 225 | const std::size_t src = y * stride; | ||
| 226 | const std::size_t dst = y * aligned_width; | ||
| 227 | for (std::size_t x = 0; x < frame_width; ++x) { | ||
| 228 | chroma_buffer[dst + x] = chroma_src[src + x]; | ||
| 229 | } | ||
| 230 | } | ||
| 231 | break; | ||
| 232 | } | ||
| 233 | default: | ||
| 234 | ASSERT(false); | ||
| 235 | break; | ||
| 236 | } | ||
| 237 | gpu.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(), | ||
| 238 | chroma_buffer.size()); | ||
| 239 | } | ||
| 240 | |||
| 241 | } // namespace Host1x | ||
| 242 | |||
| 243 | } // namespace Tegra | ||
diff --git a/src/video_core/host1x/vic.h b/src/video_core/host1x/vic.h new file mode 100644 index 000000000..c51f8af7e --- /dev/null +++ b/src/video_core/host1x/vic.h | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <memory> | ||
| 7 | #include <vector> | ||
| 8 | #include "common/common_types.h" | ||
| 9 | |||
| 10 | struct SwsContext; | ||
| 11 | |||
| 12 | namespace Tegra { | ||
| 13 | class GPU; | ||
| 14 | |||
| 15 | namespace Host1x { | ||
| 16 | |||
| 17 | class Nvdec; | ||
| 18 | union VicConfig; | ||
| 19 | |||
| 20 | class Vic { | ||
| 21 | public: | ||
| 22 | enum class Method : u32 { | ||
| 23 | Execute = 0xc0, | ||
| 24 | SetControlParams = 0x1c1, | ||
| 25 | SetConfigStructOffset = 0x1c2, | ||
| 26 | SetOutputSurfaceLumaOffset = 0x1c8, | ||
| 27 | SetOutputSurfaceChromaOffset = 0x1c9, | ||
| 28 | SetOutputSurfaceChromaUnusedOffset = 0x1ca | ||
| 29 | }; | ||
| 30 | |||
| 31 | explicit Vic(GPU& gpu, std::shared_ptr<Nvdec> nvdec_processor); | ||
| 32 | |||
| 33 | ~Vic(); | ||
| 34 | |||
| 35 | /// Write to the device state. | ||
| 36 | void ProcessMethod(Method method, u32 argument); | ||
| 37 | |||
| 38 | private: | ||
| 39 | void Execute(); | ||
| 40 | |||
| 41 | void WriteRGBFrame(const AVFrame* frame, const VicConfig& config); | ||
| 42 | |||
| 43 | void WriteYUVFrame(const AVFrame* frame, const VicConfig& config); | ||
| 44 | |||
| 45 | GPU& gpu; | ||
| 46 | std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor; | ||
| 47 | |||
| 48 | /// Avoid reallocation of the following buffers every frame, as their | ||
| 49 | /// size does not change during a stream | ||
| 50 | using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>; | ||
| 51 | AVMallocPtr converted_frame_buffer; | ||
| 52 | std::vector<u8> luma_buffer; | ||
| 53 | std::vector<u8> chroma_buffer; | ||
| 54 | |||
| 55 | GPUVAddr config_struct_address{}; | ||
| 56 | GPUVAddr output_surface_luma_address{}; | ||
| 57 | GPUVAddr output_surface_chroma_address{}; | ||
| 58 | |||
| 59 | SwsContext* scaler_ctx{}; | ||
| 60 | s32 scaler_width{}; | ||
| 61 | s32 scaler_height{}; | ||
| 62 | }; | ||
| 63 | |||
| 64 | } // namespace Host1x | ||
| 65 | |||
| 66 | } // namespace Tegra | ||