Diffstat (limited to 'src/video_core/host1x/vic.cpp')
-rw-r--r--  src/video_core/host1x/vic.cpp  243
1 file changed, 243 insertions, 0 deletions
diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp
new file mode 100644
index 000000000..a9422670a
--- /dev/null
+++ b/src/video_core/host1x/vic.cpp
@@ -0,0 +1,243 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <array>

extern "C" {
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#endif
#include <libswscale/swscale.h>
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
}

#include "common/assert.h"
#include "common/bit_field.h"
#include "common/logging/log.h"

#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/host1x/nvdec.h"
#include "video_core/host1x/vic.h"
#include "video_core/memory_manager.h"
#include "video_core/textures/decoders.h"

namespace Tegra {

namespace Host1x {

namespace {
enum class VideoPixelFormat : u64_le {
    RGBA8 = 0x1f,
    BGRA8 = 0x20,
    RGBX8 = 0x23,
    YUV420 = 0x44,
};
} // Anonymous namespace

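// Layout of the 64-bit output surface configuration word read from the VIC config structure
// (see Execute below).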
union VicConfig {
    u64_le raw{};
    BitField<0, 7, VideoPixelFormat> pixel_format;
    BitField<7, 2, u64_le> chroma_loc_horiz;
    BitField<9, 2, u64_le> chroma_loc_vert;
    BitField<11, 4, u64_le> block_linear_kind;
    BitField<15, 4, u64_le> block_linear_height_log2;
    BitField<32, 14, u64_le> surface_width_minus1;
    BitField<46, 14, u64_le> surface_height_minus1;
};

Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
    : gpu(gpu_),
      nvdec_processor(std::move(nvdec_processor_)), converted_frame_buffer{nullptr, av_free} {}

Vic::~Vic() = default;

void Vic::ProcessMethod(Method method, u32 argument) {
    LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast<u32>(method));
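    // Offsets are passed in 256-byte units; shift left by 8 to recover the byte address.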
    const u64 arg = static_cast<u64>(argument) << 8;
    switch (method) {
    case Method::Execute:
        Execute();
        break;
    case Method::SetConfigStructOffset:
        config_struct_address = arg;
        break;
    case Method::SetOutputSurfaceLumaOffset:
        output_surface_luma_address = arg;
        break;
    case Method::SetOutputSurfaceChromaOffset:
        output_surface_chroma_address = arg;
        break;
    default:
        break;
    }
}

void Vic::Execute() {
    if (output_surface_luma_address == 0) {
        LOG_ERROR(Service_NVDRV, "VIC Luma address not set.");
        return;
    }
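    // The output surface configuration is a 64-bit word at offset 0x20 within the config
    // structure.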
    const VicConfig config{gpu.MemoryManager().Read<u64>(config_struct_address + 0x20)};
    const AVFramePtr frame_ptr = nvdec_processor->GetFrame();
    const auto* frame = frame_ptr.get();
    if (!frame) {
        return;
    }
    const u64 surface_width = config.surface_width_minus1 + 1;
    const u64 surface_height = config.surface_height_minus1 + 1;
    if (static_cast<u64>(frame->width) != surface_width ||
        static_cast<u64>(frame->height) != surface_height) {
        // TODO: Properly support multiple video streams with differing frame dimensions
        LOG_WARNING(Service_NVDRV, "Frame dimensions {}x{} don't match surface dimensions {}x{}",
                    frame->width, frame->height, surface_width, surface_height);
    }
    switch (config.pixel_format) {
    case VideoPixelFormat::RGBA8:
    case VideoPixelFormat::BGRA8:
    case VideoPixelFormat::RGBX8:
        WriteRGBFrame(frame, config);
        break;
    case VideoPixelFormat::YUV420:
        WriteYUVFrame(frame, config);
        break;
    default:
        UNIMPLEMENTED_MSG("Unknown video pixel format {:X}", config.pixel_format.Value());
        break;
    }
}

void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
    LOG_TRACE(Service_NVDRV, "Writing RGB Frame");

    if (!scaler_ctx || frame->width != scaler_width || frame->height != scaler_height) {
        const AVPixelFormat target_format = [pixel_format = config.pixel_format]() {
            switch (pixel_format) {
            case VideoPixelFormat::RGBA8:
                return AV_PIX_FMT_RGBA;
            case VideoPixelFormat::BGRA8:
                return AV_PIX_FMT_BGRA;
            case VideoPixelFormat::RGBX8:
                return AV_PIX_FMT_RGB0;
            default:
                return AV_PIX_FMT_RGBA;
            }
        }();

        sws_freeContext(scaler_ctx);
        // Frames are decoded into either YUV420 or NV12 formats. Convert to the desired RGB format.
        scaler_ctx = sws_getContext(frame->width, frame->height,
                                    static_cast<AVPixelFormat>(frame->format), frame->width,
                                    frame->height, target_format, 0, nullptr, nullptr, nullptr);
        scaler_width = frame->width;
        scaler_height = frame->height;
        converted_frame_buffer.reset();
    }
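    // Lazily allocate a packed 4-bytes-per-pixel buffer to hold the converted frame.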
    if (!converted_frame_buffer) {
        const size_t frame_size = frame->width * frame->height * 4;
        converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(frame_size)), av_free};
    }
    const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0};
    u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
    sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, &converted_frame_buf_addr,
              converted_stride.data());

    // Use the minimum of surface/frame dimensions to avoid buffer overflow.
    const u32 surface_width = static_cast<u32>(config.surface_width_minus1) + 1;
    const u32 surface_height = static_cast<u32>(config.surface_height_minus1) + 1;
    const u32 width = std::min(surface_width, static_cast<u32>(frame->width));
    const u32 height = std::min(surface_height, static_cast<u32>(frame->height));
    const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
    if (blk_kind != 0) {
        // Swizzle the pitch-linear frame into block-linear layout
        const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
        const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0);
        luma_buffer.resize(size);
        Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(),
                                converted_frame_buf_addr, block_height, 0, 0);

        gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
    } else {
        // Send the pitch-linear frame as-is
        const size_t linear_size = width * height * 4;
        gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
                                       linear_size);
    }
}

void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
    LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame");

    const std::size_t surface_width = config.surface_width_minus1 + 1;
    const std::size_t surface_height = config.surface_height_minus1 + 1;
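    // Rows of the output planes are padded to a 256-byte-aligned pitch.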
    const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL;
    // Use the minimum of surface/frame dimensions to avoid buffer overflow.
    const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width));
    const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height));

    const auto stride = static_cast<size_t>(frame->linesize[0]);

    luma_buffer.resize(aligned_width * surface_height);
    chroma_buffer.resize(aligned_width * surface_height / 2);
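    // The output is semi-planar 4:2:0: a full-resolution luma plane followed by an interleaved
    // UV plane at half vertical resolution.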

    // Populate luma buffer
    const u8* luma_src = frame->data[0];
    for (std::size_t y = 0; y < frame_height; ++y) {
        const std::size_t src = y * stride;
        const std::size_t dst = y * aligned_width;
        for (std::size_t x = 0; x < frame_width; ++x) {
            luma_buffer[dst + x] = luma_src[src + x];
        }
    }
    gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
                                   luma_buffer.size());

    // Chroma
    const std::size_t half_height = frame_height / 2;
    const auto half_stride = static_cast<size_t>(frame->linesize[1]);

    switch (frame->format) {
    case AV_PIX_FMT_YUV420P: {
        // Frame from the FFmpeg software decoder: chroma is stored in two separate planes,
        // so interleave the U and V samples into the output buffer.
        const std::size_t half_width = frame_width / 2;
        const u8* chroma_b_src = frame->data[1];
        const u8* chroma_r_src = frame->data[2];
        for (std::size_t y = 0; y < half_height; ++y) {
            const std::size_t src = y * half_stride;
            const std::size_t dst = y * aligned_width;

            for (std::size_t x = 0; x < half_width; ++x) {
                chroma_buffer[dst + x * 2] = chroma_b_src[src + x];
                chroma_buffer[dst + x * 2 + 1] = chroma_r_src[src + x];
            }
        }
        break;
    }
    case AV_PIX_FMT_NV12: {
        // Frame from VA-API hardware decoding: chroma is already interleaved, so copy it directly.
        const u8* chroma_src = frame->data[1];
        for (std::size_t y = 0; y < half_height; ++y) {
            const std::size_t src = y * stride;
            const std::size_t dst = y * aligned_width;
            for (std::size_t x = 0; x < frame_width; ++x) {
                chroma_buffer[dst + x] = chroma_src[src + x];
            }
        }
        break;
    }
    default:
        ASSERT(false);
        break;
    }
    gpu.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(),
                                   chroma_buffer.size());
}

} // namespace Host1x

} // namespace Tegra