Diffstat:
 -rw-r--r--  src/video_core/command_classes/vic.cpp | 259
 -rw-r--r--  src/video_core/command_classes/vic.h   |  20
 2 files changed, 154 insertions, 125 deletions
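This change moves the VicConfig union and the VideoPixelFormat enum out of vic.h and into vic.cpp, typing the pixel_format field directly as a VideoPixelFormat BitField. As a rough illustration of the bit layout those BitField<position, size, type> declarations describe, here is a minimal standalone decode sketch; it uses plain shifts and masks rather than yuzu's common/bit_field.h, and the DecodedVicConfig/Decode names are purely illustrative, not part of the codebase:

```cpp
#include <cstdint>
#include <cstdio>

// Simplified, standalone decode of the 64-bit VIC config word, mirroring the
// bit positions declared by the VicConfig BitField members in the diff below.
// Struct and helper names here are illustrative only.
struct DecodedVicConfig {
    std::uint64_t pixel_format;             // bits 0..6   (VideoPixelFormat)
    std::uint64_t block_linear_kind;        // bits 11..14
    std::uint64_t block_linear_height_log2; // bits 15..18
    std::uint64_t surface_width;            // bits 32..45, stored as width - 1
    std::uint64_t surface_height;           // bits 46..59, stored as height - 1
};

constexpr std::uint64_t Bits(std::uint64_t raw, unsigned pos, unsigned size) {
    return (raw >> pos) & ((std::uint64_t{1} << size) - 1);
}

constexpr DecodedVicConfig Decode(std::uint64_t raw) {
    DecodedVicConfig out{};
    out.pixel_format = Bits(raw, 0, 7);
    out.block_linear_kind = Bits(raw, 11, 4);
    out.block_linear_height_log2 = Bits(raw, 15, 4);
    out.surface_width = Bits(raw, 32, 14) + 1;  // *_minus1 encoding
    out.surface_height = Bits(raw, 46, 14) + 1;
    return out;
}

int main() {
    // Example: Yuv420 (0x44) with a 1280x720 surface.
    const std::uint64_t raw = 0x44ULL | (1279ULL << 32) | (719ULL << 46);
    const DecodedVicConfig cfg = Decode(raw);
    std::printf("format=0x%llx, surface=%llux%llu\n",
                static_cast<unsigned long long>(cfg.pixel_format),
                static_cast<unsigned long long>(cfg.surface_width),
                static_cast<unsigned long long>(cfg.surface_height));
}
```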
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 0ee07f398..51f739801 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -16,6 +16,7 @@ extern "C" {
 }
 
 #include "common/assert.h"
+#include "common/bit_field.h"
 #include "common/logging/log.h"
 
 #include "video_core/command_classes/nvdec.h"
@@ -26,6 +27,25 @@ extern "C" {
 #include "video_core/textures/decoders.h"
 
 namespace Tegra {
+namespace {
+enum class VideoPixelFormat : u64_le {
+    RGBA8 = 0x1f,
+    BGRA8 = 0x20,
+    RGBX8 = 0x23,
+    Yuv420 = 0x44,
+};
+} // Anonymous namespace
+
+union VicConfig {
+    u64_le raw{};
+    BitField<0, 7, VideoPixelFormat> pixel_format;
+    BitField<7, 2, u64_le> chroma_loc_horiz;
+    BitField<9, 2, u64_le> chroma_loc_vert;
+    BitField<11, 4, u64_le> block_linear_kind;
+    BitField<15, 4, u64_le> block_linear_height_log2;
+    BitField<32, 14, u64_le> surface_width_minus1;
+    BitField<46, 14, u64_le> surface_height_minus1;
+};
 
 Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
     : gpu(gpu_),
@@ -65,134 +85,155 @@ void Vic::Execute() {
     if (!frame) {
         return;
     }
-    const auto pixel_format = static_cast<VideoPixelFormat>(config.pixel_format.Value());
-    switch (pixel_format) {
+    const u64 surface_width = config.surface_width_minus1 + 1;
+    const u64 surface_height = config.surface_height_minus1 + 1;
+    if (static_cast<u64>(frame->width) != surface_width ||
+        static_cast<u64>(frame->height) != surface_height) {
+        // TODO: Properly support multiple video streams with differing frame dimensions
+        LOG_WARNING(Debug, "Frame dimensions {}x{} do not match expected surface dimensions {}x{}",
+                    frame->width, frame->height, surface_width, surface_height);
+        return;
+    }
+    switch (config.pixel_format) {
+    case VideoPixelFormat::RGBA8:
     case VideoPixelFormat::BGRA8:
-    case VideoPixelFormat::RGBA8: {
-        LOG_TRACE(Service_NVDRV, "Writing RGB Frame");
+    case VideoPixelFormat::RGBX8:
+        WriteRGBFrame(frame, config);
+        break;
+    case VideoPixelFormat::Yuv420:
+        WriteYUVFrame(frame, config);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unknown video pixel format {:X}", config.pixel_format.Value());
+        break;
+    }
+}
 
-    if (scaler_ctx == nullptr || frame->width != scaler_width ||
-        frame->height != scaler_height) {
-        const AVPixelFormat target_format =
-            (pixel_format == VideoPixelFormat::RGBA8) ? AV_PIX_FMT_RGBA : AV_PIX_FMT_BGRA;
+void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
+    LOG_TRACE(Service_NVDRV, "Writing RGB Frame");
+
+    if (!scaler_ctx || frame->width != scaler_width || frame->height != scaler_height) {
+        const AVPixelFormat target_format = [pixel_format = config.pixel_format]() {
+            switch (pixel_format) {
+            case VideoPixelFormat::RGBA8:
+                return AV_PIX_FMT_RGBA;
+            case VideoPixelFormat::BGRA8:
+                return AV_PIX_FMT_BGRA;
+            case VideoPixelFormat::RGBX8:
+                return AV_PIX_FMT_RGB0;
+            default:
+                return AV_PIX_FMT_RGBA;
+            }
+        }();
+
+        sws_freeContext(scaler_ctx);
+        // Frames are decoded into either YUV420 or NV12 formats. Convert to desired RGB format
+        scaler_ctx = sws_getContext(frame->width, frame->height,
+                                    static_cast<AVPixelFormat>(frame->format), frame->width,
+                                    frame->height, target_format, 0, nullptr, nullptr, nullptr);
+        scaler_width = frame->width;
+        scaler_height = frame->height;
+        converted_frame_buffer.reset();
+    }
+    // Get Converted frame
+    const u32 width = static_cast<u32>(frame->width);
+    const u32 height = static_cast<u32>(frame->height);
+    const std::size_t linear_size = width * height * 4;
+
+    // Only allocate frame_buffer once per stream, as the size is not expected to change
+    if (!converted_frame_buffer) {
+        converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(linear_size)), av_free};
+    }
+    const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0};
+    u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
+
+    sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, &converted_frame_buf_addr,
+              converted_stride.data());
+
+    const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
+    if (blk_kind != 0) {
+        // swizzle pitch linear to block linear
+        const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
+        const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0);
+        luma_buffer.resize(size);
+        Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(),
+                                converted_frame_buffer.get(), block_height, 0, 0);
+
+        gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
+    } else {
+        // send pitch linear frame
+        gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
+                                       linear_size);
+    }
+}
 
-            sws_freeContext(scaler_ctx);
-            scaler_ctx = nullptr;
+void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
+    LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame");
 
-            // Frames are decoded into either YUV420 or NV12 formats. Convert to desired format
-            scaler_ctx = sws_getContext(frame->width, frame->height,
-                                        static_cast<AVPixelFormat>(frame->format), frame->width,
-                                        frame->height, target_format, 0, nullptr, nullptr, nullptr);
+    const std::size_t surface_width = config.surface_width_minus1 + 1;
+    const std::size_t surface_height = config.surface_height_minus1 + 1;
+    const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width));
+    const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height));
+    const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL;
 
-            scaler_width = frame->width;
-            scaler_height = frame->height;
-        }
-        // Get Converted frame
-        const u32 width = static_cast<u32>(frame->width);
-        const u32 height = static_cast<u32>(frame->height);
-        const std::size_t linear_size = width * height * 4;
-
-        // Only allocate frame_buffer once per stream, as the size is not expected to change
-        if (!converted_frame_buffer) {
-            converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(linear_size)), av_free};
+    const auto stride = static_cast<size_t>(frame->linesize[0]);
+
+    luma_buffer.resize(aligned_width * surface_height);
+    chroma_buffer.resize(aligned_width * surface_height / 2);
+
+    // Populate luma buffer
+    const u8* luma_src = frame->data[0];
+    for (std::size_t y = 0; y < frame_height; ++y) {
+        const std::size_t src = y * stride;
+        const std::size_t dst = y * aligned_width;
+        for (std::size_t x = 0; x < frame_width; ++x) {
+            luma_buffer[dst + x] = luma_src[src + x];
         }
-        const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0};
-        u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
-
-        sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height,
-                  &converted_frame_buf_addr, converted_stride.data());
-
-        const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
-        if (blk_kind != 0) {
-            // swizzle pitch linear to block linear
-            const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
-            const auto size =
-                Tegra::Texture::CalculateSize(true, 4, width, height, 1, block_height, 0);
-            luma_buffer.resize(size);
-            Tegra::Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(),
-                                           converted_frame_buffer.get(), block_height, 0, 0);
-
-            gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
-        } else {
-            // send pitch linear frame
-            gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
-                                           linear_size);
+    }
+    gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
+                                   luma_buffer.size());
+
+    // Chroma
+    const std::size_t half_height = frame_height / 2;
+    const auto half_stride = static_cast<size_t>(frame->linesize[1]);
+
+    switch (frame->format) {
+    case AV_PIX_FMT_YUV420P: {
+        // Frame from FFmpeg software
+        // Populate chroma buffer from both channels with interleaving.
+        const std::size_t half_width = frame_width / 2;
+        const u8* chroma_b_src = frame->data[1];
+        const u8* chroma_r_src = frame->data[2];
+        for (std::size_t y = 0; y < half_height; ++y) {
+            const std::size_t src = y * half_stride;
+            const std::size_t dst = y * aligned_width;
+
+            for (std::size_t x = 0; x < half_width; ++x) {
+                chroma_buffer[dst + x * 2] = chroma_b_src[src + x];
+                chroma_buffer[dst + x * 2 + 1] = chroma_r_src[src + x];
+            }
         }
         break;
     }
-    case VideoPixelFormat::Yuv420: {
-        LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame");
-
-        const std::size_t surface_width = config.surface_width_minus1 + 1;
-        const std::size_t surface_height = config.surface_height_minus1 + 1;
-        const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width));
-        const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height));
-        const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL;
-
-        const auto stride = static_cast<size_t>(frame->linesize[0]);
-
-        luma_buffer.resize(aligned_width * surface_height);
-        chroma_buffer.resize(aligned_width * surface_height / 2);
-
-        // Populate luma buffer
-        const u8* luma_src = frame->data[0];
-        for (std::size_t y = 0; y < frame_height; ++y) {
+    case AV_PIX_FMT_NV12: {
+        // Frame from VA-API hardware
+        // This is already interleaved so just copy
+        const u8* chroma_src = frame->data[1];
+        for (std::size_t y = 0; y < half_height; ++y) {
             const std::size_t src = y * stride;
             const std::size_t dst = y * aligned_width;
             for (std::size_t x = 0; x < frame_width; ++x) {
-                luma_buffer[dst + x] = luma_src[src + x];
-            }
-        }
-        gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
-                                       luma_buffer.size());
-
-        // Chroma
-        const std::size_t half_height = frame_height / 2;
-        const auto half_stride = static_cast<size_t>(frame->linesize[1]);
-
-        switch (frame->format) {
-        case AV_PIX_FMT_YUV420P: {
-            // Frame from FFmpeg software
-            // Populate chroma buffer from both channels with interleaving.
-            const std::size_t half_width = frame_width / 2;
-            const u8* chroma_b_src = frame->data[1];
-            const u8* chroma_r_src = frame->data[2];
-            for (std::size_t y = 0; y < half_height; ++y) {
-                const std::size_t src = y * half_stride;
-                const std::size_t dst = y * aligned_width;
-
-                for (std::size_t x = 0; x < half_width; ++x) {
-                    chroma_buffer[dst + x * 2] = chroma_b_src[src + x];
-                    chroma_buffer[dst + x * 2 + 1] = chroma_r_src[src + x];
-                }
+                chroma_buffer[dst + x] = chroma_src[src + x];
             }
-            break;
-        }
-        case AV_PIX_FMT_NV12: {
-            // Frame from VA-API hardware
-            // This is already interleaved so just copy
-            const u8* chroma_src = frame->data[1];
-            for (std::size_t y = 0; y < half_height; ++y) {
-                const std::size_t src = y * stride;
-                const std::size_t dst = y * aligned_width;
-                for (std::size_t x = 0; x < frame_width; ++x) {
-                    chroma_buffer[dst + x] = chroma_src[src + x];
-                }
-            }
-            break;
-        }
-        default:
-            UNREACHABLE();
-            break;
         }
-        gpu.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(),
-                                       chroma_buffer.size());
         break;
     }
     default:
-        UNIMPLEMENTED_MSG("Unknown video pixel format {}", config.pixel_format.Value());
+        UNREACHABLE();
         break;
     }
+    gpu.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(),
+                                   chroma_buffer.size());
 }
 
 } // namespace Tegra
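For context on the WriteYUVFrame path added above: the luma plane is copied row by row into a pitch-linear surface whose row pitch is padded up to a 256-byte multiple (aligned_width), and the chroma data is written as a single interleaved Cb/Cr plane, either by interleaving the two YUV420P planes from a software decode or by re-pitching the already-interleaved NV12 plane from a hardware decode. Below is a self-contained sketch of the YUV420P interleaving step, using illustrative parameter and buffer names and a std::vector in place of the class member buffer:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Standalone sketch of the YUV420P chroma interleaving done by WriteYUVFrame:
// the decoder's separate Cb/Cr planes are merged into one semi-planar
// (NV12-style) chroma plane whose row pitch is the aligned surface width.
std::vector<std::uint8_t> InterleaveChroma(const std::uint8_t* cb_plane, const std::uint8_t* cr_plane,
                                           std::size_t half_stride, std::size_t frame_width,
                                           std::size_t frame_height, std::size_t aligned_width,
                                           std::size_t surface_height) {
    std::vector<std::uint8_t> chroma(aligned_width * surface_height / 2);
    const std::size_t half_width = frame_width / 2;
    const std::size_t half_height = frame_height / 2;
    for (std::size_t y = 0; y < half_height; ++y) {
        const std::size_t src = y * half_stride;   // source rows are half_stride bytes apart
        const std::size_t dst = y * aligned_width; // destination rows use the padded pitch
        for (std::size_t x = 0; x < half_width; ++x) {
            chroma[dst + x * 2] = cb_plane[src + x];     // Cb sample
            chroma[dst + x * 2 + 1] = cr_plane[src + x]; // Cr sample
        }
    }
    return chroma;
}
```

The NV12 branch in the patch skips this interleaving and only copies each row to the padded pitch, since frame->data[1] already holds interleaved Cb/Cr samples.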
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h
index 74246e08c..6d4cdfd57 100644
--- a/src/video_core/command_classes/vic.h
+++ b/src/video_core/command_classes/vic.h
@@ -6,7 +6,6 @@
 
 #include <memory>
 #include <vector>
-#include "common/bit_field.h"
 #include "common/common_types.h"
 
 struct SwsContext;
@@ -14,6 +13,7 @@ struct SwsContext;
 namespace Tegra {
 class GPU;
 class Nvdec;
+union VicConfig;
 
 class Vic {
 public:
@@ -27,6 +27,7 @@
     };
 
     explicit Vic(GPU& gpu, std::shared_ptr<Nvdec> nvdec_processor);
+
    ~Vic();
 
    /// Write to the device state.
@@ -35,22 +36,9 @@ public:
 private:
     void Execute();
 
-    enum class VideoPixelFormat : u64_le {
-        RGBA8 = 0x1f,
-        BGRA8 = 0x20,
-        Yuv420 = 0x44,
-    };
+    void WriteRGBFrame(const AVFrame* frame, const VicConfig& config);
 
-    union VicConfig {
-        u64_le raw{};
-        BitField<0, 7, u64_le> pixel_format;
-        BitField<7, 2, u64_le> chroma_loc_horiz;
-        BitField<9, 2, u64_le> chroma_loc_vert;
-        BitField<11, 4, u64_le> block_linear_kind;
-        BitField<15, 4, u64_le> block_linear_height_log2;
-        BitField<32, 14, u64_le> surface_width_minus1;
-        BitField<46, 14, u64_le> surface_height_minus1;
-    };
+    void WriteYUVFrame(const AVFrame* frame, const VicConfig& config);
 
     GPU& gpu;
     std::shared_ptr<Tegra::Nvdec> nvdec_processor;