Diffstat (limited to 'src')
 -rw-r--r--   src/video_core/command_classes/vic.cpp | 263
 -rw-r--r--   src/video_core/command_classes/vic.h   |  21
2 files changed, 146 insertions, 138 deletions
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index d77eb0c85..3f2712a8d 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -16,6 +16,7 @@ extern "C" {
 }
 
 #include "common/assert.h"
+#include "common/bit_field.h"
 #include "common/logging/log.h"
 
 #include "video_core/command_classes/nvdec.h"
@@ -26,6 +27,25 @@ extern "C" {
 #include "video_core/textures/decoders.h"
 
 namespace Tegra {
+namespace {
+enum class VideoPixelFormat : u64_le {
+    RGBA8 = 0x1f,
+    BGRA8 = 0x20,
+    RGBX8 = 0x23,
+    Yuv420 = 0x44,
+};
+} // Anonymous namespace
+
+union VicConfig {
+    u64_le raw{};
+    BitField<0, 7, VideoPixelFormat> pixel_format;
+    BitField<7, 2, u64_le> chroma_loc_horiz;
+    BitField<9, 2, u64_le> chroma_loc_vert;
+    BitField<11, 4, u64_le> block_linear_kind;
+    BitField<15, 4, u64_le> block_linear_height_log2;
+    BitField<32, 14, u64_le> surface_width_minus1;
+    BitField<46, 14, u64_le> surface_height_minus1;
+};
 
 Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
     : gpu(gpu_),
| @@ -65,145 +85,146 @@ void Vic::Execute() { | |||
| 65 | if (!frame) { | 85 | if (!frame) { |
| 66 | return; | 86 | return; |
| 67 | } | 87 | } |
| 68 | const auto pixel_format = static_cast<VideoPixelFormat>(config.pixel_format.Value()); | 88 | switch (config.pixel_format) { |
| 69 | switch (pixel_format) { | 89 | case VideoPixelFormat::RGBA8: |
| 70 | case VideoPixelFormat::BGRA8: | 90 | case VideoPixelFormat::BGRA8: |
| 71 | case VideoPixelFormat::RGBX8: | 91 | case VideoPixelFormat::RGBX8: |
| 72 | case VideoPixelFormat::RGBA8: { | 92 | WriteRGBFrame(frame, config); |
| 73 | LOG_TRACE(Service_NVDRV, "Writing RGB Frame"); | 93 | break; |
| 74 | 94 | case VideoPixelFormat::Yuv420: | |
| 75 | if (scaler_ctx == nullptr || frame->width != scaler_width || | 95 | WriteYUVFrame(frame, config); |
| 76 | frame->height != scaler_height) { | 96 | break; |
| 77 | const AVPixelFormat target_format = [pixel_format]() { | 97 | default: |
| 78 | switch (pixel_format) { | 98 | UNIMPLEMENTED_MSG("Unknown video pixel format {:X}", config.pixel_format.Value()); |
| 79 | case VideoPixelFormat::BGRA8: | ||
| 80 | return AV_PIX_FMT_BGRA; | ||
| 81 | case VideoPixelFormat::RGBX8: | ||
| 82 | return AV_PIX_FMT_RGB0; | ||
| 83 | case VideoPixelFormat::RGBA8: | ||
| 84 | return AV_PIX_FMT_RGBA; | ||
| 85 | default: | ||
| 86 | return AV_PIX_FMT_RGBA; | ||
| 87 | } | ||
| 88 | }(); | ||
| 89 | |||
| 90 | sws_freeContext(scaler_ctx); | ||
| 91 | scaler_ctx = nullptr; | ||
| 92 | |||
| 93 | // Frames are decoded into either YUV420 or NV12 formats. Convert to desired format | ||
| 94 | scaler_ctx = sws_getContext(frame->width, frame->height, | ||
| 95 | static_cast<AVPixelFormat>(frame->format), frame->width, | ||
| 96 | frame->height, target_format, 0, nullptr, nullptr, nullptr); | ||
| 97 | |||
| 98 | scaler_width = frame->width; | ||
| 99 | scaler_height = frame->height; | ||
| 100 | } | ||
| 101 | // Get Converted frame | ||
| 102 | const u32 width = static_cast<u32>(frame->width); | ||
| 103 | const u32 height = static_cast<u32>(frame->height); | ||
| 104 | const std::size_t linear_size = width * height * 4; | ||
| 105 | |||
| 106 | // Only allocate frame_buffer once per stream, as the size is not expected to change | ||
| 107 | if (!converted_frame_buffer) { | ||
| 108 | converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(linear_size)), av_free}; | ||
| 109 | } | ||
| 110 | const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0}; | ||
| 111 | u8* const converted_frame_buf_addr{converted_frame_buffer.get()}; | ||
| 112 | |||
| 113 | sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, | ||
| 114 | &converted_frame_buf_addr, converted_stride.data()); | ||
| 115 | |||
| 116 | const u32 blk_kind = static_cast<u32>(config.block_linear_kind); | ||
| 117 | if (blk_kind != 0) { | ||
| 118 | // swizzle pitch linear to block linear | ||
| 119 | const u32 block_height = static_cast<u32>(config.block_linear_height_log2); | ||
| 120 | const auto size = | ||
| 121 | Tegra::Texture::CalculateSize(true, 4, width, height, 1, block_height, 0); | ||
| 122 | luma_buffer.resize(size); | ||
| 123 | Tegra::Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(), | ||
| 124 | converted_frame_buffer.get(), block_height, 0, 0); | ||
| 125 | |||
| 126 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size); | ||
| 127 | } else { | ||
| 128 | // send pitch linear frame | ||
| 129 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, | ||
| 130 | linear_size); | ||
| 131 | } | ||
| 132 | break; | 99 | break; |
| 133 | } | 100 | } |
| 134 | case VideoPixelFormat::Yuv420: { | 101 | } |
| 135 | LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame"); | ||
| 136 | 102 | ||
| 137 | const std::size_t surface_width = config.surface_width_minus1 + 1; | 103 | void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) { |
| 138 | const std::size_t surface_height = config.surface_height_minus1 + 1; | 104 | LOG_TRACE(Service_NVDRV, "Writing RGB Frame"); |
| 139 | const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width)); | 105 | |
| 140 | const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height)); | 106 | if (!scaler_ctx || frame->width != scaler_width || frame->height != scaler_height) { |
| 141 | const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL; | 107 | const AVPixelFormat target_format = [pixel_format = config.pixel_format]() { |
| 108 | switch (pixel_format) { | ||
| 109 | case VideoPixelFormat::RGBA8: | ||
| 110 | return AV_PIX_FMT_RGBA; | ||
| 111 | case VideoPixelFormat::BGRA8: | ||
| 112 | return AV_PIX_FMT_BGRA; | ||
| 113 | case VideoPixelFormat::RGBX8: | ||
| 114 | return AV_PIX_FMT_RGB0; | ||
| 115 | default: | ||
| 116 | return AV_PIX_FMT_RGBA; | ||
| 117 | } | ||
| 118 | }(); | ||
| 119 | |||
| 120 | sws_freeContext(scaler_ctx); | ||
| 121 | // Frames are decoded into either YUV420 or NV12 formats. Convert to desired RGB format | ||
| 122 | scaler_ctx = sws_getContext(frame->width, frame->height, | ||
| 123 | static_cast<AVPixelFormat>(frame->format), frame->width, | ||
| 124 | frame->height, target_format, 0, nullptr, nullptr, nullptr); | ||
| 125 | scaler_width = frame->width; | ||
| 126 | scaler_height = frame->height; | ||
| 127 | converted_frame_buffer.reset(); | ||
| 128 | } | ||
| 129 | // Get Converted frame | ||
| 130 | const u32 width = static_cast<u32>(frame->width); | ||
| 131 | const u32 height = static_cast<u32>(frame->height); | ||
| 132 | const std::size_t linear_size = width * height * 4; | ||
| 133 | |||
| 134 | // Only allocate frame_buffer once per stream, as the size is not expected to change | ||
| 135 | if (!converted_frame_buffer) { | ||
| 136 | converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(linear_size)), av_free}; | ||
| 137 | } | ||
| 138 | const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0}; | ||
| 139 | u8* const converted_frame_buf_addr{converted_frame_buffer.get()}; | ||
| 140 | |||
| 141 | sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, &converted_frame_buf_addr, | ||
| 142 | converted_stride.data()); | ||
| 143 | |||
| 144 | const u32 blk_kind = static_cast<u32>(config.block_linear_kind); | ||
| 145 | if (blk_kind != 0) { | ||
| 146 | // swizzle pitch linear to block linear | ||
| 147 | const u32 block_height = static_cast<u32>(config.block_linear_height_log2); | ||
| 148 | const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0); | ||
| 149 | luma_buffer.resize(size); | ||
| 150 | Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(), | ||
| 151 | converted_frame_buffer.get(), block_height, 0, 0); | ||
| 152 | |||
| 153 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size); | ||
| 154 | } else { | ||
| 155 | // send pitch linear frame | ||
| 156 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, | ||
| 157 | linear_size); | ||
| 158 | } | ||
| 159 | } | ||
| 142 | 160 | ||
| 143 | const auto stride = static_cast<size_t>(frame->linesize[0]); | 161 | void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) { |
| 162 | LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame"); | ||
| 144 | 163 | ||
| 145 | luma_buffer.resize(aligned_width * surface_height); | 164 | const std::size_t surface_width = config.surface_width_minus1 + 1; |
| 146 | chroma_buffer.resize(aligned_width * surface_height / 2); | 165 | const std::size_t surface_height = config.surface_height_minus1 + 1; |
| 166 | const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width)); | ||
| 167 | const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height)); | ||
| 168 | const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL; | ||
| 147 | 169 | ||
| 148 | // Populate luma buffer | 170 | const auto stride = static_cast<size_t>(frame->linesize[0]); |
| 149 | const u8* luma_src = frame->data[0]; | 171 | |
| 150 | for (std::size_t y = 0; y < frame_height; ++y) { | 172 | luma_buffer.resize(aligned_width * surface_height); |
| 151 | const std::size_t src = y * stride; | 173 | chroma_buffer.resize(aligned_width * surface_height / 2); |
| 152 | const std::size_t dst = y * aligned_width; | 174 | |
| 153 | for (std::size_t x = 0; x < frame_width; ++x) { | 175 | // Populate luma buffer |
| 154 | luma_buffer[dst + x] = luma_src[src + x]; | 176 | const u8* luma_src = frame->data[0]; |
| 155 | } | 177 | for (std::size_t y = 0; y < frame_height; ++y) { |
| 178 | const std::size_t src = y * stride; | ||
| 179 | const std::size_t dst = y * aligned_width; | ||
| 180 | for (std::size_t x = 0; x < frame_width; ++x) { | ||
| 181 | luma_buffer[dst + x] = luma_src[src + x]; | ||
| 156 | } | 182 | } |
| 157 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), | 183 | } |
| 158 | luma_buffer.size()); | 184 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), |
| 159 | 185 | luma_buffer.size()); | |
| 160 | // Chroma | 186 | |
| 161 | const std::size_t half_height = frame_height / 2; | 187 | // Chroma |
| 162 | const auto half_stride = static_cast<size_t>(frame->linesize[1]); | 188 | const std::size_t half_height = frame_height / 2; |
| 163 | 189 | const auto half_stride = static_cast<size_t>(frame->linesize[1]); | |
| 164 | switch (frame->format) { | 190 | |
| 165 | case AV_PIX_FMT_YUV420P: { | 191 | switch (frame->format) { |
| 166 | // Frame from FFmpeg software | 192 | case AV_PIX_FMT_YUV420P: { |
| 167 | // Populate chroma buffer from both channels with interleaving. | 193 | // Frame from FFmpeg software |
| 168 | const std::size_t half_width = frame_width / 2; | 194 | // Populate chroma buffer from both channels with interleaving. |
| 169 | const u8* chroma_b_src = frame->data[1]; | 195 | const std::size_t half_width = frame_width / 2; |
| 170 | const u8* chroma_r_src = frame->data[2]; | 196 | const u8* chroma_b_src = frame->data[1]; |
| 171 | for (std::size_t y = 0; y < half_height; ++y) { | 197 | const u8* chroma_r_src = frame->data[2]; |
| 172 | const std::size_t src = y * half_stride; | 198 | for (std::size_t y = 0; y < half_height; ++y) { |
| 173 | const std::size_t dst = y * aligned_width; | 199 | const std::size_t src = y * half_stride; |
| 174 | 200 | const std::size_t dst = y * aligned_width; | |
| 175 | for (std::size_t x = 0; x < half_width; ++x) { | 201 | |
| 176 | chroma_buffer[dst + x * 2] = chroma_b_src[src + x]; | 202 | for (std::size_t x = 0; x < half_width; ++x) { |
| 177 | chroma_buffer[dst + x * 2 + 1] = chroma_r_src[src + x]; | 203 | chroma_buffer[dst + x * 2] = chroma_b_src[src + x]; |
| 178 | } | 204 | chroma_buffer[dst + x * 2 + 1] = chroma_r_src[src + x]; |
| 179 | } | 205 | } |
| 180 | break; | ||
| 181 | } | 206 | } |
| 182 | case AV_PIX_FMT_NV12: { | 207 | break; |
| 183 | // Frame from VA-API hardware | 208 | } |
| 184 | // This is already interleaved so just copy | 209 | case AV_PIX_FMT_NV12: { |
| 185 | const u8* chroma_src = frame->data[1]; | 210 | // Frame from VA-API hardware |
| 186 | for (std::size_t y = 0; y < half_height; ++y) { | 211 | // This is already interleaved so just copy |
| 187 | const std::size_t src = y * stride; | 212 | const u8* chroma_src = frame->data[1]; |
| 188 | const std::size_t dst = y * aligned_width; | 213 | for (std::size_t y = 0; y < half_height; ++y) { |
| 189 | for (std::size_t x = 0; x < frame_width; ++x) { | 214 | const std::size_t src = y * stride; |
| 190 | chroma_buffer[dst + x] = chroma_src[src + x]; | 215 | const std::size_t dst = y * aligned_width; |
| 191 | } | 216 | for (std::size_t x = 0; x < frame_width; ++x) { |
| 217 | chroma_buffer[dst + x] = chroma_src[src + x]; | ||
| 192 | } | 218 | } |
| 193 | break; | ||
| 194 | } | ||
| 195 | default: | ||
| 196 | UNREACHABLE(); | ||
| 197 | break; | ||
| 198 | } | 219 | } |
| 199 | gpu.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(), | ||
| 200 | chroma_buffer.size()); | ||
| 201 | break; | 220 | break; |
| 202 | } | 221 | } |
| 203 | default: | 222 | default: |
| 204 | UNIMPLEMENTED_MSG("Unknown video pixel format {:X}", config.pixel_format.Value()); | 223 | UNREACHABLE(); |
| 205 | break; | 224 | break; |
| 206 | } | 225 | } |
| 226 | gpu.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(), | ||
| 227 | chroma_buffer.size()); | ||
| 207 | } | 228 | } |
| 208 | 229 | ||
| 209 | } // namespace Tegra | 230 | } // namespace Tegra |
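
Note on the VicConfig layout introduced above (illustration, not part of the commit): each BitField member reads a sub-range of bits out of the 64-bit config word, e.g. pixel_format occupies bits 0-6 and surface_width_minus1 bits 32-45. A minimal standalone sketch of the same unpacking with plain shifts and masks, fed an invented sample value rather than a real register read:

#include <cstdint>
#include <cstdio>

// Standalone illustration of the VicConfig bit layout shown in the diff above.
// Offsets and widths mirror the BitField members; the sample value is made up.
constexpr std::uint64_t Extract(std::uint64_t raw, unsigned pos, unsigned bits) {
    return (raw >> pos) & ((std::uint64_t{1} << bits) - 1);
}

int main() {
    // Hypothetical config word describing a 1280x720 Yuv420 (0x44) surface.
    const std::uint64_t raw =
        (std::uint64_t{719} << 46) | (std::uint64_t{1279} << 32) | std::uint64_t{0x44};

    const std::uint64_t pixel_format = Extract(raw, 0, 7);         // BitField<0, 7, ...>
    const std::uint64_t block_linear_kind = Extract(raw, 11, 4);   // BitField<11, 4, ...>
    const std::uint64_t surface_width = Extract(raw, 32, 14) + 1;  // BitField<32, 14, ...>
    const std::uint64_t surface_height = Extract(raw, 46, 14) + 1; // BitField<46, 14, ...>

    std::printf("format=0x%llx block_linear=%llu size=%llux%llu\n",
                static_cast<unsigned long long>(pixel_format),
                static_cast<unsigned long long>(block_linear_kind),
                static_cast<unsigned long long>(surface_width),
                static_cast<unsigned long long>(surface_height));
    return 0;
}

yuzu's BitField template performs the same shift-and-mask implicitly; typing the field as BitField<0, 7, VideoPixelFormat> is what lets Execute() switch on config.pixel_format directly instead of casting its Value().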
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h
index ea10c2f0f..6d4cdfd57 100644
--- a/src/video_core/command_classes/vic.h
+++ b/src/video_core/command_classes/vic.h
@@ -6,7 +6,6 @@
 
 #include <memory>
 #include <vector>
-#include "common/bit_field.h"
 #include "common/common_types.h"
 
 struct SwsContext;
@@ -14,6 +13,7 @@ struct SwsContext;
 namespace Tegra {
 class GPU;
 class Nvdec;
+union VicConfig;
 
 class Vic {
 public:
@@ -27,6 +27,7 @@
     };
 
     explicit Vic(GPU& gpu, std::shared_ptr<Nvdec> nvdec_processor);
+
    ~Vic();
 
     /// Write to the device state.
@@ -35,23 +36,9 @@
 private:
     void Execute();
 
-    enum class VideoPixelFormat : u64_le {
-        RGBA8 = 0x1f,
-        BGRA8 = 0x20,
-        RGBX8 = 0x23,
-        Yuv420 = 0x44,
-    };
+    void WriteRGBFrame(const AVFrame* frame, const VicConfig& config);
 
-    union VicConfig {
-        u64_le raw{};
-        BitField<0, 7, u64_le> pixel_format;
-        BitField<7, 2, u64_le> chroma_loc_horiz;
-        BitField<9, 2, u64_le> chroma_loc_vert;
-        BitField<11, 4, u64_le> block_linear_kind;
-        BitField<15, 4, u64_le> block_linear_height_log2;
-        BitField<32, 14, u64_le> surface_width_minus1;
-        BitField<46, 14, u64_le> surface_height_minus1;
-    };
+    void WriteYUVFrame(const AVFrame* frame, const VicConfig& config);
 
     GPU& gpu;
     std::shared_ptr<Tegra::Nvdec> nvdec_processor;
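
Because vic.h now only forward-declares union VicConfig and the new Write*Frame helpers take it by const reference, the header can drop common/bit_field.h entirely: an incomplete type is enough for a reference parameter. A small self-contained sketch of that pattern, with names invented for illustration rather than taken from yuzu:

#include <cstdint>

// Header side: an incomplete declaration suffices, because the member
// function only takes the union by const reference.
union PackedState;

class Widget {
public:
    void Apply(const PackedState& state);
};

// Implementation side: the full definition stays out of the header, so
// header consumers never see (or rebuild against) the bit layout.
union PackedState {
    std::uint64_t raw;
};

void Widget::Apply(const PackedState& state) {
    // The complete type is visible here.
    (void)state.raw;
}

int main() {
    Widget w;
    PackedState s{0x44};
    w.Apply(s);
    return 0;
}

The practical benefit is the usual one for this kind of split: translation units that include vic.h no longer recompile when the config word's bit layout changes.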