Diffstat (limited to 'src')
-rw-r--r--  src/video_core/command_classes/vic.cpp | 263
-rw-r--r--  src/video_core/command_classes/vic.h   |  21
2 files changed, 146 insertions(+), 138 deletions(-)
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index d77eb0c85..3f2712a8d 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -16,6 +16,7 @@ extern "C" {
 }
 
 #include "common/assert.h"
+#include "common/bit_field.h"
 #include "common/logging/log.h"
 
 #include "video_core/command_classes/nvdec.h"
@@ -26,6 +27,25 @@ extern "C" {
 #include "video_core/textures/decoders.h"
 
 namespace Tegra {
+namespace {
+enum class VideoPixelFormat : u64_le {
+    RGBA8 = 0x1f,
+    BGRA8 = 0x20,
+    RGBX8 = 0x23,
+    Yuv420 = 0x44,
+};
+} // Anonymous namespace
+
+union VicConfig {
+    u64_le raw{};
+    BitField<0, 7, VideoPixelFormat> pixel_format;
+    BitField<7, 2, u64_le> chroma_loc_horiz;
+    BitField<9, 2, u64_le> chroma_loc_vert;
+    BitField<11, 4, u64_le> block_linear_kind;
+    BitField<15, 4, u64_le> block_linear_height_log2;
+    BitField<32, 14, u64_le> surface_width_minus1;
+    BitField<46, 14, u64_le> surface_height_minus1;
+};
 
 Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
     : gpu(gpu_),
@@ -65,145 +85,146 @@ void Vic::Execute() {
     if (!frame) {
         return;
     }
-    const auto pixel_format = static_cast<VideoPixelFormat>(config.pixel_format.Value());
-    switch (pixel_format) {
-    case VideoPixelFormat::BGRA8:
-    case VideoPixelFormat::RGBX8:
-    case VideoPixelFormat::RGBA8: {
-        LOG_TRACE(Service_NVDRV, "Writing RGB Frame");
-
-        if (scaler_ctx == nullptr || frame->width != scaler_width ||
-            frame->height != scaler_height) {
-            const AVPixelFormat target_format = [pixel_format]() {
-                switch (pixel_format) {
-                case VideoPixelFormat::BGRA8:
-                    return AV_PIX_FMT_BGRA;
-                case VideoPixelFormat::RGBX8:
-                    return AV_PIX_FMT_RGB0;
-                case VideoPixelFormat::RGBA8:
-                    return AV_PIX_FMT_RGBA;
-                default:
-                    return AV_PIX_FMT_RGBA;
-                }
-            }();
-
-            sws_freeContext(scaler_ctx);
-            scaler_ctx = nullptr;
-
-            // Frames are decoded into either YUV420 or NV12 formats. Convert to desired format
-            scaler_ctx = sws_getContext(frame->width, frame->height,
-                                        static_cast<AVPixelFormat>(frame->format), frame->width,
-                                        frame->height, target_format, 0, nullptr, nullptr, nullptr);
-
-            scaler_width = frame->width;
-            scaler_height = frame->height;
-        }
-        // Get Converted frame
-        const u32 width = static_cast<u32>(frame->width);
-        const u32 height = static_cast<u32>(frame->height);
-        const std::size_t linear_size = width * height * 4;
-
-        // Only allocate frame_buffer once per stream, as the size is not expected to change
-        if (!converted_frame_buffer) {
-            converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(linear_size)), av_free};
-        }
-        const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0};
-        u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
-
-        sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height,
-                  &converted_frame_buf_addr, converted_stride.data());
-
-        const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
-        if (blk_kind != 0) {
-            // swizzle pitch linear to block linear
-            const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
-            const auto size =
-                Tegra::Texture::CalculateSize(true, 4, width, height, 1, block_height, 0);
-            luma_buffer.resize(size);
-            Tegra::Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(),
-                                           converted_frame_buffer.get(), block_height, 0, 0);
-
-            gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
-        } else {
-            // send pitch linear frame
-            gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
-                                           linear_size);
-        }
+    switch (config.pixel_format) {
+    case VideoPixelFormat::RGBA8:
+    case VideoPixelFormat::BGRA8:
+    case VideoPixelFormat::RGBX8:
+        WriteRGBFrame(frame, config);
+        break;
+    case VideoPixelFormat::Yuv420:
+        WriteYUVFrame(frame, config);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unknown video pixel format {:X}", config.pixel_format.Value());
         break;
     }
-    case VideoPixelFormat::Yuv420: {
-        LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame");
+}
 
-        const std::size_t surface_width = config.surface_width_minus1 + 1;
-        const std::size_t surface_height = config.surface_height_minus1 + 1;
-        const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width));
-        const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height));
-        const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL;
+void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
+    LOG_TRACE(Service_NVDRV, "Writing RGB Frame");
+
+    if (!scaler_ctx || frame->width != scaler_width || frame->height != scaler_height) {
+        const AVPixelFormat target_format = [pixel_format = config.pixel_format]() {
+            switch (pixel_format) {
+            case VideoPixelFormat::RGBA8:
+                return AV_PIX_FMT_RGBA;
+            case VideoPixelFormat::BGRA8:
+                return AV_PIX_FMT_BGRA;
+            case VideoPixelFormat::RGBX8:
+                return AV_PIX_FMT_RGB0;
+            default:
+                return AV_PIX_FMT_RGBA;
+            }
+        }();
+
+        sws_freeContext(scaler_ctx);
+        // Frames are decoded into either YUV420 or NV12 formats. Convert to desired RGB format
+        scaler_ctx = sws_getContext(frame->width, frame->height,
+                                    static_cast<AVPixelFormat>(frame->format), frame->width,
+                                    frame->height, target_format, 0, nullptr, nullptr, nullptr);
+        scaler_width = frame->width;
+        scaler_height = frame->height;
+        converted_frame_buffer.reset();
+    }
+    // Get Converted frame
+    const u32 width = static_cast<u32>(frame->width);
+    const u32 height = static_cast<u32>(frame->height);
+    const std::size_t linear_size = width * height * 4;
+
+    // Only allocate frame_buffer once per stream, as the size is not expected to change
+    if (!converted_frame_buffer) {
+        converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(linear_size)), av_free};
+    }
+    const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0};
+    u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
+
+    sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, &converted_frame_buf_addr,
+              converted_stride.data());
+
+    const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
+    if (blk_kind != 0) {
+        // swizzle pitch linear to block linear
+        const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
+        const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0);
+        luma_buffer.resize(size);
+        Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(),
+                                converted_frame_buffer.get(), block_height, 0, 0);
+
+        gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
+    } else {
+        // send pitch linear frame
+        gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
+                                       linear_size);
+    }
+}
 
-        const auto stride = static_cast<size_t>(frame->linesize[0]);
+void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
+    LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame");
 
-        luma_buffer.resize(aligned_width * surface_height);
-        chroma_buffer.resize(aligned_width * surface_height / 2);
+    const std::size_t surface_width = config.surface_width_minus1 + 1;
+    const std::size_t surface_height = config.surface_height_minus1 + 1;
+    const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width));
+    const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height));
+    const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL;
 
-        // Populate luma buffer
-        const u8* luma_src = frame->data[0];
-        for (std::size_t y = 0; y < frame_height; ++y) {
-            const std::size_t src = y * stride;
-            const std::size_t dst = y * aligned_width;
-            for (std::size_t x = 0; x < frame_width; ++x) {
-                luma_buffer[dst + x] = luma_src[src + x];
-            }
+    const auto stride = static_cast<size_t>(frame->linesize[0]);
+
+    luma_buffer.resize(aligned_width * surface_height);
+    chroma_buffer.resize(aligned_width * surface_height / 2);
+
+    // Populate luma buffer
+    const u8* luma_src = frame->data[0];
+    for (std::size_t y = 0; y < frame_height; ++y) {
+        const std::size_t src = y * stride;
+        const std::size_t dst = y * aligned_width;
+        for (std::size_t x = 0; x < frame_width; ++x) {
+            luma_buffer[dst + x] = luma_src[src + x];
         }
-        gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
-                                       luma_buffer.size());
-
-        // Chroma
-        const std::size_t half_height = frame_height / 2;
-        const auto half_stride = static_cast<size_t>(frame->linesize[1]);
-
-        switch (frame->format) {
-        case AV_PIX_FMT_YUV420P: {
-            // Frame from FFmpeg software
-            // Populate chroma buffer from both channels with interleaving.
-            const std::size_t half_width = frame_width / 2;
-            const u8* chroma_b_src = frame->data[1];
-            const u8* chroma_r_src = frame->data[2];
-            for (std::size_t y = 0; y < half_height; ++y) {
-                const std::size_t src = y * half_stride;
-                const std::size_t dst = y * aligned_width;
-
-                for (std::size_t x = 0; x < half_width; ++x) {
-                    chroma_buffer[dst + x * 2] = chroma_b_src[src + x];
-                    chroma_buffer[dst + x * 2 + 1] = chroma_r_src[src + x];
-                }
+    }
+    gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
+                                   luma_buffer.size());
+
+    // Chroma
+    const std::size_t half_height = frame_height / 2;
+    const auto half_stride = static_cast<size_t>(frame->linesize[1]);
+
+    switch (frame->format) {
+    case AV_PIX_FMT_YUV420P: {
+        // Frame from FFmpeg software
+        // Populate chroma buffer from both channels with interleaving.
+        const std::size_t half_width = frame_width / 2;
+        const u8* chroma_b_src = frame->data[1];
+        const u8* chroma_r_src = frame->data[2];
+        for (std::size_t y = 0; y < half_height; ++y) {
+            const std::size_t src = y * half_stride;
+            const std::size_t dst = y * aligned_width;
+
+            for (std::size_t x = 0; x < half_width; ++x) {
+                chroma_buffer[dst + x * 2] = chroma_b_src[src + x];
+                chroma_buffer[dst + x * 2 + 1] = chroma_r_src[src + x];
             }
-            break;
         }
-        case AV_PIX_FMT_NV12: {
-            // Frame from VA-API hardware
-            // This is already interleaved so just copy
-            const u8* chroma_src = frame->data[1];
-            for (std::size_t y = 0; y < half_height; ++y) {
-                const std::size_t src = y * stride;
-                const std::size_t dst = y * aligned_width;
-                for (std::size_t x = 0; x < frame_width; ++x) {
-                    chroma_buffer[dst + x] = chroma_src[src + x];
-                }
+        break;
+    }
+    case AV_PIX_FMT_NV12: {
+        // Frame from VA-API hardware
+        // This is already interleaved so just copy
+        const u8* chroma_src = frame->data[1];
+        for (std::size_t y = 0; y < half_height; ++y) {
+            const std::size_t src = y * stride;
+            const std::size_t dst = y * aligned_width;
+            for (std::size_t x = 0; x < frame_width; ++x) {
+                chroma_buffer[dst + x] = chroma_src[src + x];
             }
-            break;
-        }
-        default:
-            UNREACHABLE();
-            break;
         }
-        gpu.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(),
-                                       chroma_buffer.size());
         break;
     }
     default:
-        UNIMPLEMENTED_MSG("Unknown video pixel format {:X}", config.pixel_format.Value());
+        UNREACHABLE();
         break;
     }
+    gpu.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(),
+                                   chroma_buffer.size());
 }
 
 } // namespace Tegra
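
Note (not part of the patch): the VicConfig union added above packs the VIC surface description into a single 64-bit word using yuzu's BitField template. A minimal standalone sketch of the same bit layout, written with plain shifts and masks; the raw value below is made up purely for illustration:

// Illustration only: mirrors the bit offsets/widths of the VicConfig union in the diff
// above, using plain shifts/masks instead of yuzu's BitField. The raw value is made up
// and decodes to pixel_format 0x44 (Yuv420) with a 1280x720 surface.
#include <cstdint>
#include <cstdio>

int main() {
    const std::uint64_t raw = 0x00B3C4FF00000044ULL;                        // hypothetical config word
    const unsigned long long pixel_format = raw & 0x7f;                     // bits 0-6
    const unsigned long long chroma_loc_horiz = (raw >> 7) & 0x3;           // bits 7-8
    const unsigned long long chroma_loc_vert = (raw >> 9) & 0x3;            // bits 9-10
    const unsigned long long block_linear_kind = (raw >> 11) & 0xf;         // bits 11-14
    const unsigned long long block_linear_height_log2 = (raw >> 15) & 0xf;  // bits 15-18
    const unsigned long long surface_width = ((raw >> 32) & 0x3fff) + 1;    // surface_width_minus1 + 1
    const unsigned long long surface_height = ((raw >> 46) & 0x3fff) + 1;   // surface_height_minus1 + 1
    std::printf("format=0x%llx chroma_loc=(%llu,%llu) blk_kind=%llu blk_height_log2=%llu %llux%llu\n",
                pixel_format, chroma_loc_horiz, chroma_loc_vert, block_linear_kind,
                block_linear_height_log2, surface_width, surface_height);
    return 0;
}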
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h
index ea10c2f0f..6d4cdfd57 100644
--- a/src/video_core/command_classes/vic.h
+++ b/src/video_core/command_classes/vic.h
@@ -6,7 +6,6 @@
 
 #include <memory>
 #include <vector>
-#include "common/bit_field.h"
 #include "common/common_types.h"
 
 struct SwsContext;
@@ -14,6 +13,7 @@ struct SwsContext;
 namespace Tegra {
 class GPU;
 class Nvdec;
+union VicConfig;
 
 class Vic {
 public:
@@ -27,6 +27,7 @@ public:
     };
 
     explicit Vic(GPU& gpu, std::shared_ptr<Nvdec> nvdec_processor);
+
     ~Vic();
 
     /// Write to the device state.
@@ -35,23 +36,9 @@ public:
 private:
     void Execute();
 
-    enum class VideoPixelFormat : u64_le {
-        RGBA8 = 0x1f,
-        BGRA8 = 0x20,
-        RGBX8 = 0x23,
-        Yuv420 = 0x44,
-    };
+    void WriteRGBFrame(const AVFrame* frame, const VicConfig& config);
 
-    union VicConfig {
-        u64_le raw{};
-        BitField<0, 7, u64_le> pixel_format;
-        BitField<7, 2, u64_le> chroma_loc_horiz;
-        BitField<9, 2, u64_le> chroma_loc_vert;
-        BitField<11, 4, u64_le> block_linear_kind;
-        BitField<15, 4, u64_le> block_linear_height_log2;
-        BitField<32, 14, u64_le> surface_width_minus1;
-        BitField<46, 14, u64_le> surface_height_minus1;
-    };
+    void WriteYUVFrame(const AVFrame* frame, const VicConfig& config);
 
     GPU& gpu;
     std::shared_ptr<Tegra::Nvdec> nvdec_processor;
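
Reviewer note (not part of the change): the AV_PIX_FMT_YUV420P branch of WriteYUVFrame interleaves the planar U and V planes into a single UV plane, while the AV_PIX_FMT_NV12 branch can copy rows directly because VA-API frames already arrive interleaved. A minimal standalone sketch of that interleaving step; the function name and parameters are hypothetical, not yuzu API:

// Illustration only: same interleaving idea as WriteYUVFrame's AV_PIX_FMT_YUV420P case,
// reduced to a free function over raw planes.
#include <cstddef>
#include <cstdint>
#include <vector>

// Interleave planar U (Cb) and V (Cr) into a single UV plane, padding each output row
// out to aligned_width bytes.
std::vector<std::uint8_t> InterleaveChroma(const std::uint8_t* u_src, const std::uint8_t* v_src,
                                           std::size_t half_width, std::size_t half_height,
                                           std::size_t src_stride, std::size_t aligned_width) {
    std::vector<std::uint8_t> chroma(aligned_width * half_height);
    for (std::size_t y = 0; y < half_height; ++y) {
        const std::size_t src = y * src_stride;
        const std::size_t dst = y * aligned_width;
        for (std::size_t x = 0; x < half_width; ++x) {
            chroma[dst + x * 2] = u_src[src + x];     // Cb
            chroma[dst + x * 2 + 1] = v_src[src + x]; // Cr
        }
    }
    return chroma;
}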