author     Mai M       2021-10-08 19:19:20 -0400
committer  GitHub      2021-10-08 19:19:20 -0400
commit     39cd6306e63848cdb9fafa6d30311b372223d3d4 (patch)
tree       53014e4f34ee6fceabbc41c924899b42f7afb5ba /src
parent     Merge pull request #7139 from Morph1984/service-headers (diff)
parent     vic: Avoid memory corruption when multiple streams with different dimensions ... (diff)
download   yuzu-39cd6306e63848cdb9fafa6d30311b372223d3d4.tar.gz
           yuzu-39cd6306e63848cdb9fafa6d30311b372223d3d4.tar.xz
           yuzu-39cd6306e63848cdb9fafa6d30311b372223d3d4.zip
Merge pull request #7138 from ameerj/vic-fmt
vic: Implement RGBX8 video frame format
Diffstat (limited to 'src')
-rw-r--r--   src/video_core/command_classes/vic.cpp   259
-rw-r--r--   src/video_core/command_classes/vic.h       20
2 files changed, 154 insertions, 125 deletions
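
Note: the VicConfig union introduced below packs the whole surface description into one 64-bit word; the pixel format sits in bits 0-6 (RGBX8 is the new value 0x23) and the surface dimensions, stored minus one, sit in bits 32-45 and 46-59. A minimal standalone sketch of that decoding with plain shifts and masks (the helper names here are hypothetical and not part of the patch):

#include <cstdint>

// Hypothetical decoder mirroring the VicConfig bit layout in the diff below.
struct DecodedVicConfig {
    std::uint64_t pixel_format;   // bits 0-6   (BitField<0, 7, VideoPixelFormat>)
    std::uint64_t surface_width;  // bits 32-45 store width - 1
    std::uint64_t surface_height; // bits 46-59 store height - 1
};

constexpr DecodedVicConfig DecodeVicConfig(std::uint64_t raw) {
    DecodedVicConfig out{};
    out.pixel_format = raw & 0x7f;
    out.surface_width = ((raw >> 32) & 0x3fff) + 1;
    out.surface_height = ((raw >> 46) & 0x3fff) + 1;
    return out;
}

// 0x23 is the RGBX8 value introduced by this merge.
static_assert(DecodeVicConfig(0x23).pixel_format == 0x23);
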
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 0ee07f398..51f739801 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -16,6 +16,7 @@ extern "C" {
 }
 
 #include "common/assert.h"
+#include "common/bit_field.h"
 #include "common/logging/log.h"
 
 #include "video_core/command_classes/nvdec.h"
@@ -26,6 +27,25 @@ extern "C" {
 #include "video_core/textures/decoders.h"
 
 namespace Tegra {
+namespace {
+enum class VideoPixelFormat : u64_le {
+    RGBA8 = 0x1f,
+    BGRA8 = 0x20,
+    RGBX8 = 0x23,
+    Yuv420 = 0x44,
+};
+} // Anonymous namespace
+
+union VicConfig {
+    u64_le raw{};
+    BitField<0, 7, VideoPixelFormat> pixel_format;
+    BitField<7, 2, u64_le> chroma_loc_horiz;
+    BitField<9, 2, u64_le> chroma_loc_vert;
+    BitField<11, 4, u64_le> block_linear_kind;
+    BitField<15, 4, u64_le> block_linear_height_log2;
+    BitField<32, 14, u64_le> surface_width_minus1;
+    BitField<46, 14, u64_le> surface_height_minus1;
+};
 
 Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
     : gpu(gpu_),
@@ -65,134 +85,155 @@ void Vic::Execute() {
     if (!frame) {
         return;
     }
-    const auto pixel_format = static_cast<VideoPixelFormat>(config.pixel_format.Value());
-    switch (pixel_format) {
+    const u64 surface_width = config.surface_width_minus1 + 1;
+    const u64 surface_height = config.surface_height_minus1 + 1;
+    if (static_cast<u64>(frame->width) != surface_width ||
+        static_cast<u64>(frame->height) != surface_height) {
+        // TODO: Properly support multiple video streams with differing frame dimensions
+        LOG_WARNING(Debug, "Frame dimensions {}x{} do not match expected surface dimensions {}x{}",
+                    frame->width, frame->height, surface_width, surface_height);
+        return;
+    }
+    switch (config.pixel_format) {
+    case VideoPixelFormat::RGBA8:
     case VideoPixelFormat::BGRA8:
-    case VideoPixelFormat::RGBA8: {
-        LOG_TRACE(Service_NVDRV, "Writing RGB Frame");
+    case VideoPixelFormat::RGBX8:
+        WriteRGBFrame(frame, config);
+        break;
+    case VideoPixelFormat::Yuv420:
+        WriteYUVFrame(frame, config);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unknown video pixel format {:X}", config.pixel_format.Value());
+        break;
+    }
+}
 
-        if (scaler_ctx == nullptr || frame->width != scaler_width ||
-            frame->height != scaler_height) {
-            const AVPixelFormat target_format =
-                (pixel_format == VideoPixelFormat::RGBA8) ? AV_PIX_FMT_RGBA : AV_PIX_FMT_BGRA;
+void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
+    LOG_TRACE(Service_NVDRV, "Writing RGB Frame");
+
+    if (!scaler_ctx || frame->width != scaler_width || frame->height != scaler_height) {
+        const AVPixelFormat target_format = [pixel_format = config.pixel_format]() {
+            switch (pixel_format) {
+            case VideoPixelFormat::RGBA8:
+                return AV_PIX_FMT_RGBA;
+            case VideoPixelFormat::BGRA8:
+                return AV_PIX_FMT_BGRA;
+            case VideoPixelFormat::RGBX8:
+                return AV_PIX_FMT_RGB0;
+            default:
+                return AV_PIX_FMT_RGBA;
+            }
+        }();
+
+        sws_freeContext(scaler_ctx);
+        // Frames are decoded into either YUV420 or NV12 formats. Convert to desired RGB format
+        scaler_ctx = sws_getContext(frame->width, frame->height,
+                                    static_cast<AVPixelFormat>(frame->format), frame->width,
+                                    frame->height, target_format, 0, nullptr, nullptr, nullptr);
+        scaler_width = frame->width;
+        scaler_height = frame->height;
+        converted_frame_buffer.reset();
+    }
+    // Get Converted frame
+    const u32 width = static_cast<u32>(frame->width);
+    const u32 height = static_cast<u32>(frame->height);
+    const std::size_t linear_size = width * height * 4;
+
+    // Only allocate frame_buffer once per stream, as the size is not expected to change
+    if (!converted_frame_buffer) {
+        converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(linear_size)), av_free};
+    }
+    const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0};
+    u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
+
+    sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, &converted_frame_buf_addr,
+              converted_stride.data());
+
+    const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
+    if (blk_kind != 0) {
+        // swizzle pitch linear to block linear
+        const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
+        const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0);
+        luma_buffer.resize(size);
+        Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(),
+                                converted_frame_buffer.get(), block_height, 0, 0);
+
+        gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
+    } else {
+        // send pitch linear frame
+        gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
+                                       linear_size);
+    }
+}
 
-            sws_freeContext(scaler_ctx);
-            scaler_ctx = nullptr;
+void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
+    LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame");
 
-            // Frames are decoded into either YUV420 or NV12 formats. Convert to desired format
-            scaler_ctx = sws_getContext(frame->width, frame->height,
-                                        static_cast<AVPixelFormat>(frame->format), frame->width,
-                                        frame->height, target_format, 0, nullptr, nullptr, nullptr);
+    const std::size_t surface_width = config.surface_width_minus1 + 1;
+    const std::size_t surface_height = config.surface_height_minus1 + 1;
+    const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width));
+    const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height));
+    const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL;
 
-            scaler_width = frame->width;
-            scaler_height = frame->height;
-        }
-        // Get Converted frame
-        const u32 width = static_cast<u32>(frame->width);
-        const u32 height = static_cast<u32>(frame->height);
-        const std::size_t linear_size = width * height * 4;
-
-        // Only allocate frame_buffer once per stream, as the size is not expected to change
-        if (!converted_frame_buffer) {
-            converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(linear_size)), av_free};
-        }
-        const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0};
-        u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
-
-        sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height,
-                  &converted_frame_buf_addr, converted_stride.data());
-
-        const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
-        if (blk_kind != 0) {
-            // swizzle pitch linear to block linear
-            const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
-            const auto size =
-                Tegra::Texture::CalculateSize(true, 4, width, height, 1, block_height, 0);
-            luma_buffer.resize(size);
-            Tegra::Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(),
-                                           converted_frame_buffer.get(), block_height, 0, 0);
-
-            gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
-        } else {
-            // send pitch linear frame
-            gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
-                                           linear_size);
-        }
+    const auto stride = static_cast<size_t>(frame->linesize[0]);
+
+    luma_buffer.resize(aligned_width * surface_height);
+    chroma_buffer.resize(aligned_width * surface_height / 2);
+
+    // Populate luma buffer
+    const u8* luma_src = frame->data[0];
+    for (std::size_t y = 0; y < frame_height; ++y) {
+        const std::size_t src = y * stride;
+        const std::size_t dst = y * aligned_width;
+        for (std::size_t x = 0; x < frame_width; ++x) {
+            luma_buffer[dst + x] = luma_src[src + x];
+        }
+    }
+    gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
+                                   luma_buffer.size());
+
+    // Chroma
+    const std::size_t half_height = frame_height / 2;
+    const auto half_stride = static_cast<size_t>(frame->linesize[1]);
+
+    switch (frame->format) {
+    case AV_PIX_FMT_YUV420P: {
+        // Frame from FFmpeg software
+        // Populate chroma buffer from both channels with interleaving.
+        const std::size_t half_width = frame_width / 2;
+        const u8* chroma_b_src = frame->data[1];
+        const u8* chroma_r_src = frame->data[2];
+        for (std::size_t y = 0; y < half_height; ++y) {
+            const std::size_t src = y * half_stride;
+            const std::size_t dst = y * aligned_width;
+
+            for (std::size_t x = 0; x < half_width; ++x) {
+                chroma_buffer[dst + x * 2] = chroma_b_src[src + x];
+                chroma_buffer[dst + x * 2 + 1] = chroma_r_src[src + x];
+            }
+        }
         break;
     }
-    case VideoPixelFormat::Yuv420: {
-        LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame");
-
-        const std::size_t surface_width = config.surface_width_minus1 + 1;
-        const std::size_t surface_height = config.surface_height_minus1 + 1;
-        const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width));
-        const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height));
-        const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL;
-
-        const auto stride = static_cast<size_t>(frame->linesize[0]);
-
-        luma_buffer.resize(aligned_width * surface_height);
-        chroma_buffer.resize(aligned_width * surface_height / 2);
-
-        // Populate luma buffer
-        const u8* luma_src = frame->data[0];
-        for (std::size_t y = 0; y < frame_height; ++y) {
+    case AV_PIX_FMT_NV12: {
+        // Frame from VA-API hardware
+        // This is already interleaved so just copy
+        const u8* chroma_src = frame->data[1];
+        for (std::size_t y = 0; y < half_height; ++y) {
             const std::size_t src = y * stride;
             const std::size_t dst = y * aligned_width;
             for (std::size_t x = 0; x < frame_width; ++x) {
-                luma_buffer[dst + x] = luma_src[src + x];
-            }
-        }
-        gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
-                                       luma_buffer.size());
-
-        // Chroma
-        const std::size_t half_height = frame_height / 2;
-        const auto half_stride = static_cast<size_t>(frame->linesize[1]);
-
-        switch (frame->format) {
-        case AV_PIX_FMT_YUV420P: {
-            // Frame from FFmpeg software
-            // Populate chroma buffer from both channels with interleaving.
-            const std::size_t half_width = frame_width / 2;
-            const u8* chroma_b_src = frame->data[1];
-            const u8* chroma_r_src = frame->data[2];
-            for (std::size_t y = 0; y < half_height; ++y) {
-                const std::size_t src = y * half_stride;
-                const std::size_t dst = y * aligned_width;
-
-                for (std::size_t x = 0; x < half_width; ++x) {
-                    chroma_buffer[dst + x * 2] = chroma_b_src[src + x];
-                    chroma_buffer[dst + x * 2 + 1] = chroma_r_src[src + x];
-                }
+                chroma_buffer[dst + x] = chroma_src[src + x];
             }
-            break;
-        }
-        case AV_PIX_FMT_NV12: {
-            // Frame from VA-API hardware
-            // This is already interleaved so just copy
-            const u8* chroma_src = frame->data[1];
-            for (std::size_t y = 0; y < half_height; ++y) {
-                const std::size_t src = y * stride;
-                const std::size_t dst = y * aligned_width;
-                for (std::size_t x = 0; x < frame_width; ++x) {
-                    chroma_buffer[dst + x] = chroma_src[src + x];
-                }
-            }
-            break;
-        }
-        default:
-            UNREACHABLE();
-            break;
         }
-        gpu.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(),
-                                       chroma_buffer.size());
         break;
     }
     default:
-        UNIMPLEMENTED_MSG("Unknown video pixel format {}", config.pixel_format.Value());
+        UNREACHABLE();
         break;
     }
+    gpu.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(),
+                                   chroma_buffer.size());
 }
 
 } // namespace Tegra
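
Note: in WriteYUVFrame above, each output row is padded to a 256-byte-aligned surface width, and for planar AV_PIX_FMT_YUV420P input the U and V planes are interleaved into a single NV12-style chroma plane. A standalone sketch of that interleaving step, with hypothetical buffer/function names and the GPU write-back omitted:

#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical helper mirroring the YUV420P chroma interleave in Vic::WriteYUVFrame:
// Cb/Cr samples are woven into one UV plane whose rows use a 256-byte-aligned pitch.
std::vector<std::uint8_t> InterleaveChroma(const std::uint8_t* u_plane,
                                           const std::uint8_t* v_plane,
                                           std::size_t half_stride,
                                           std::size_t frame_width, std::size_t frame_height,
                                           std::size_t surface_width, std::size_t surface_height) {
    const std::size_t aligned_width = (surface_width + 0xff) & ~std::size_t{0xff};
    std::vector<std::uint8_t> chroma(aligned_width * surface_height / 2);
    const std::size_t half_width = frame_width / 2;
    const std::size_t half_height = frame_height / 2;
    for (std::size_t y = 0; y < half_height; ++y) {
        const std::size_t src = y * half_stride;   // source pitch of the U/V planes
        const std::size_t dst = y * aligned_width; // destination pitch is the aligned width
        for (std::size_t x = 0; x < half_width; ++x) {
            chroma[dst + x * 2] = u_plane[src + x];     // Cb
            chroma[dst + x * 2 + 1] = v_plane[src + x]; // Cr
        }
    }
    return chroma;
}
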
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h
index 74246e08c..6d4cdfd57 100644
--- a/src/video_core/command_classes/vic.h
+++ b/src/video_core/command_classes/vic.h
@@ -6,7 +6,6 @@
 
 #include <memory>
 #include <vector>
-#include "common/bit_field.h"
 #include "common/common_types.h"
 
 struct SwsContext;
@@ -14,6 +13,7 @@ struct SwsContext;
 namespace Tegra {
 class GPU;
 class Nvdec;
+union VicConfig;
 
 class Vic {
 public:
@@ -27,6 +27,7 @@ public:
     };
 
     explicit Vic(GPU& gpu, std::shared_ptr<Nvdec> nvdec_processor);
+
     ~Vic();
 
     /// Write to the device state.
@@ -35,22 +36,9 @@ public:
 private:
     void Execute();
 
-    enum class VideoPixelFormat : u64_le {
-        RGBA8 = 0x1f,
-        BGRA8 = 0x20,
-        Yuv420 = 0x44,
-    };
+    void WriteRGBFrame(const AVFrame* frame, const VicConfig& config);
 
-    union VicConfig {
-        u64_le raw{};
-        BitField<0, 7, u64_le> pixel_format;
-        BitField<7, 2, u64_le> chroma_loc_horiz;
-        BitField<9, 2, u64_le> chroma_loc_vert;
-        BitField<11, 4, u64_le> block_linear_kind;
-        BitField<15, 4, u64_le> block_linear_height_log2;
-        BitField<32, 14, u64_le> surface_width_minus1;
-        BitField<46, 14, u64_le> surface_height_minus1;
-    };
+    void WriteYUVFrame(const AVFrame* frame, const VicConfig& config);
 
     GPU& gpu;
     std::shared_ptr<Tegra::Nvdec> nvdec_processor;
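
Note on the vic.h change above: VideoPixelFormat and VicConfig move out of the class into vic.cpp, and the header only forward-declares `union VicConfig;` so the new WriteRGBFrame/WriteYUVFrame declarations can take it by reference; this also lets common/bit_field.h drop out of the header's include graph. A small illustration of the same pattern, with hypothetical names not taken from the patch:

// widget.h -- a forward-declared union is enough when members only take it by reference.
#include <cstdint>

union PackedState; // full definition lives in widget.cpp only

class Widget {
public:
    void Submit(std::uint64_t raw_config);

private:
    void Process(const PackedState& state); // OK: no definition of PackedState needed here
};
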