summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/core/CMakeLists.txt2
-rw-r--r--src/core/hle/service/y2r_u.cpp376
-rw-r--r--src/core/hle/service/y2r_u.h96
-rw-r--r--src/core/hw/y2r.cpp369
-rw-r--r--src/core/hw/y2r.h15
5 files changed, 695 insertions, 163 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index bbc285168..ea5533dcf 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -96,6 +96,7 @@ set(SRCS
96 hw/gpu.cpp 96 hw/gpu.cpp
97 hw/hw.cpp 97 hw/hw.cpp
98 hw/lcd.cpp 98 hw/lcd.cpp
99 hw/y2r.cpp
99 loader/3dsx.cpp 100 loader/3dsx.cpp
100 loader/elf.cpp 101 loader/elf.cpp
101 loader/loader.cpp 102 loader/loader.cpp
@@ -209,6 +210,7 @@ set(HEADERS
209 hw/gpu.h 210 hw/gpu.h
210 hw/hw.h 211 hw/hw.h
211 hw/lcd.h 212 hw/lcd.h
213 hw/y2r.h
212 loader/3dsx.h 214 loader/3dsx.h
213 loader/elf.h 215 loader/elf.h
214 loader/loader.h 216 loader/loader.h
diff --git a/src/core/hle/service/y2r_u.cpp b/src/core/hle/service/y2r_u.cpp
index 73a0899dd..17cb4f0f0 100644
--- a/src/core/hle/service/y2r_u.cpp
+++ b/src/core/hle/service/y2r_u.cpp
@@ -9,8 +9,8 @@
9#include "core/hle/hle.h" 9#include "core/hle/hle.h"
10#include "core/hle/kernel/event.h" 10#include "core/hle/kernel/event.h"
11#include "core/hle/service/y2r_u.h" 11#include "core/hle/service/y2r_u.h"
12#include "core/hw/y2r.h"
12#include "core/mem_map.h" 13#include "core/mem_map.h"
13#include "core/memory.h"
14 14
15#include "video_core/utils.h" 15#include "video_core/utils.h"
16#include "video_core/video_core.h" 16#include "video_core/video_core.h"
@@ -20,47 +20,6 @@
20 20
21namespace Y2R_U { 21namespace Y2R_U {
22 22
23enum class InputFormat : u8 {
24 /// 8-bit input, with YUV components in separate planes and using 4:2:2 subsampling.
25 YUV422_Indiv8 = 0,
26 /// 8-bit input, with YUV components in separate planes and using 4:2:0 subsampling.
27 YUV420_Indiv8 = 1,
28
29 YUV422_INDIV_16 = 2,
30 YUV420_INDIV_16 = 3,
31 YUV422_BATCH = 4,
32};
33
34enum class OutputFormat : u8 {
35 Rgb32 = 0,
36 Rgb24 = 1,
37 Rgb16_555 = 2,
38 Rgb16_565 = 3,
39};
40
41enum class Rotation : u8 {
42 None = 0,
43 Clockwise_90 = 1,
44 Clockwise_180 = 2,
45 Clockwise_270 = 3,
46};
47
48enum class BlockAlignment : u8 {
49 /// Image is output in linear format suitable for use as a framebuffer.
50 Linear = 0,
51 /// Image is output in tiled PICA format, suitable for use as a texture.
52 Block8x8 = 1,
53};
54
55enum class StandardCoefficient : u8 {
56 ITU_Rec601 = 0,
57 ITU_Rec709 = 1,
58 ITU_Rec601_Scaling = 2,
59 ITU_Rec709_Scaling = 3,
60};
61
62static Kernel::SharedPtr<Kernel::Event> completion_event;
63
64struct ConversionParameters { 23struct ConversionParameters {
65 InputFormat input_format; 24 InputFormat input_format;
66 OutputFormat output_format; 25 OutputFormat output_format;
@@ -74,28 +33,60 @@ struct ConversionParameters {
74}; 33};
75static_assert(sizeof(ConversionParameters) == 12, "ConversionParameters struct has incorrect size"); 34static_assert(sizeof(ConversionParameters) == 12, "ConversionParameters struct has incorrect size");
76 35
77struct ConversionBuffer { 36static Kernel::SharedPtr<Kernel::Event> completion_event;
78 VAddr address; 37static ConversionConfiguration conversion;
79 u32 image_size;
80 u16 transfer_unit;
81 u16 stride;
82};
83 38
84struct ConversionData { 39static const CoefficientSet standard_coefficients[4] = {
85 ConversionParameters params; 40 {{ 0x100, 0x166, 0xB6, 0x58, 0x1C5, -0x166F, 0x10EE, -0x1C5B }}, // ITU_Rec601
86 /// Input parameters for the Y (luma) plane 41 {{ 0x100, 0x193, 0x77, 0x2F, 0x1DB, -0x1933, 0xA7C, -0x1D51 }}, // ITU_Rec709
87 ConversionBuffer src_Y; 42 {{ 0x12A, 0x198, 0xD0, 0x64, 0x204, -0x1BDE, 0x10F2, -0x229B }}, // ITU_Rec601_Scaling
88 /// Output parameters for the conversion results 43 {{ 0x12A, 0x1CA, 0x88, 0x36, 0x21C, -0x1F04, 0x99C, -0x2421 }}, // ITU_Rec709_Scaling
89 ConversionBuffer dst;
90}; 44};
91 45
92static ConversionData conversion; 46ResultCode ConversionConfiguration::SetInputLineWidth(u16 width) {
47 if (width == 0 || width > 1024 || width % 8 != 0) {
48 return ResultCode(ErrorDescription::OutOfRange, ErrorModule::CAM,
49 ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053FD
50 }
51
52 // Note: The hardware uses the register value 0 to represent a width of 1024, so for a width of
53 // 1024 the `camera` module would set the value 0 here, but we don't need to emulate this
54 // internal detail.
55 this->input_line_width = width;
56 return RESULT_SUCCESS;
57}
58
/// Validates and stores the number of input lines used by the conversion.
/// Returns an OutOfRange / InvalidArgument error when `lines` is 0 or greater than 1024 and
/// RESULT_SUCCESS otherwise. A value of exactly 1024 is accepted but intentionally not stored.
59ResultCode ConversionConfiguration::SetInputLines(u16 lines) {
60 if (lines == 0 || lines > 1024) {
61 return ResultCode(ErrorDescription::OutOfRange, ErrorModule::CAM,
62 ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053FD
63 }
64
65 // Note: In what appears to be a bug, the `camera` module does not set the hardware register at
66 // all if `lines` is 1024, so the conversion uses the last value that was set. The intention
67 // was probably to set it to 0 like in SetInputLineWidth.
68 if (lines != 1024) {
69 this->input_lines = lines;
70 }
71 return RESULT_SUCCESS;
72}
73
/// Loads one of the built-in coefficient presets from `standard_coefficients` into `coefficients`.
/// Returns an InvalidEnumValue / InvalidArgument error when the enum value does not index one of
/// the presets, and RESULT_SUCCESS otherwise.
74ResultCode ConversionConfiguration::SetStandardCoefficient(StandardCoefficient standard_coefficient) {
75 size_t index = static_cast<size_t>(standard_coefficient);
 // The bound of 4 matches the number of entries declared in the `standard_coefficients` table.
76 if (index >= 4) {
77 return ResultCode(ErrorDescription::InvalidEnumValue, ErrorModule::CAM,
78 ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053ED
79 }
80
 // Both sides are CoefficientSet arrays, so sizeof(coefficients) covers exactly one preset.
 // NOTE(review): std::memcpy requires <cstring>; it is not among the includes visible in this
 // hunk - confirm it is included (directly or transitively) for this file.
81 std::memcpy(coefficients.data(), standard_coefficients[index].data(), sizeof(coefficients));
82 return RESULT_SUCCESS;
83}
93 84
94static void SetInputFormat(Service::Interface* self) { 85static void SetInputFormat(Service::Interface* self) {
95 u32* cmd_buff = Kernel::GetCommandBuffer(); 86 u32* cmd_buff = Kernel::GetCommandBuffer();
96 87
97 conversion.params.input_format = static_cast<InputFormat>(cmd_buff[1]); 88 conversion.input_format = static_cast<InputFormat>(cmd_buff[1]);
98 LOG_DEBUG(Service_Y2R, "called input_format=%u", conversion.params.input_format); 89 LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format);
99 90
100 cmd_buff[1] = RESULT_SUCCESS.raw; 91 cmd_buff[1] = RESULT_SUCCESS.raw;
101} 92}
@@ -103,8 +94,8 @@ static void SetInputFormat(Service::Interface* self) {
103static void SetOutputFormat(Service::Interface* self) { 94static void SetOutputFormat(Service::Interface* self) {
104 u32* cmd_buff = Kernel::GetCommandBuffer(); 95 u32* cmd_buff = Kernel::GetCommandBuffer();
105 96
106 conversion.params.output_format = static_cast<OutputFormat>(cmd_buff[1]); 97 conversion.output_format = static_cast<OutputFormat>(cmd_buff[1]);
107 LOG_DEBUG(Service_Y2R, "called output_format=%u", conversion.params.output_format); 98 LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format);
108 99
109 cmd_buff[1] = RESULT_SUCCESS.raw; 100 cmd_buff[1] = RESULT_SUCCESS.raw;
110} 101}
@@ -112,8 +103,8 @@ static void SetOutputFormat(Service::Interface* self) {
112static void SetRotation(Service::Interface* self) { 103static void SetRotation(Service::Interface* self) {
113 u32* cmd_buff = Kernel::GetCommandBuffer(); 104 u32* cmd_buff = Kernel::GetCommandBuffer();
114 105
115 conversion.params.rotation = static_cast<Rotation>(cmd_buff[1]); 106 conversion.rotation = static_cast<Rotation>(cmd_buff[1]);
116 LOG_DEBUG(Service_Y2R, "called rotation=%u", conversion.params.rotation); 107 LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation);
117 108
118 cmd_buff[1] = RESULT_SUCCESS.raw; 109 cmd_buff[1] = RESULT_SUCCESS.raw;
119} 110}
@@ -121,10 +112,18 @@ static void SetRotation(Service::Interface* self) {
121static void SetBlockAlignment(Service::Interface* self) { 112static void SetBlockAlignment(Service::Interface* self) {
122 u32* cmd_buff = Kernel::GetCommandBuffer(); 113 u32* cmd_buff = Kernel::GetCommandBuffer();
123 114
124 conversion.params.block_alignment = static_cast<BlockAlignment>(cmd_buff[1]); 115 conversion.block_alignment = static_cast<BlockAlignment>(cmd_buff[1]);
125 LOG_DEBUG(Service_Y2R, "called alignment=%u", conversion.params.block_alignment); 116 LOG_DEBUG(Service_Y2R, "called alignment=%hhu", conversion.block_alignment);
117
118 cmd_buff[1] = RESULT_SUCCESS.raw;
119}
120
121static void SetTransferEndInterrupt(Service::Interface* self) {
122 u32* cmd_buff = Kernel::GetCommandBuffer();
126 123
124 cmd_buff[0] = 0x000D0040;
127 cmd_buff[1] = RESULT_SUCCESS.raw; 125 cmd_buff[1] = RESULT_SUCCESS.raw;
126 LOG_DEBUG(Service_Y2R, "(STUBBED) called");
128} 127}
129 128
130/** 129/**
@@ -147,11 +146,56 @@ static void SetSendingY(Service::Interface* self) {
147 conversion.src_Y.address = cmd_buff[1]; 146 conversion.src_Y.address = cmd_buff[1];
148 conversion.src_Y.image_size = cmd_buff[2]; 147 conversion.src_Y.image_size = cmd_buff[2];
149 conversion.src_Y.transfer_unit = cmd_buff[3]; 148 conversion.src_Y.transfer_unit = cmd_buff[3];
150 conversion.src_Y.stride = cmd_buff[4]; 149 conversion.src_Y.gap = cmd_buff[4];
151 u32 src_process_handle = cmd_buff[6]; 150 u32 src_process_handle = cmd_buff[6];
152 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, " 151 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
153 "src_process_handle=0x%08X", conversion.src_Y.image_size, 152 "src_process_handle=0x%08X", conversion.src_Y.image_size,
154 conversion.src_Y.transfer_unit, conversion.src_Y.stride, src_process_handle); 153 conversion.src_Y.transfer_unit, conversion.src_Y.gap, src_process_handle);
154
155 cmd_buff[1] = RESULT_SUCCESS.raw;
156}
157
/// Y2R_U::SetSendingU service function.
/// Copies the source buffer description for the U plane from command words 1-4 into
/// `conversion.src_U` and writes RESULT_SUCCESS to command word 1.
158static void SetSendingU(Service::Interface* self) {
159 u32* cmd_buff = Kernel::GetCommandBuffer();
160
161 conversion.src_U.address = cmd_buff[1];
162 conversion.src_U.image_size = cmd_buff[2];
163 conversion.src_U.transfer_unit = cmd_buff[3];
164 conversion.src_U.gap = cmd_buff[4];
 // The process handle is only read for logging; it is not stored or used otherwise here.
165 u32 src_process_handle = cmd_buff[6];
 // NOTE(review): the log label still says "transfer_stride" although the value printed is `gap`.
166 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
167 "src_process_handle=0x%08X", conversion.src_U.image_size,
168 conversion.src_U.transfer_unit, conversion.src_U.gap, src_process_handle);
169
170 cmd_buff[1] = RESULT_SUCCESS.raw;
171}
172
/// Y2R_U::SetSendingV service function.
/// Copies the source buffer description for the V plane from command words 1-4 into
/// `conversion.src_V` and writes RESULT_SUCCESS to command word 1.
173static void SetSendingV(Service::Interface* self) {
174 u32* cmd_buff = Kernel::GetCommandBuffer();
175
176 conversion.src_V.address = cmd_buff[1];
177 conversion.src_V.image_size = cmd_buff[2];
178 conversion.src_V.transfer_unit = cmd_buff[3];
179 conversion.src_V.gap = cmd_buff[4];
 // The process handle is only read for logging; it is not stored or used otherwise here.
180 u32 src_process_handle = cmd_buff[6];
 // NOTE(review): the log label still says "transfer_stride" although the value printed is `gap`.
181 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
182 "src_process_handle=0x%08X", conversion.src_V.image_size,
183 conversion.src_V.transfer_unit, conversion.src_V.gap, src_process_handle);
184
185 cmd_buff[1] = RESULT_SUCCESS.raw;
186}
187
188static void SetSendingYUYV(Service::Interface* self) {
189 u32* cmd_buff = Kernel::GetCommandBuffer();
190
191 conversion.src_YUYV.address = cmd_buff[1];
192 conversion.src_YUYV.image_size = cmd_buff[2];
193 conversion.src_YUYV.transfer_unit = cmd_buff[3];
194 conversion.src_YUYV.gap = cmd_buff[4];
195 u32 src_process_handle = cmd_buff[6];
196 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
197 "src_process_handle=0x%08X", conversion.src_YUYV.image_size,
198 conversion.src_YUYV.transfer_unit, conversion.src_YUYV.gap, src_process_handle);
155 199
156 cmd_buff[1] = RESULT_SUCCESS.raw; 200 cmd_buff[1] = RESULT_SUCCESS.raw;
157} 201}
@@ -162,11 +206,11 @@ static void SetReceiving(Service::Interface* self) {
162 conversion.dst.address = cmd_buff[1]; 206 conversion.dst.address = cmd_buff[1];
163 conversion.dst.image_size = cmd_buff[2]; 207 conversion.dst.image_size = cmd_buff[2];
164 conversion.dst.transfer_unit = cmd_buff[3]; 208 conversion.dst.transfer_unit = cmd_buff[3];
165 conversion.dst.stride = cmd_buff[4]; 209 conversion.dst.gap = cmd_buff[4];
166 u32 dst_process_handle = cmd_buff[6]; 210 u32 dst_process_handle = cmd_buff[6];
167 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, " 211 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
168 "dst_process_handle=0x%08X", conversion.dst.image_size, 212 "dst_process_handle=0x%08X", conversion.dst.image_size,
169 conversion.dst.transfer_unit, conversion.dst.stride, 213 conversion.dst.transfer_unit, conversion.dst.gap,
170 dst_process_handle); 214 dst_process_handle);
171 215
172 cmd_buff[1] = RESULT_SUCCESS.raw; 216 cmd_buff[1] = RESULT_SUCCESS.raw;
@@ -175,107 +219,54 @@ static void SetReceiving(Service::Interface* self) {
175static void SetInputLineWidth(Service::Interface* self) { 219static void SetInputLineWidth(Service::Interface* self) {
176 u32* cmd_buff = Kernel::GetCommandBuffer(); 220 u32* cmd_buff = Kernel::GetCommandBuffer();
177 221
178 conversion.params.input_line_width = cmd_buff[1]; 222 LOG_DEBUG(Service_Y2R, "called input_line_width=%u", cmd_buff[1]);
179 LOG_DEBUG(Service_Y2R, "input_line_width=%u", conversion.params.input_line_width); 223 cmd_buff[1] = conversion.SetInputLineWidth(cmd_buff[1]).raw;
180
181 cmd_buff[1] = RESULT_SUCCESS.raw;
182} 224}
183 225
184static void SetInputLines(Service::Interface* self) { 226static void SetInputLines(Service::Interface* self) {
185 u32* cmd_buff = Kernel::GetCommandBuffer(); 227 u32* cmd_buff = Kernel::GetCommandBuffer();
186 228
187 conversion.params.input_lines = cmd_buff[1]; 229 LOG_DEBUG(Service_Y2R, "called input_line_number=%u", cmd_buff[1]);
188 LOG_DEBUG(Service_Y2R, "input_line_number=%u", conversion.params.input_lines); 230 cmd_buff[1] = conversion.SetInputLines(cmd_buff[1]).raw;
189
190 cmd_buff[1] = RESULT_SUCCESS.raw;
191} 231}
192 232
193static void StartConversion(Service::Interface* self) { 233static void SetCoefficient(Service::Interface* self) {
194 u32* cmd_buff = Kernel::GetCommandBuffer(); 234 u32* cmd_buff = Kernel::GetCommandBuffer();
195 235
196 const ConversionParameters& params = conversion.params; 236 const u16* coefficients = reinterpret_cast<const u16*>(&cmd_buff[1]);
197 237 std::memcpy(conversion.coefficients.data(), coefficients, sizeof(CoefficientSet));
198 const u8* srcY_buffer = Memory::GetPointer(conversion.src_Y.address); 238 LOG_DEBUG(Service_Y2R, "called coefficients=[%hX, %hX, %hX, %hX, %hX, %hX, %hX, %hX]",
199 u8* dst_buffer = Memory::GetPointer(conversion.dst.address); 239 coefficients[0], coefficients[1], coefficients[2], coefficients[3],
200 240 coefficients[4], coefficients[5], coefficients[6], coefficients[7]);
201 // TODO: support color and other kinds of conversions
202 ASSERT(params.input_format == InputFormat::YUV422_Indiv8
203 || params.input_format == InputFormat::YUV420_Indiv8);
204 ASSERT(params.output_format == OutputFormat::Rgb24);
205 ASSERT(params.rotation == Rotation::None);
206 const int bpp = 3;
207
208 switch (params.block_alignment) {
209 case BlockAlignment::Linear:
210 {
211 const size_t input_lines = params.input_lines;
212 const size_t input_line_width = params.input_line_width;
213 const size_t srcY_stride = conversion.src_Y.stride;
214 const size_t dst_stride = conversion.dst.stride;
215
216 size_t srcY_offset = 0;
217 size_t dst_offset = 0;
218
219 for (size_t line = 0; line < input_lines; ++line) {
220 for (size_t i = 0; i < input_line_width; ++i) {
221 u8 Y = srcY_buffer[srcY_offset];
222 dst_buffer[dst_offset + 0] = Y;
223 dst_buffer[dst_offset + 1] = Y;
224 dst_buffer[dst_offset + 2] = Y;
225
226 srcY_offset += 1;
227 dst_offset += bpp;
228 }
229 srcY_offset += srcY_stride;
230 dst_offset += dst_stride;
231 }
232 break;
233 }
234 case BlockAlignment::Block8x8:
235 {
236 const size_t input_lines = params.input_lines;
237 const size_t input_line_width = params.input_line_width;
238 const size_t srcY_stride = conversion.src_Y.stride;
239 const size_t dst_transfer_unit = conversion.dst.transfer_unit;
240 const size_t dst_stride = conversion.dst.stride;
241
242 size_t srcY_offset = 0;
243 size_t dst_tile_line_offs = 0;
244 241
245 const size_t tile_size = 8 * 8 * bpp; 242 cmd_buff[1] = RESULT_SUCCESS.raw;
243}
246 244
247 for (size_t line = 0; line < input_lines;) { 245static void SetStandardCoefficient(Service::Interface* self) {
248 size_t max_line = line + 8; 246 u32* cmd_buff = Kernel::GetCommandBuffer();
249 247
250 for (; line < max_line; ++line) { 248 LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u", cmd_buff[1]);
251 for (size_t x = 0; x < input_line_width; ++x) {
252 size_t tile_x = x / 8;
253 249
254 size_t dst_tile_offs = dst_tile_line_offs + tile_x * tile_size; 250 cmd_buff[1] = conversion.SetStandardCoefficient((StandardCoefficient)cmd_buff[1]).raw;
255 size_t tile_i = VideoCore::MortonInterleave((u32)x, (u32)line); 251}
256 252
257 size_t dst_offset = dst_tile_offs + tile_i * bpp; 253static void SetAlpha(Service::Interface* self) {
254 u32* cmd_buff = Kernel::GetCommandBuffer();
258 255
259 u8 Y = srcY_buffer[srcY_offset]; 256 conversion.alpha = cmd_buff[1];
260 dst_buffer[dst_offset + 0] = Y; 257 LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha);
261 dst_buffer[dst_offset + 1] = Y;
262 dst_buffer[dst_offset + 2] = Y;
263 258
264 srcY_offset += 1; 259 cmd_buff[1] = RESULT_SUCCESS.raw;
265 } 260}
266 261
267 srcY_offset += srcY_stride; 262static void StartConversion(Service::Interface* self) {
268 } 263 u32* cmd_buff = Kernel::GetCommandBuffer();
269 264
270 dst_tile_line_offs += dst_transfer_unit + dst_stride; 265 HW::Y2R::PerformConversion(conversion);
271 }
272 break;
273 }
274 }
275 266
276 // dst_image_size would seem to be perfect for this, but it doesn't include the stride :( 267 // dst_image_size would seem to be perfect for this, but it doesn't include the gap :(
277 u32 total_output_size = params.input_lines * 268 u32 total_output_size = conversion.input_lines *
278 (conversion.dst.transfer_unit + conversion.dst.stride); 269 (conversion.dst.transfer_unit + conversion.dst.gap);
279 VideoCore::g_renderer->hw_rasterizer->NotifyFlush( 270 VideoCore::g_renderer->hw_rasterizer->NotifyFlush(
280 Memory::VirtualToPhysicalAddress(conversion.dst.address), total_output_size); 271 Memory::VirtualToPhysicalAddress(conversion.dst.address), total_output_size);
281 272
@@ -285,6 +276,14 @@ static void StartConversion(Service::Interface* self) {
285 cmd_buff[1] = RESULT_SUCCESS.raw; 276 cmd_buff[1] = RESULT_SUCCESS.raw;
286} 277}
287 278
/// Y2R_U::StopConversion service function.
/// Only writes the response header and RESULT_SUCCESS; no conversion state is touched here.
279static void StopConversion(Service::Interface* self) {
280 u32* cmd_buff = Kernel::GetCommandBuffer();
281
282 cmd_buff[0] = 0x00270040;
283 cmd_buff[1] = RESULT_SUCCESS.raw;
284 LOG_DEBUG(Service_Y2R, "called");
285}
286
288/** 287/**
289 * Y2R_U::IsBusyConversion service function 288 * Y2R_U::IsBusyConversion service function
290 * Outputs: 289 * Outputs:
@@ -306,15 +305,31 @@ static void SetConversionParams(Service::Interface* self) {
306 u32* cmd_buff = Kernel::GetCommandBuffer(); 305 u32* cmd_buff = Kernel::GetCommandBuffer();
307 306
308 auto params = reinterpret_cast<const ConversionParameters*>(&cmd_buff[1]); 307 auto params = reinterpret_cast<const ConversionParameters*>(&cmd_buff[1]);
309 conversion.params = *params;
310
311 cmd_buff[0] = 0x00290000; // TODO verify
312 cmd_buff[1] = RESULT_SUCCESS.raw;
313 LOG_DEBUG(Service_Y2R, 308 LOG_DEBUG(Service_Y2R,
314 "called input_format=%hhu output_format=%hhu rotation=%hhu block_alignment=%hhu " 309 "called input_format=%hhu output_format=%hhu rotation=%hhu block_alignment=%hhu "
315 "input_line_width=%hX input_lines=%hu standard_coefficient=%hhu reserved=%hhu alpha=%hX", 310 "input_line_width=%hu input_lines=%hu standard_coefficient=%hhu "
311 "reserved=%hhu alpha=%hX",
316 params->input_format, params->output_format, params->rotation, params->block_alignment, 312 params->input_format, params->output_format, params->rotation, params->block_alignment,
317 params->input_line_width, params->input_lines, params->standard_coefficient); 313 params->input_line_width, params->input_lines, params->standard_coefficient,
314 params->reserved, params->alpha);
315
316 ResultCode result = RESULT_SUCCESS;
317
318 conversion.input_format = params->input_format;
319 conversion.output_format = params->output_format;
320 conversion.rotation = params->rotation;
321 conversion.block_alignment = params->block_alignment;
322 result = conversion.SetInputLineWidth(params->input_line_width);
323 if (result.IsError()) goto cleanup;
324 result = conversion.SetInputLines(params->input_lines);
325 if (result.IsError()) goto cleanup;
326 result = conversion.SetStandardCoefficient(params->standard_coefficient);
327 if (result.IsError()) goto cleanup;
328 conversion.alpha = params->alpha;
329
330cleanup:
331 cmd_buff[0] = 0x00290040; // TODO verify
332 cmd_buff[1] = result.raw;
318} 333}
319 334
320static void PingProcess(Service::Interface* self) { 335static void PingProcess(Service::Interface* self) {
@@ -325,28 +340,63 @@ static void PingProcess(Service::Interface* self) {
325 LOG_WARNING(Service_Y2R, "(STUBBED) called"); 340 LOG_WARNING(Service_Y2R, "(STUBBED) called");
326} 341}
327 342
343static void DriverInitialize(Service::Interface* self) {
344 u32* cmd_buff = Kernel::GetCommandBuffer();
345
346 conversion.input_format = InputFormat::YUV422_Indiv8;
347 conversion.output_format = OutputFormat::RGBA8;
348 conversion.rotation = Rotation::None;
349 conversion.block_alignment = BlockAlignment::Linear;
350 conversion.coefficients.fill(0);
351 conversion.SetInputLineWidth(1024);
352 conversion.SetInputLines(1024);
353 conversion.alpha = 0;
354
355 ConversionBuffer zero_buffer = {};
356 conversion.src_Y = zero_buffer;
357 conversion.src_U = zero_buffer;
358 conversion.src_V = zero_buffer;
359 conversion.dst = zero_buffer;
360
361 completion_event->Clear();
362
363 cmd_buff[0] = 0x002B0040;
364 cmd_buff[1] = RESULT_SUCCESS.raw;
365 LOG_DEBUG(Service_Y2R, "called");
366}
367
/// Y2R_U::DriverFinalize service function.
/// Only writes the response header and RESULT_SUCCESS; no conversion state is touched here.
368static void DriverFinalize(Service::Interface* self) {
369 u32* cmd_buff = Kernel::GetCommandBuffer();
370
371 cmd_buff[0] = 0x002C0040;
372 cmd_buff[1] = RESULT_SUCCESS.raw;
373 LOG_DEBUG(Service_Y2R, "called");
374}
375
328const Interface::FunctionInfo FunctionTable[] = { 376const Interface::FunctionInfo FunctionTable[] = {
329 {0x00010040, SetInputFormat, "SetInputFormat"}, 377 {0x00010040, SetInputFormat, "SetInputFormat"},
330 {0x00030040, SetOutputFormat, "SetOutputFormat"}, 378 {0x00030040, SetOutputFormat, "SetOutputFormat"},
331 {0x00050040, SetRotation, "SetRotation"}, 379 {0x00050040, SetRotation, "SetRotation"},
332 {0x00070040, SetBlockAlignment, "SetBlockAlignment"}, 380 {0x00070040, SetBlockAlignment, "SetBlockAlignment"},
333 {0x000D0040, nullptr, "SetTransferEndInterrupt"}, 381 {0x000D0040, SetTransferEndInterrupt, "SetTransferEndInterrupt"},
334 {0x000F0000, GetTransferEndEvent, "GetTransferEndEvent"}, 382 {0x000F0000, GetTransferEndEvent, "GetTransferEndEvent"},
335 {0x00100102, SetSendingY, "SetSendingY"}, 383 {0x00100102, SetSendingY, "SetSendingY"},
336 {0x00110102, nullptr, "SetSendingU"}, 384 {0x00110102, SetSendingU, "SetSendingU"},
337 {0x00120102, nullptr, "SetSendingV"}, 385 {0x00120102, SetSendingV, "SetSendingV"},
386 {0x00130102, SetSendingYUYV, "SetSendingYUYV"},
338 {0x00180102, SetReceiving, "SetReceiving"}, 387 {0x00180102, SetReceiving, "SetReceiving"},
339 {0x001A0040, SetInputLineWidth, "SetInputLineWidth"}, 388 {0x001A0040, SetInputLineWidth, "SetInputLineWidth"},
340 {0x001C0040, SetInputLines, "SetInputLines"}, 389 {0x001C0040, SetInputLines, "SetInputLines"},
341 {0x00200040, nullptr, "SetStandardCoefficient"}, 390 {0x001E0100, SetCoefficient, "SetCoefficient"},
342 {0x00220040, nullptr, "SetAlpha"}, 391 {0x00200040, SetStandardCoefficient, "SetStandardCoefficient"},
392 {0x00220040, SetAlpha, "SetAlpha"},
343 {0x00260000, StartConversion, "StartConversion"}, 393 {0x00260000, StartConversion, "StartConversion"},
344 {0x00270000, nullptr, "StopConversion"}, 394 {0x00270000, StopConversion, "StopConversion"},
345 {0x00280000, IsBusyConversion, "IsBusyConversion"}, 395 {0x00280000, IsBusyConversion, "IsBusyConversion"},
346 {0x002901C0, SetConversionParams, "SetConversionParams"}, 396 {0x002901C0, SetConversionParams, "SetConversionParams"},
347 {0x002A0000, PingProcess, "PingProcess"}, 397 {0x002A0000, PingProcess, "PingProcess"},
348 {0x002B0000, nullptr, "DriverInitialize"}, 398 {0x002B0000, DriverInitialize, "DriverInitialize"},
349 {0x002C0000, nullptr, "DriverFinalize"}, 399 {0x002C0000, DriverFinalize, "DriverFinalize"},
350}; 400};
351 401
352//////////////////////////////////////////////////////////////////////////////////////////////////// 402////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/hle/service/y2r_u.h b/src/core/hle/service/y2r_u.h
index 171aecfd1..7df47fcb9 100644
--- a/src/core/hle/service/y2r_u.h
+++ b/src/core/hle/service/y2r_u.h
@@ -4,6 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8
9#include "common/common_types.h"
10
7#include "core/hle/service/service.h" 11#include "core/hle/service/service.h"
8 12
9//////////////////////////////////////////////////////////////////////////////////////////////////// 13////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -11,6 +15,98 @@
11 15
12namespace Y2R_U { 16namespace Y2R_U {
13 17
/// Layout and subsampling of the YUV source data fed into a conversion.
18enum class InputFormat : u8 {
19 /// 8-bit input, with YUV components in separate planes and 4:2:2 subsampling.
20 YUV422_Indiv8 = 0,
21 /// 8-bit input, with YUV components in separate planes and 4:2:0 subsampling.
22 YUV420_Indiv8 = 1,
23
24 /// 16-bit input (only LSB used), with YUV components in separate planes and 4:2:2 subsampling.
25 YUV422_Indiv16 = 2,
26 /// 16-bit input (only LSB used), with YUV components in separate planes and 4:2:0 subsampling.
27 YUV420_Indiv16 = 3,
28
29 /// 8-bit input, with a single interleaved stream in YUYV format and 4:2:2 subsampling.
30 YUYV422_Interleaved = 4,
31};
32
/// Pixel format of the converted RGB output.
/// NOTE(review): per-channel bit layouts are presumed from the enumerator names - confirm against
/// the conversion code before relying on them.
33enum class OutputFormat : u8 {
34 RGBA8 = 0,
35 RGB8 = 1,
36 RGB5A1 = 2,
37 RGB565 = 3,
38};
39
/// Rotation selected for the output image, in clockwise steps of 90 degrees.
40enum class Rotation : u8 {
41 None = 0,
42 Clockwise_90 = 1,
43 Clockwise_180 = 2,
44 Clockwise_270 = 3,
45};
46
/// Memory arrangement of the output image.
47enum class BlockAlignment : u8 {
48 /// Image is output in linear format suitable for use as a framebuffer.
49 Linear = 0,
50 /// Image is output in tiled PICA format, suitable for use as a texture.
51 Block8x8 = 1,
52};
53
/// Identifies one of the built-in coefficient presets; the value is used as an index into the
/// preset table by ConversionConfiguration::SetStandardCoefficient.
54enum class StandardCoefficient : u8 {
55 /// ITU Rec. BT.601 primaries, with PC ranges.
56 ITU_Rec601 = 0,
57 /// ITU Rec. BT.709 primaries, with PC ranges.
58 ITU_Rec709 = 1,
59 /// ITU Rec. BT.601 primaries, with TV ranges.
60 ITU_Rec601_Scaling = 2,
61 /// ITU Rec. BT.709 primaries, with TV ranges.
62 ITU_Rec709_Scaling = 3,
63};
64
65/**
66 * A set of coefficients configuring the YUV to RGB conversion. Coefficients 0-4 are unsigned 2.8
67 * fixed point numbers representing entries of the conversion matrix, while coefficients 5-7 are
68 * signed 11.5 fixed point numbers added as offsets to the RGB result.
69 *
70 * The overall conversion process formula is:
71 * ```
72 * R = trunc((c_0 * Y + c_1 * V) + c_5 + 0.75)
73 * G = trunc((c_0 * Y - c_3 * U - c_2 * V) + c_6 + 0.75)
74 * B = trunc((c_0 * Y + c_4 * U ) + c_7 + 0.75)
75 * ```
76 */
77using CoefficientSet = std::array<s16, 8>;
78
/// Describes one DMA-style source or destination buffer used by a conversion.
79struct ConversionBuffer {
80 /// Current reading/writing address of this buffer.
81 VAddr address;
82 /// Remaining amount of bytes to be DMAed, does not include the inter-transfer gap.
83 u32 image_size;
84 /// Size of a single DMA transfer.
85 u16 transfer_unit;
86 /// Amount of bytes to be skipped between copying each `transfer_unit` bytes.
87 u16 gap;
88};
89
/// Complete state of a pending conversion: format options, image dimensions, colour
/// coefficients, alpha value and the source/destination buffer descriptions.
90struct ConversionConfiguration {
91 InputFormat input_format;
92 OutputFormat output_format;
93 Rotation rotation;
94 BlockAlignment block_alignment;
95 u16 input_line_width;
96 u16 input_lines;
97 CoefficientSet coefficients;
98 u16 alpha;
99
100 /// Input parameters for the Y, U and V planes and for the interleaved YUYV stream
101 ConversionBuffer src_Y, src_U, src_V, src_YUYV;
102 /// Output parameters for the conversion results
103 ConversionBuffer dst;
104
 /// Stores `width` if it is non-zero, at most 1024 and a multiple of 8; otherwise returns an
 /// OutOfRange error.
105 ResultCode SetInputLineWidth(u16 width);
 /// Stores `lines` if it is in 1..1023; 1024 is accepted without being stored, and 0 or values
 /// above 1024 return an OutOfRange error.
106 ResultCode SetInputLines(u16 lines);
 /// Copies the selected built-in preset into `coefficients`; returns an InvalidEnumValue error
 /// for values outside the preset table.
107 ResultCode SetStandardCoefficient(StandardCoefficient standard_coefficient);
108};
109
14class Interface : public Service::Interface { 110class Interface : public Service::Interface {
15public: 111public:
16 Interface(); 112 Interface();
diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp
new file mode 100644
index 000000000..5b7fb39e1
--- /dev/null
+++ b/src/core/hw/y2r.cpp
@@ -0,0 +1,369 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <numeric>
7
8#include "common/assert.h"
9#include "common/color.h"
10#include "common/common_types.h"
11#include "common/math_util.h"
12#include "common/vector_math.h"
13
14#include "core/hle/service/y2r_u.h"
15#include "core/memory.h"
16
17namespace HW {
18namespace Y2R {
19
20using namespace Y2R_U;
21
22static const size_t MAX_TILES = 1024 / 8;
23static const size_t TILE_SIZE = 8 * 8;
24using ImageTile = std::array<u32, TILE_SIZE>;
25
26/// Converts a image strip from the source YUV format into individual 8x8 RGB32 tiles.
27static void ConvertYUVToRGB(InputFormat input_format,
28 const u8* input_Y, const u8* input_U, const u8* input_V, ImageTile output[],
29 unsigned int width, unsigned int height, const CoefficientSet& coefficients) {
30
31 for (unsigned int y = 0; y < height; ++y) {
32 for (unsigned int x = 0; x < width; ++x) {
33 s32 Y, U, V;
34 switch (input_format) {
35 case InputFormat::YUV422_Indiv8:
36 case InputFormat::YUV422_Indiv16:
37 Y = input_Y[y * width + x];
38 U = input_U[(y * width + x) / 2];
39 V = input_V[(y * width + x) / 2];
40 break;
41 case InputFormat::YUV420_Indiv8:
42 case InputFormat::YUV420_Indiv16:
43 Y = input_Y[y * width + x];
44 U = input_U[((y / 2) * width + x) / 2];
45 V = input_V[((y / 2) * width + x) / 2];
46 break;
47 case InputFormat::YUYV422_Interleaved:
48 Y = input_Y[(y * width + x) * 2];
49 U = input_Y[(y * width + (x / 2) * 2) * 2 + 1];
50 V = input_Y[(y * width + (x / 2) * 2) * 2 + 3];
51 break;
52 }
53
54 // This conversion process is bit-exact with hardware, as far as could be tested.
55 auto& c = coefficients;
56 s32 cY = c[0]*Y;
57
58 s32 r = cY + c[1]*V;
59 s32 g = cY - c[3]*U - c[2]*V;
60 s32 b = cY + c[4]*U;
61
62 const s32 rounding_offset = 0x18;
63 r = (r >> 3) + c[5] + rounding_offset;
64 g = (g >> 3) + c[6] + rounding_offset;
65 b = (b >> 3) + c[7] + rounding_offset;
66
67 unsigned int tile = x / 8;
68 unsigned int tile_x = x % 8;
69 u32* out = &output[tile][y * 8 + tile_x];
70
71 using MathUtil::Clamp;
72 *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) |
73 ((u32)Clamp(g >> 5, 0, 0xFF) << 16) |
74 ((u32)Clamp(b >> 5, 0, 0xFF) << 8);
75 }
76 }
77}
78
79/// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit formats to 8-bit.
80template <size_t N>
81static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) {
82 const u8* input = Memory::GetPointer(buf.address);
83
84 size_t output_unit = buf.transfer_unit / N;
85 ASSERT(amount_of_data % output_unit == 0);
86
87 while (amount_of_data > 0) {
88 for (size_t i = 0; i < output_unit; ++i) {
89 output[i] = input[i * N];
90 }
91
92 output += output_unit;
93 input += buf.transfer_unit + buf.gap;
94
95 buf.address += buf.transfer_unit + buf.gap;
96 buf.image_size -= buf.transfer_unit;
97 amount_of_data -= output_unit;
98 }
99}
100
101/// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA transfer.
102static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data,
103 OutputFormat output_format, u8 alpha) {
104
105 u8* output = Memory::GetPointer(buf.address);
106
107 while (amount_of_data > 0) {
108 u8* unit_end = output + buf.transfer_unit;
109 while (output < unit_end) {
110 u32 color = *input++;
111 Math::Vec4<u8> col_vec{
112 (color >> 24) & 0xFF, (color >> 16) & 0xFF, (color >> 8) & 0xFF, alpha,
113 };
114
115 switch (output_format) {
116 case OutputFormat::RGBA8:
117 Color::EncodeRGBA8(col_vec, output);
118 output += 4;
119 break;
120 case OutputFormat::RGB8:
121 Color::EncodeRGB8(col_vec, output);
122 output += 3;
123 break;
124 case OutputFormat::RGB5A1:
125 Color::EncodeRGB5A1(col_vec, output);
126 output += 2;
127 break;
128 case OutputFormat::RGB565:
129 Color::EncodeRGB565(col_vec, output);
130 output += 2;
131 break;
132 }
133
134 amount_of_data -= 1;
135 }
136
137 output += buf.gap;
138 buf.address += buf.transfer_unit + buf.gap;
139 buf.image_size -= buf.transfer_unit;
140 }
141}
142
143static const u8 linear_lut[64] = {
144 0, 1, 2, 3, 4, 5, 6, 7,
145 8, 9, 10, 11, 12, 13, 14, 15,
146 16, 17, 18, 19, 20, 21, 22, 23,
147 24, 25, 26, 27, 28, 29, 30, 31,
148 32, 33, 34, 35, 36, 37, 38, 39,
149 40, 41, 42, 43, 44, 45, 46, 47,
150 48, 49, 50, 51, 52, 53, 54, 55,
151 56, 57, 58, 59, 60, 61, 62, 63,
152};
153
154static const u8 morton_lut[64] = {
155 0, 1, 4, 5, 16, 17, 20, 21,
156 2, 3, 6, 7, 18, 19, 22, 23,
157 8, 9, 12, 13, 24, 25, 28, 29,
158 10, 11, 14, 15, 26, 27, 30, 31,
159 32, 33, 36, 37, 48, 49, 52, 53,
160 34, 35, 38, 39, 50, 51, 54, 55,
161 40, 41, 44, 45, 56, 57, 60, 61,
162 42, 43, 46, 47, 58, 59, 62, 63,
163};
164
165static void RotateTile0(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) {
166 for (int i = 0; i < height * 8; ++i) {
167 output[out_map[i]] = input[i];
168 }
169}
170
171static void RotateTile90(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) {
172 int out_i = 0;
173 for (int x = 0; x < 8; ++x) {
174 for (int y = height - 1; y >= 0; --y) {
175 output[out_map[out_i++]] = input[y * 8 + x];
176 }
177 }
178}
179
180static void RotateTile180(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) {
181 int out_i = 0;
182 for (int i = height * 8 - 1; i >= 0; --i) {
183 output[out_map[out_i++]] = input[i];
184 }
185}
186
187static void RotateTile270(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) {
188 int out_i = 0;
189 for (int x = 8-1; x >= 0; --x) {
190 for (int y = 0; y < height; ++y) {
191 output[out_map[out_i++]] = input[y * 8 + x];
192 }
193 }
194}
195
196static void WriteTileToOutput(u32* output, const ImageTile& tile, int height, int line_stride) {
197 for (int y = 0; y < height; ++y) {
198 for (int x = 0; x < 8; ++x) {
199 output[y * line_stride + x] = tile[y * 8 + x];
200 }
201 }
202}
203
204/**
205 * Performs a Y2R colorspace conversion.
206 *
207 * The Y2R hardware implements hardware-accelerated YUV to RGB colorspace conversions. It is most
208 * commonly used for video playback or to display camera input to the screen.
209 *
210 * The conversion process is quite configurable, and can be divided in distinct steps. From
211 * observation, it appears that the hardware buffers a single 8-pixel tall strip of image data
212 * internally and converts it in one go before writing to the output and loading the next strip.
213 *
214 * The steps taken to convert one strip of image data are:
215 *
216 * - The hardware receives data via CDMA (http://3dbrew.org/wiki/Corelink_DMA_Engines), which is
217 * presumably stored in one or more internal buffers. This process can be done in several separate
218 * transfers, as long as they don't exceed the size of the internal image buffer. This allows
219 * flexibility in input strides.
220 * - The input data is decoded into a YUV tuple. Several formats are suported, see the `InputFormat`
221 * enum.
222 * - The YUV tuple is converted, using fixed point calculations, to RGB. This step can be configured
223 * using a set of coefficients to support different colorspace standards. See `CoefficientSet`.
224 * - The strip can be optionally rotated 90, 180 or 270 degrees. Since each strip is processed
225 * independently, this notably rotates each *strip*, not the entire image. This means that for 90
226 * or 270 degree rotations, the output will be in terms of several 8 x height images, and for any
227 * non-zero rotation the strips will have to be re-arranged so that the parts of the image will
228 * not be shuffled together. This limitation makes this a feature of somewhat dubious utility. 90
229 * or 270 degree rotations in images with non-even height don't seem to work properly.
230 * - The data is converted to the output RGB format. See the `OutputFormat` enum.
231 * - The data can be output either linearly line-by-line or in the swizzled 8x8 tile format used by
232 * the PICA. This is decided by the `BlockAlignment` enum. If 8x8 alignment is used, then the
233 * image must have a height divisible by 8. The image width must always be divisible by 8.
234 * - The final data is then CDMAed out to main memory and the next image strip is processed. This
235 * offers the same flexibility as the input stage.
236 *
237 * In this implementation, to avoid the combinatorial explosion of parameter combinations, common
238 * intermediate formats are used and where possible tables or parameters are used instead of
239 * diverging code paths to keep the amount of branches in check. Some steps are also merged to
240 * increase efficiency.
241 *
242 * Output for all valid settings combinations matches hardware, however output in some edge-cases
243 * differs:
244 *
245 * - `Block8x8` alignment with non-mod8 height produces different garbage patterns on the last
246 * strip, especially when combined with rotation.
247 * - Hardware, when using `Linear` alignment with a non-even height and 90 or 270 degree rotation
248 * produces misaligned output on the last strip. This implmentation produces output with the
249 * correct "expected" alignment.
250 *
251 * Hardware behaves strangely (doesn't fire the completion interrupt, for example) in these cases,
252 * so they are believed to be invalid configurations anyway.
253 */
254void PerformConversion(ConversionConfiguration& cvt) {
255 ASSERT(cvt.input_line_width % 8 == 0);
256 ASSERT(cvt.block_alignment != BlockAlignment::Block8x8 || cvt.input_lines % 8 == 0);
257 // Tiles per row
258 size_t num_tiles = cvt.input_line_width / 8;
259 ASSERT(num_tiles < MAX_TILES);
260
261 // Buffer used as a CDMA source/target.
262 std::unique_ptr<u8[]> data_buffer(new u8[cvt.input_line_width * 8 * 4]);
263 // Intermediate storage for decoded 8x8 image tiles. Always stored as RGB32.
264 std::unique_ptr<ImageTile[]> tiles(new ImageTile[num_tiles]);
265 ImageTile tmp_tile;
266
267 // LUT used to remap writes to a tile. Used to allow linear or swizzled output without
268 // requiring two different code paths.
269 const u8* tile_remap;
270 switch (cvt.block_alignment) {
271 case BlockAlignment::Linear:
272 tile_remap = linear_lut; break;
273 case BlockAlignment::Block8x8:
274 tile_remap = morton_lut; break;
275 }
276
277 for (unsigned int y = 0; y < cvt.input_lines; y += 8) {
278 unsigned int row_height = std::min(cvt.input_lines - y, 8u);
279
280 // Total size in pixels of incoming data required for this strip.
281 const size_t row_data_size = row_height * cvt.input_line_width;
282
283 u8* input_Y = data_buffer.get();
284 u8* input_U = input_Y + 8 * cvt.input_line_width;
285 u8* input_V = input_U + 8 * cvt.input_line_width / 2;
286
287 switch (cvt.input_format) {
288 case InputFormat::YUV422_Indiv8:
289 ReceiveData<1>(input_Y, cvt.src_Y, row_data_size);
290 ReceiveData<1>(input_U, cvt.src_U, row_data_size / 2);
291 ReceiveData<1>(input_V, cvt.src_V, row_data_size / 2);
292 break;
293 case InputFormat::YUV420_Indiv8:
294 ReceiveData<1>(input_Y, cvt.src_Y, row_data_size);
295 ReceiveData<1>(input_U, cvt.src_U, row_data_size / 4);
296 ReceiveData<1>(input_V, cvt.src_V, row_data_size / 4);
297 break;
298 case InputFormat::YUV422_Indiv16:
299 ReceiveData<2>(input_Y, cvt.src_Y, row_data_size);
300 ReceiveData<2>(input_U, cvt.src_U, row_data_size / 2);
301 ReceiveData<2>(input_V, cvt.src_V, row_data_size / 2);
302 break;
303 case InputFormat::YUV420_Indiv16:
304 ReceiveData<2>(input_Y, cvt.src_Y, row_data_size);
305 ReceiveData<2>(input_U, cvt.src_U, row_data_size / 4);
306 ReceiveData<2>(input_V, cvt.src_V, row_data_size / 4);
307 break;
308 case InputFormat::YUYV422_Interleaved:
309 input_U = nullptr;
310 input_V = nullptr;
311 ReceiveData<1>(input_Y, cvt.src_YUYV, row_data_size * 2);
312 break;
313 }
314
315 // Note(yuriks): If additional optimization is required, input_format can be moved to a
316 // template parameter, so that its dispatch can be moved to outside the inner loop.
317 ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(),
318 cvt.input_line_width, row_height, cvt.coefficients);
319
320 u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get());
321
322 for (int i = 0; i < num_tiles; ++i) {
323 int image_strip_width, output_stride;
324
325 switch (cvt.rotation) {
326 case Rotation::None:
327 RotateTile0(tiles[i], tmp_tile, row_height, tile_remap);
328 image_strip_width = cvt.input_line_width;
329 output_stride = 8;
330 break;
331 case Rotation::Clockwise_90:
332 RotateTile90(tiles[i], tmp_tile, row_height, tile_remap);
333 image_strip_width = 8;
334 output_stride = 8 * row_height;
335 break;
336 case Rotation::Clockwise_180:
337 // For 180 and 270 degree rotations we also invert the order of tiles in the strip,
338 // since the rotates are done individually on each tile.
339 RotateTile180(tiles[num_tiles - i - 1], tmp_tile, row_height, tile_remap);
340 image_strip_width = cvt.input_line_width;
341 output_stride = 8;
342 break;
343 case Rotation::Clockwise_270:
344 RotateTile270(tiles[num_tiles - i - 1], tmp_tile, row_height, tile_remap);
345 image_strip_width = 8;
346 output_stride = 8 * row_height;
347 break;
348 }
349
350 switch (cvt.block_alignment) {
351 case BlockAlignment::Linear:
352 WriteTileToOutput(output_buffer, tmp_tile, row_height, image_strip_width);
353 output_buffer += output_stride;
354 break;
355 case BlockAlignment::Block8x8:
356 WriteTileToOutput(output_buffer, tmp_tile, 8, 8);
357 output_buffer += TILE_SIZE;
358 break;
359 }
360 }
361
362 // Note(yuriks): If additional optimization is required, output_format can be moved to a
363 // template parameter, so that its dispatch can be moved to outside the inner loop.
364 SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size, cvt.output_format, (u8)cvt.alpha);
365 }
366}
367
368}
369}
diff --git a/src/core/hw/y2r.h b/src/core/hw/y2r.h
new file mode 100644
index 000000000..729e1eee3
--- /dev/null
+++ b/src/core/hw/y2r.h
@@ -0,0 +1,15 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
#pragma once

namespace Y2R_U {
    struct ConversionConfiguration;
}

namespace HW {
namespace Y2R {

/**
 * Performs the YUV to RGB conversion described by `cvt`.
 *
 * @param cvt Conversion settings. Taken by non-const reference because the source and destination
 *            buffer descriptions it holds are advanced as the conversion progresses.
 */
void PerformConversion(Y2R_U::ConversionConfiguration& cvt);

}
}