summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/core/CMakeLists.txt2
-rw-r--r--src/core/hle/service/y2r_u.cpp426
-rw-r--r--src/core/hle/service/y2r_u.h96
-rw-r--r--src/core/hw/y2r.cpp369
-rw-r--r--src/core/hw/y2r.h15
5 files changed, 734 insertions, 174 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 057b8ca0c..4fcda4874 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -108,6 +108,7 @@ set(SRCS
108 hw/gpu.cpp 108 hw/gpu.cpp
109 hw/hw.cpp 109 hw/hw.cpp
110 hw/lcd.cpp 110 hw/lcd.cpp
111 hw/y2r.cpp
111 loader/3dsx.cpp 112 loader/3dsx.cpp
112 loader/elf.cpp 113 loader/elf.cpp
113 loader/loader.cpp 114 loader/loader.cpp
@@ -233,6 +234,7 @@ set(HEADERS
233 hw/gpu.h 234 hw/gpu.h
234 hw/hw.h 235 hw/hw.h
235 hw/lcd.h 236 hw/lcd.h
237 hw/y2r.h
236 loader/3dsx.h 238 loader/3dsx.h
237 loader/elf.h 239 loader/elf.h
238 loader/loader.h 240 loader/loader.h
diff --git a/src/core/hle/service/y2r_u.cpp b/src/core/hle/service/y2r_u.cpp
index 19bfde756..17cb4f0f0 100644
--- a/src/core/hle/service/y2r_u.cpp
+++ b/src/core/hle/service/y2r_u.cpp
@@ -9,8 +9,8 @@
9#include "core/hle/hle.h" 9#include "core/hle/hle.h"
10#include "core/hle/kernel/event.h" 10#include "core/hle/kernel/event.h"
11#include "core/hle/service/y2r_u.h" 11#include "core/hle/service/y2r_u.h"
12#include "core/hw/y2r.h"
12#include "core/mem_map.h" 13#include "core/mem_map.h"
13#include "core/memory.h"
14 14
15#include "video_core/utils.h" 15#include "video_core/utils.h"
16#include "video_core/video_core.h" 16#include "video_core/video_core.h"
@@ -20,75 +20,73 @@
20 20
21namespace Y2R_U { 21namespace Y2R_U {
22 22
23enum class InputFormat { 23struct ConversionParameters {
24 /// 8-bit input, with YUV components in separate planes and using 4:2:2 subsampling. 24 InputFormat input_format;
25 YUV422_Indiv8 = 0, 25 OutputFormat output_format;
26 /// 8-bit input, with YUV components in separate planes and using 4:2:0 subsampling. 26 Rotation rotation;
27 YUV420_Indiv8 = 1, 27 BlockAlignment block_alignment;
28 28 u16 input_line_width;
29 YUV422_INDIV_16 = 2, 29 u16 input_lines;
30 YUV420_INDIV_16 = 3, 30 StandardCoefficient standard_coefficient;
31 YUV422_BATCH = 4, 31 u8 reserved;
32 u16 alpha;
32}; 33};
34static_assert(sizeof(ConversionParameters) == 12, "ConversionParameters struct has incorrect size");
33 35
34enum class OutputFormat { 36static Kernel::SharedPtr<Kernel::Event> completion_event;
35 Rgb32 = 0, 37static ConversionConfiguration conversion;
36 Rgb24 = 1,
37 Rgb16_555 = 2,
38 Rgb16_565 = 3,
39};
40 38
41enum class Rotation { 39static const CoefficientSet standard_coefficients[4] = {
42 None = 0, 40 {{ 0x100, 0x166, 0xB6, 0x58, 0x1C5, -0x166F, 0x10EE, -0x1C5B }}, // ITU_Rec601
43 Clockwise_90 = 1, 41 {{ 0x100, 0x193, 0x77, 0x2F, 0x1DB, -0x1933, 0xA7C, -0x1D51 }}, // ITU_Rec709
44 Clockwise_180 = 2, 42 {{ 0x12A, 0x198, 0xD0, 0x64, 0x204, -0x1BDE, 0x10F2, -0x229B }}, // ITU_Rec601_Scaling
45 Clockwise_270 = 3, 43 {{ 0x12A, 0x1CA, 0x88, 0x36, 0x21C, -0x1F04, 0x99C, -0x2421 }}, // ITU_Rec709_Scaling
46}; 44};
47 45
48enum class BlockAlignment { 46ResultCode ConversionConfiguration::SetInputLineWidth(u16 width) {
49 /// Image is output in linear format suitable for use as a framebuffer. 47 if (width == 0 || width > 1024 || width % 8 != 0) {
50 Linear = 0, 48 return ResultCode(ErrorDescription::OutOfRange, ErrorModule::CAM,
51 /// Image is output in tiled PICA format, suitable for use as a texture. 49 ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053FD
52 Block8x8 = 1, 50 }
53};
54 51
55enum class StandardCoefficient { 52 // Note: The hardware uses the register value 0 to represent a width of 1024, so for a width of
56 ITU_Rec601 = 0, 53 // 1024 the `camera` module would set the value 0 here, but we don't need to emulate this
57 ITU_Rec709 = 1, 54 // internal detail.
58 ITU_Rec601_Scaling = 2, 55 this->input_line_width = width;
59 ITU_Rec709_Scaling = 3, 56 return RESULT_SUCCESS;
60}; 57}
61 58
62static Kernel::SharedPtr<Kernel::Event> completion_event; 59ResultCode ConversionConfiguration::SetInputLines(u16 lines) {
60 if (lines == 0 || lines > 1024) {
61 return ResultCode(ErrorDescription::OutOfRange, ErrorModule::CAM,
62 ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053FD
63 }
63 64
64struct ConversionParameters { 65 // Note: In what appears to be a bug, the `camera` module does not set the hardware register at
65 InputFormat input_format; 66 // all if `lines` is 1024, so the conversion uses the last value that was set. The intention
66 OutputFormat output_format; 67 // was probably to set it to 0 like in SetInputLineWidth.
67 Rotation rotation; 68 if (lines != 1024) {
68 BlockAlignment alignment; 69 this->input_lines = lines;
69 u16 input_line_width; 70 }
70 u16 input_lines; 71 return RESULT_SUCCESS;
72}
71 73
72 // Input parameters for the Y (luma) plane 74ResultCode ConversionConfiguration::SetStandardCoefficient(StandardCoefficient standard_coefficient) {
73 VAddr srcY_address; 75 size_t index = static_cast<size_t>(standard_coefficient);
74 u32 srcY_image_size; 76 if (index >= 4) {
75 u16 srcY_transfer_unit; 77 return ResultCode(ErrorDescription::InvalidEnumValue, ErrorModule::CAM,
76 u16 srcY_stride; 78 ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053ED
77 79 }
78 // Output parameters for the conversion results
79 VAddr dst_address;
80 u32 dst_image_size;
81 u16 dst_transfer_unit;
82 u16 dst_stride;
83};
84 80
85static ConversionParameters conversion_params; 81 std::memcpy(coefficients.data(), standard_coefficients[index].data(), sizeof(coefficients));
82 return RESULT_SUCCESS;
83}
86 84
87static void SetInputFormat(Service::Interface* self) { 85static void SetInputFormat(Service::Interface* self) {
88 u32* cmd_buff = Kernel::GetCommandBuffer(); 86 u32* cmd_buff = Kernel::GetCommandBuffer();
89 87
90 conversion_params.input_format = static_cast<InputFormat>(cmd_buff[1]); 88 conversion.input_format = static_cast<InputFormat>(cmd_buff[1]);
91 LOG_DEBUG(Service_Y2R, "called input_format=%u", conversion_params.input_format); 89 LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format);
92 90
93 cmd_buff[1] = RESULT_SUCCESS.raw; 91 cmd_buff[1] = RESULT_SUCCESS.raw;
94} 92}
@@ -96,8 +94,8 @@ static void SetInputFormat(Service::Interface* self) {
96static void SetOutputFormat(Service::Interface* self) { 94static void SetOutputFormat(Service::Interface* self) {
97 u32* cmd_buff = Kernel::GetCommandBuffer(); 95 u32* cmd_buff = Kernel::GetCommandBuffer();
98 96
99 conversion_params.output_format = static_cast<OutputFormat>(cmd_buff[1]); 97 conversion.output_format = static_cast<OutputFormat>(cmd_buff[1]);
100 LOG_DEBUG(Service_Y2R, "called output_format=%u", conversion_params.output_format); 98 LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format);
101 99
102 cmd_buff[1] = RESULT_SUCCESS.raw; 100 cmd_buff[1] = RESULT_SUCCESS.raw;
103} 101}
@@ -105,8 +103,8 @@ static void SetOutputFormat(Service::Interface* self) {
105static void SetRotation(Service::Interface* self) { 103static void SetRotation(Service::Interface* self) {
106 u32* cmd_buff = Kernel::GetCommandBuffer(); 104 u32* cmd_buff = Kernel::GetCommandBuffer();
107 105
108 conversion_params.rotation = static_cast<Rotation>(cmd_buff[1]); 106 conversion.rotation = static_cast<Rotation>(cmd_buff[1]);
109 LOG_DEBUG(Service_Y2R, "called rotation=%u", conversion_params.rotation); 107 LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation);
110 108
111 cmd_buff[1] = RESULT_SUCCESS.raw; 109 cmd_buff[1] = RESULT_SUCCESS.raw;
112} 110}
@@ -114,18 +112,26 @@ static void SetRotation(Service::Interface* self) {
114static void SetBlockAlignment(Service::Interface* self) { 112static void SetBlockAlignment(Service::Interface* self) {
115 u32* cmd_buff = Kernel::GetCommandBuffer(); 113 u32* cmd_buff = Kernel::GetCommandBuffer();
116 114
117 conversion_params.alignment = static_cast<BlockAlignment>(cmd_buff[1]); 115 conversion.block_alignment = static_cast<BlockAlignment>(cmd_buff[1]);
118 LOG_DEBUG(Service_Y2R, "called alignment=%u", conversion_params.alignment); 116 LOG_DEBUG(Service_Y2R, "called alignment=%hhu", conversion.block_alignment);
119 117
120 cmd_buff[1] = RESULT_SUCCESS.raw; 118 cmd_buff[1] = RESULT_SUCCESS.raw;
121} 119}
122 120
121static void SetTransferEndInterrupt(Service::Interface* self) {
122 u32* cmd_buff = Kernel::GetCommandBuffer();
123
124 cmd_buff[0] = 0x000D0040;
125 cmd_buff[1] = RESULT_SUCCESS.raw;
126 LOG_DEBUG(Service_Y2R, "(STUBBED) called");
127}
128
123/** 129/**
124* Y2R_U::GetTransferEndEvent service function 130 * Y2R_U::GetTransferEndEvent service function
125* Outputs: 131 * Outputs:
126* 1 : Result of function, 0 on success, otherwise error code 132 * 1 : Result of function, 0 on success, otherwise error code
127* 3 : The handle of the completion event 133 * 3 : The handle of the completion event
128*/ 134 */
129static void GetTransferEndEvent(Service::Interface* self) { 135static void GetTransferEndEvent(Service::Interface* self) {
130 u32* cmd_buff = Kernel::GetCommandBuffer(); 136 u32* cmd_buff = Kernel::GetCommandBuffer();
131 137
@@ -137,138 +143,132 @@ static void GetTransferEndEvent(Service::Interface* self) {
137static void SetSendingY(Service::Interface* self) { 143static void SetSendingY(Service::Interface* self) {
138 u32* cmd_buff = Kernel::GetCommandBuffer(); 144 u32* cmd_buff = Kernel::GetCommandBuffer();
139 145
140 conversion_params.srcY_address = cmd_buff[1]; 146 conversion.src_Y.address = cmd_buff[1];
141 conversion_params.srcY_image_size = cmd_buff[2]; 147 conversion.src_Y.image_size = cmd_buff[2];
142 conversion_params.srcY_transfer_unit = cmd_buff[3]; 148 conversion.src_Y.transfer_unit = cmd_buff[3];
143 conversion_params.srcY_stride = cmd_buff[4]; 149 conversion.src_Y.gap = cmd_buff[4];
144 u32 src_process_handle = cmd_buff[6]; 150 u32 src_process_handle = cmd_buff[6];
145 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, " 151 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
146 "src_process_handle=0x%08X", conversion_params.srcY_image_size, 152 "src_process_handle=0x%08X", conversion.src_Y.image_size,
147 conversion_params.srcY_transfer_unit, conversion_params.srcY_stride, src_process_handle); 153 conversion.src_Y.transfer_unit, conversion.src_Y.gap, src_process_handle);
148 154
149 cmd_buff[1] = RESULT_SUCCESS.raw; 155 cmd_buff[1] = RESULT_SUCCESS.raw;
150} 156}
151 157
152static void SetReceiving(Service::Interface* self) { 158static void SetSendingU(Service::Interface* self) {
153 u32* cmd_buff = Kernel::GetCommandBuffer(); 159 u32* cmd_buff = Kernel::GetCommandBuffer();
154 160
155 conversion_params.dst_address = cmd_buff[1]; 161 conversion.src_U.address = cmd_buff[1];
156 conversion_params.dst_image_size = cmd_buff[2]; 162 conversion.src_U.image_size = cmd_buff[2];
157 conversion_params.dst_transfer_unit = cmd_buff[3]; 163 conversion.src_U.transfer_unit = cmd_buff[3];
158 conversion_params.dst_stride = cmd_buff[4]; 164 conversion.src_U.gap = cmd_buff[4];
159 u32 dst_process_handle = cmd_buff[6]; 165 u32 src_process_handle = cmd_buff[6];
160 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, " 166 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
161 "dst_process_handle=0x%08X", conversion_params.dst_image_size, 167 "src_process_handle=0x%08X", conversion.src_U.image_size,
162 conversion_params.dst_transfer_unit, conversion_params.dst_stride, 168 conversion.src_U.transfer_unit, conversion.src_U.gap, src_process_handle);
163 dst_process_handle);
164 169
165 cmd_buff[1] = RESULT_SUCCESS.raw; 170 cmd_buff[1] = RESULT_SUCCESS.raw;
166} 171}
167 172
168static void SetInputLineWidth(Service::Interface* self) { 173static void SetSendingV(Service::Interface* self) {
169 u32* cmd_buff = Kernel::GetCommandBuffer(); 174 u32* cmd_buff = Kernel::GetCommandBuffer();
170 175
171 conversion_params.input_line_width = cmd_buff[1]; 176 conversion.src_V.address = cmd_buff[1];
172 LOG_DEBUG(Service_Y2R, "input_line_width=%u", conversion_params.input_line_width); 177 conversion.src_V.image_size = cmd_buff[2];
178 conversion.src_V.transfer_unit = cmd_buff[3];
179 conversion.src_V.gap = cmd_buff[4];
180 u32 src_process_handle = cmd_buff[6];
181 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
182 "src_process_handle=0x%08X", conversion.src_V.image_size,
183 conversion.src_V.transfer_unit, conversion.src_V.gap, src_process_handle);
173 184
174 cmd_buff[1] = RESULT_SUCCESS.raw; 185 cmd_buff[1] = RESULT_SUCCESS.raw;
175} 186}
176 187
177static void SetInputLines(Service::Interface* self) { 188static void SetSendingYUYV(Service::Interface* self) {
178 u32* cmd_buff = Kernel::GetCommandBuffer(); 189 u32* cmd_buff = Kernel::GetCommandBuffer();
179 190
180 conversion_params.input_lines = cmd_buff[1]; 191 conversion.src_YUYV.address = cmd_buff[1];
181 LOG_DEBUG(Service_Y2R, "input_line_number=%u", conversion_params.input_lines); 192 conversion.src_YUYV.image_size = cmd_buff[2];
193 conversion.src_YUYV.transfer_unit = cmd_buff[3];
194 conversion.src_YUYV.gap = cmd_buff[4];
195 u32 src_process_handle = cmd_buff[6];
196 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
197 "src_process_handle=0x%08X", conversion.src_YUYV.image_size,
198 conversion.src_YUYV.transfer_unit, conversion.src_YUYV.gap, src_process_handle);
182 199
183 cmd_buff[1] = RESULT_SUCCESS.raw; 200 cmd_buff[1] = RESULT_SUCCESS.raw;
184} 201}
185 202
186static void StartConversion(Service::Interface* self) { 203static void SetReceiving(Service::Interface* self) {
187 u32* cmd_buff = Kernel::GetCommandBuffer(); 204 u32* cmd_buff = Kernel::GetCommandBuffer();
188 205
189 const u8* srcY_buffer = Memory::GetPointer(conversion_params.srcY_address); 206 conversion.dst.address = cmd_buff[1];
190 u8* dst_buffer = Memory::GetPointer(conversion_params.dst_address); 207 conversion.dst.image_size = cmd_buff[2];
191 208 conversion.dst.transfer_unit = cmd_buff[3];
192 // TODO: support color and other kinds of conversions 209 conversion.dst.gap = cmd_buff[4];
193 ASSERT(conversion_params.input_format == InputFormat::YUV422_Indiv8 210 u32 dst_process_handle = cmd_buff[6];
194 || conversion_params.input_format == InputFormat::YUV420_Indiv8); 211 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
195 ASSERT(conversion_params.output_format == OutputFormat::Rgb24); 212 "dst_process_handle=0x%08X", conversion.dst.image_size,
196 ASSERT(conversion_params.rotation == Rotation::None); 213 conversion.dst.transfer_unit, conversion.dst.gap,
197 const int bpp = 3; 214 dst_process_handle);
198
199 switch (conversion_params.alignment) {
200 case BlockAlignment::Linear:
201 {
202 const size_t input_lines = conversion_params.input_lines;
203 const size_t input_line_width = conversion_params.input_line_width;
204 const size_t srcY_stride = conversion_params.srcY_stride;
205 const size_t dst_stride = conversion_params.dst_stride;
206
207 size_t srcY_offset = 0;
208 size_t dst_offset = 0;
209
210 for (size_t line = 0; line < input_lines; ++line) {
211 for (size_t i = 0; i < input_line_width; ++i) {
212 u8 Y = srcY_buffer[srcY_offset];
213 dst_buffer[dst_offset + 0] = Y;
214 dst_buffer[dst_offset + 1] = Y;
215 dst_buffer[dst_offset + 2] = Y;
216
217 srcY_offset += 1;
218 dst_offset += bpp;
219 }
220 srcY_offset += srcY_stride;
221 dst_offset += dst_stride;
222 }
223 break;
224 }
225 case BlockAlignment::Block8x8:
226 {
227 const size_t input_lines = conversion_params.input_lines;
228 const size_t input_line_width = conversion_params.input_line_width;
229 const size_t srcY_stride = conversion_params.srcY_stride;
230 const size_t dst_transfer_unit = conversion_params.dst_transfer_unit;
231 const size_t dst_stride = conversion_params.dst_stride;
232 215
233 size_t srcY_offset = 0; 216 cmd_buff[1] = RESULT_SUCCESS.raw;
234 size_t dst_tile_line_offs = 0; 217}
235 218
236 const size_t tile_size = 8 * 8 * bpp; 219static void SetInputLineWidth(Service::Interface* self) {
220 u32* cmd_buff = Kernel::GetCommandBuffer();
237 221
238 for (size_t line = 0; line < input_lines;) { 222 LOG_DEBUG(Service_Y2R, "called input_line_width=%u", cmd_buff[1]);
239 size_t max_line = line + 8; 223 cmd_buff[1] = conversion.SetInputLineWidth(cmd_buff[1]).raw;
224}
240 225
241 for (; line < max_line; ++line) { 226static void SetInputLines(Service::Interface* self) {
242 for (size_t x = 0; x < input_line_width; ++x) { 227 u32* cmd_buff = Kernel::GetCommandBuffer();
243 size_t tile_x = x / 8;
244 228
245 size_t dst_tile_offs = dst_tile_line_offs + tile_x * tile_size; 229 LOG_DEBUG(Service_Y2R, "called input_line_number=%u", cmd_buff[1]);
246 size_t tile_i = VideoCore::MortonInterleave((u32)x, (u32)line); 230 cmd_buff[1] = conversion.SetInputLines(cmd_buff[1]).raw;
231}
247 232
248 size_t dst_offset = dst_tile_offs + tile_i * bpp; 233static void SetCoefficient(Service::Interface* self) {
234 u32* cmd_buff = Kernel::GetCommandBuffer();
249 235
250 u8 Y = srcY_buffer[srcY_offset]; 236 const u16* coefficients = reinterpret_cast<const u16*>(&cmd_buff[1]);
251 dst_buffer[dst_offset + 0] = Y; 237 std::memcpy(conversion.coefficients.data(), coefficients, sizeof(CoefficientSet));
252 dst_buffer[dst_offset + 1] = Y; 238 LOG_DEBUG(Service_Y2R, "called coefficients=[%hX, %hX, %hX, %hX, %hX, %hX, %hX, %hX]",
253 dst_buffer[dst_offset + 2] = Y; 239 coefficients[0], coefficients[1], coefficients[2], coefficients[3],
240 coefficients[4], coefficients[5], coefficients[6], coefficients[7]);
254 241
255 srcY_offset += 1; 242 cmd_buff[1] = RESULT_SUCCESS.raw;
256 } 243}
257 244
258 srcY_offset += srcY_stride; 245static void SetStandardCoefficient(Service::Interface* self) {
259 } 246 u32* cmd_buff = Kernel::GetCommandBuffer();
260 247
261 dst_tile_line_offs += dst_transfer_unit + dst_stride; 248 LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u", cmd_buff[1]);
262 } 249
263 break; 250 cmd_buff[1] = conversion.SetStandardCoefficient((StandardCoefficient)cmd_buff[1]).raw;
264 } 251}
265 }
266 252
267 // dst_image_size would seem to be perfect for this, but it doesn't include the stride :( 253static void SetAlpha(Service::Interface* self) {
268 u32 total_output_size = conversion_params.input_lines * 254 u32* cmd_buff = Kernel::GetCommandBuffer();
269 (conversion_params.dst_transfer_unit + conversion_params.dst_stride); 255
256 conversion.alpha = cmd_buff[1];
257 LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha);
258
259 cmd_buff[1] = RESULT_SUCCESS.raw;
260}
261
262static void StartConversion(Service::Interface* self) {
263 u32* cmd_buff = Kernel::GetCommandBuffer();
264
265 HW::Y2R::PerformConversion(conversion);
266
267 // dst_image_size would seem to be perfect for this, but it doesn't include the gap :(
268 u32 total_output_size = conversion.input_lines *
269 (conversion.dst.transfer_unit + conversion.dst.gap);
270 VideoCore::g_renderer->hw_rasterizer->NotifyFlush( 270 VideoCore::g_renderer->hw_rasterizer->NotifyFlush(
271 Memory::VirtualToPhysicalAddress(conversion_params.dst_address), total_output_size); 271 Memory::VirtualToPhysicalAddress(conversion.dst.address), total_output_size);
272 272
273 LOG_DEBUG(Service_Y2R, "called"); 273 LOG_DEBUG(Service_Y2R, "called");
274 completion_event->Signal(); 274 completion_event->Signal();
@@ -276,12 +276,20 @@ static void StartConversion(Service::Interface* self) {
276 cmd_buff[1] = RESULT_SUCCESS.raw; 276 cmd_buff[1] = RESULT_SUCCESS.raw;
277} 277}
278 278
279static void StopConversion(Service::Interface* self) {
280 u32* cmd_buff = Kernel::GetCommandBuffer();
281
282 cmd_buff[0] = 0x00270040;
283 cmd_buff[1] = RESULT_SUCCESS.raw;
284 LOG_DEBUG(Service_Y2R, "called");
285}
286
279/** 287/**
280* Y2R_U::IsBusyConversion service function 288 * Y2R_U::IsBusyConversion service function
281* Outputs: 289 * Outputs:
282* 1 : Result of function, 0 on success, otherwise error code 290 * 1 : Result of function, 0 on success, otherwise error code
283* 2 : 1 if there's a conversion running, otherwise 0. 291 * 2 : 1 if there's a conversion running, otherwise 0.
284*/ 292 */
285static void IsBusyConversion(Service::Interface* self) { 293static void IsBusyConversion(Service::Interface* self) {
286 u32* cmd_buff = Kernel::GetCommandBuffer(); 294 u32* cmd_buff = Kernel::GetCommandBuffer();
287 295
@@ -290,6 +298,40 @@ static void IsBusyConversion(Service::Interface* self) {
290 LOG_DEBUG(Service_Y2R, "called"); 298 LOG_DEBUG(Service_Y2R, "called");
291} 299}
292 300
301/**
302 * Y2R_U::SetConversionParams service function
303 */
304static void SetConversionParams(Service::Interface* self) {
305 u32* cmd_buff = Kernel::GetCommandBuffer();
306
307 auto params = reinterpret_cast<const ConversionParameters*>(&cmd_buff[1]);
308 LOG_DEBUG(Service_Y2R,
309 "called input_format=%hhu output_format=%hhu rotation=%hhu block_alignment=%hhu "
310 "input_line_width=%hu input_lines=%hu standard_coefficient=%hhu "
311 "reserved=%hhu alpha=%hX",
312 params->input_format, params->output_format, params->rotation, params->block_alignment,
313 params->input_line_width, params->input_lines, params->standard_coefficient,
314 params->reserved, params->alpha);
315
316 ResultCode result = RESULT_SUCCESS;
317
318 conversion.input_format = params->input_format;
319 conversion.output_format = params->output_format;
320 conversion.rotation = params->rotation;
321 conversion.block_alignment = params->block_alignment;
322 result = conversion.SetInputLineWidth(params->input_line_width);
323 if (result.IsError()) goto cleanup;
324 result = conversion.SetInputLines(params->input_lines);
325 if (result.IsError()) goto cleanup;
326 result = conversion.SetStandardCoefficient(params->standard_coefficient);
327 if (result.IsError()) goto cleanup;
328 conversion.alpha = params->alpha;
329
330cleanup:
331 cmd_buff[0] = 0x00290040; // TODO verify
332 cmd_buff[1] = result.raw;
333}
334
293static void PingProcess(Service::Interface* self) { 335static void PingProcess(Service::Interface* self) {
294 u32* cmd_buff = Kernel::GetCommandBuffer(); 336 u32* cmd_buff = Kernel::GetCommandBuffer();
295 337
@@ -298,27 +340,63 @@ static void PingProcess(Service::Interface* self) {
298 LOG_WARNING(Service_Y2R, "(STUBBED) called"); 340 LOG_WARNING(Service_Y2R, "(STUBBED) called");
299} 341}
300 342
343static void DriverInitialize(Service::Interface* self) {
344 u32* cmd_buff = Kernel::GetCommandBuffer();
345
346 conversion.input_format = InputFormat::YUV422_Indiv8;
347 conversion.output_format = OutputFormat::RGBA8;
348 conversion.rotation = Rotation::None;
349 conversion.block_alignment = BlockAlignment::Linear;
350 conversion.coefficients.fill(0);
351 conversion.SetInputLineWidth(1024);
352 conversion.SetInputLines(1024);
353 conversion.alpha = 0;
354
355 ConversionBuffer zero_buffer = {};
356 conversion.src_Y = zero_buffer;
357 conversion.src_U = zero_buffer;
358 conversion.src_V = zero_buffer;
359 conversion.dst = zero_buffer;
360
361 completion_event->Clear();
362
363 cmd_buff[0] = 0x002B0040;
364 cmd_buff[1] = RESULT_SUCCESS.raw;
365 LOG_DEBUG(Service_Y2R, "called");
366}
367
368static void DriverFinalize(Service::Interface* self) {
369 u32* cmd_buff = Kernel::GetCommandBuffer();
370
371 cmd_buff[0] = 0x002C0040;
372 cmd_buff[1] = RESULT_SUCCESS.raw;
373 LOG_DEBUG(Service_Y2R, "called");
374}
375
301const Interface::FunctionInfo FunctionTable[] = { 376const Interface::FunctionInfo FunctionTable[] = {
302 {0x00010040, SetInputFormat, "SetInputFormat"}, 377 {0x00010040, SetInputFormat, "SetInputFormat"},
303 {0x00030040, SetOutputFormat, "SetOutputFormat"}, 378 {0x00030040, SetOutputFormat, "SetOutputFormat"},
304 {0x00050040, SetRotation, "SetRotation"}, 379 {0x00050040, SetRotation, "SetRotation"},
305 {0x00070040, SetBlockAlignment, "SetBlockAlignment"}, 380 {0x00070040, SetBlockAlignment, "SetBlockAlignment"},
306 {0x000D0040, nullptr, "SetTransferEndInterrupt"}, 381 {0x000D0040, SetTransferEndInterrupt, "SetTransferEndInterrupt"},
307 {0x000F0000, GetTransferEndEvent, "GetTransferEndEvent"}, 382 {0x000F0000, GetTransferEndEvent, "GetTransferEndEvent"},
308 {0x00100102, SetSendingY, "SetSendingY"}, 383 {0x00100102, SetSendingY, "SetSendingY"},
309 {0x00110102, nullptr, "SetSendingU"}, 384 {0x00110102, SetSendingU, "SetSendingU"},
310 {0x00120102, nullptr, "SetSendingV"}, 385 {0x00120102, SetSendingV, "SetSendingV"},
386 {0x00130102, SetSendingYUYV, "SetSendingYUYV"},
311 {0x00180102, SetReceiving, "SetReceiving"}, 387 {0x00180102, SetReceiving, "SetReceiving"},
312 {0x001A0040, SetInputLineWidth, "SetInputLineWidth"}, 388 {0x001A0040, SetInputLineWidth, "SetInputLineWidth"},
313 {0x001C0040, SetInputLines, "SetInputLines"}, 389 {0x001C0040, SetInputLines, "SetInputLines"},
314 {0x00200040, nullptr, "SetStandardCoefficient"}, 390 {0x001E0100, SetCoefficient, "SetCoefficient"},
315 {0x00220040, nullptr, "SetAlpha"}, 391 {0x00200040, SetStandardCoefficient, "SetStandardCoefficient"},
392 {0x00220040, SetAlpha, "SetAlpha"},
316 {0x00260000, StartConversion, "StartConversion"}, 393 {0x00260000, StartConversion, "StartConversion"},
317 {0x00270000, nullptr, "StopConversion"}, 394 {0x00270000, StopConversion, "StopConversion"},
318 {0x00280000, IsBusyConversion, "IsBusyConversion"}, 395 {0x00280000, IsBusyConversion, "IsBusyConversion"},
396 {0x002901C0, SetConversionParams, "SetConversionParams"},
319 {0x002A0000, PingProcess, "PingProcess"}, 397 {0x002A0000, PingProcess, "PingProcess"},
320 {0x002B0000, nullptr, "DriverInitialize"}, 398 {0x002B0000, DriverInitialize, "DriverInitialize"},
321 {0x002C0000, nullptr, "DriverFinalize"}, 399 {0x002C0000, DriverFinalize, "DriverFinalize"},
322}; 400};
323 401
324//////////////////////////////////////////////////////////////////////////////////////////////////// 402////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -326,7 +404,7 @@ const Interface::FunctionInfo FunctionTable[] = {
326 404
327Interface::Interface() { 405Interface::Interface() {
328 completion_event = Kernel::Event::Create(RESETTYPE_ONESHOT, "Y2R:Completed"); 406 completion_event = Kernel::Event::Create(RESETTYPE_ONESHOT, "Y2R:Completed");
329 std::memset(&conversion_params, 0, sizeof(conversion_params)); 407 std::memset(&conversion, 0, sizeof(conversion));
330 408
331 Register(FunctionTable); 409 Register(FunctionTable);
332} 410}
diff --git a/src/core/hle/service/y2r_u.h b/src/core/hle/service/y2r_u.h
index 171aecfd1..7df47fcb9 100644
--- a/src/core/hle/service/y2r_u.h
+++ b/src/core/hle/service/y2r_u.h
@@ -4,6 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8
9#include "common/common_types.h"
10
7#include "core/hle/service/service.h" 11#include "core/hle/service/service.h"
8 12
9//////////////////////////////////////////////////////////////////////////////////////////////////// 13////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -11,6 +15,98 @@
11 15
12namespace Y2R_U { 16namespace Y2R_U {
13 17
18enum class InputFormat : u8 {
19 /// 8-bit input, with YUV components in separate planes and 4:2:2 subsampling.
20 YUV422_Indiv8 = 0,
21 /// 8-bit input, with YUV components in separate planes and 4:2:0 subsampling.
22 YUV420_Indiv8 = 1,
23
24 /// 16-bit input (only LSB used), with YUV components in separate planes and 4:2:2 subsampling.
25 YUV422_Indiv16 = 2,
26 /// 16-bit input (only LSB used), with YUV components in separate planes and 4:2:0 subsampling.
27 YUV420_Indiv16 = 3,
28
29 /// 8-bit input, with a single interleaved stream in YUYV format and 4:2:2 subsampling.
30 YUYV422_Interleaved = 4,
31};
32
33enum class OutputFormat : u8 {
34 RGBA8 = 0,
35 RGB8 = 1,
36 RGB5A1 = 2,
37 RGB565 = 3,
38};
39
40enum class Rotation : u8 {
41 None = 0,
42 Clockwise_90 = 1,
43 Clockwise_180 = 2,
44 Clockwise_270 = 3,
45};
46
47enum class BlockAlignment : u8 {
48 /// Image is output in linear format suitable for use as a framebuffer.
49 Linear = 0,
50 /// Image is output in tiled PICA format, suitable for use as a texture.
51 Block8x8 = 1,
52};
53
54enum class StandardCoefficient : u8 {
55 /// ITU Rec. BT.601 primaries, with PC ranges.
56 ITU_Rec601 = 0,
57 /// ITU Rec. BT.709 primaries, with PC ranges.
58 ITU_Rec709 = 1,
59 /// ITU Rec. BT.601 primaries, with TV ranges.
60 ITU_Rec601_Scaling = 2,
61 /// ITU Rec. BT.709 primaries, with TV ranges.
62 ITU_Rec709_Scaling = 3,
63};
64
65/**
66 * A set of coefficients configuring the YUV to RGB conversion. Coefficients 0-4 are unsigned 2.8
67 * fixed point numbers representing entries on the conversion matrix, while coefficients 5-7 are
68 * signed 11.5 fixed point numbers added as offsets to the RGB result.
69 *
70 * The overall conversion process formula is:
71 * ```
72 * R = trunc((c_0 * Y + c_1 * V) + c_5 + 0.75)
73 * G = trunc((c_0 * Y - c_3 * U - c_2 * V) + c_6 + 0.75)
74 * B = trunc((c_0 * Y + c_4 * U ) + c_7 + 0.75)
75 * ```
76 */
77using CoefficientSet = std::array<s16, 8>;
78
79struct ConversionBuffer {
80 /// Current reading/writing address of this buffer.
81 VAddr address;
82 /// Remaining amount of bytes to be DMAed, does not include the inter-transfer gap.
83 u32 image_size;
84 /// Size of a single DMA transfer.
85 u16 transfer_unit;
86 /// Amount of bytes to be skipped between copying each `transfer_unit` bytes.
87 u16 gap;
88};
89
90struct ConversionConfiguration {
91 InputFormat input_format;
92 OutputFormat output_format;
93 Rotation rotation;
94 BlockAlignment block_alignment;
95 u16 input_line_width;
96 u16 input_lines;
97 CoefficientSet coefficients;
98 u16 alpha;
99
100 /// Input parameters for the Y (luma), U and V (chroma) planes and the interleaved YUYV stream
101 ConversionBuffer src_Y, src_U, src_V, src_YUYV;
102 /// Output parameters for the conversion results
103 ConversionBuffer dst;
104
105 ResultCode SetInputLineWidth(u16 width);
106 ResultCode SetInputLines(u16 lines);
107 ResultCode SetStandardCoefficient(StandardCoefficient standard_coefficient);
108};
109
14class Interface : public Service::Interface { 110class Interface : public Service::Interface {
15public: 111public:
16 Interface(); 112 Interface();
diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp
new file mode 100644
index 000000000..5b7fb39e1
--- /dev/null
+++ b/src/core/hw/y2r.cpp
@@ -0,0 +1,369 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <numeric>
7
8#include "common/assert.h"
9#include "common/color.h"
10#include "common/common_types.h"
11#include "common/math_util.h"
12#include "common/vector_math.h"
13
14#include "core/hle/service/y2r_u.h"
15#include "core/memory.h"
16
17namespace HW {
18namespace Y2R {
19
20using namespace Y2R_U;
21
/// Upper bound on the number of 8-pixel-wide tiles in one strip; checked by an assert in PerformConversion.
22static const size_t MAX_TILES = 1024 / 8;
/// Number of pixels in one 8x8 tile.
23static const size_t TILE_SIZE = 8 * 8;
/// One 8x8 tile of pixels, each held in a 32-bit value.
24using ImageTile = std::array<u32, TILE_SIZE>;
25
26/// Converts a image strip from the source YUV format into individual 8x8 RGB32 tiles.
27static void ConvertYUVToRGB(InputFormat input_format,
28 const u8* input_Y, const u8* input_U, const u8* input_V, ImageTile output[],
29 unsigned int width, unsigned int height, const CoefficientSet& coefficients) {
30
31 for (unsigned int y = 0; y < height; ++y) {
32 for (unsigned int x = 0; x < width; ++x) {
33 s32 Y, U, V;
34 switch (input_format) {
35 case InputFormat::YUV422_Indiv8:
36 case InputFormat::YUV422_Indiv16:
37 Y = input_Y[y * width + x];
38 U = input_U[(y * width + x) / 2];
39 V = input_V[(y * width + x) / 2];
40 break;
41 case InputFormat::YUV420_Indiv8:
42 case InputFormat::YUV420_Indiv16:
43 Y = input_Y[y * width + x];
44 U = input_U[((y / 2) * width + x) / 2];
45 V = input_V[((y / 2) * width + x) / 2];
46 break;
47 case InputFormat::YUYV422_Interleaved:
48 Y = input_Y[(y * width + x) * 2];
49 U = input_Y[(y * width + (x / 2) * 2) * 2 + 1];
50 V = input_Y[(y * width + (x / 2) * 2) * 2 + 3];
51 break;
52 }
53
54 // This conversion process is bit-exact with hardware, as far as could be tested.
55 auto& c = coefficients;
56 s32 cY = c[0]*Y;
57
58 s32 r = cY + c[1]*V;
59 s32 g = cY - c[3]*U - c[2]*V;
60 s32 b = cY + c[4]*U;
61
62 const s32 rounding_offset = 0x18;
63 r = (r >> 3) + c[5] + rounding_offset;
64 g = (g >> 3) + c[6] + rounding_offset;
65 b = (b >> 3) + c[7] + rounding_offset;
66
67 unsigned int tile = x / 8;
68 unsigned int tile_x = x % 8;
69 u32* out = &output[tile][y * 8 + tile_x];
70
71 using MathUtil::Clamp;
72 *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) |
73 ((u32)Clamp(g >> 5, 0, 0xFF) << 16) |
74 ((u32)Clamp(b >> 5, 0, 0xFF) << 8);
75 }
76 }
77}
78
79/// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit formats to 8-bit.
80template <size_t N>
81static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) {
82 const u8* input = Memory::GetPointer(buf.address);
83
84 size_t output_unit = buf.transfer_unit / N;
85 ASSERT(amount_of_data % output_unit == 0);
86
87 while (amount_of_data > 0) {
88 for (size_t i = 0; i < output_unit; ++i) {
89 output[i] = input[i * N];
90 }
91
92 output += output_unit;
93 input += buf.transfer_unit + buf.gap;
94
95 buf.address += buf.transfer_unit + buf.gap;
96 buf.image_size -= buf.transfer_unit;
97 amount_of_data -= output_unit;
98 }
99}
100
101/// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA transfer.
102static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data,
103 OutputFormat output_format, u8 alpha) {
104
105 u8* output = Memory::GetPointer(buf.address);
106
107 while (amount_of_data > 0) {
108 u8* unit_end = output + buf.transfer_unit;
109 while (output < unit_end) {
110 u32 color = *input++;
111 Math::Vec4<u8> col_vec{
112 (color >> 24) & 0xFF, (color >> 16) & 0xFF, (color >> 8) & 0xFF, alpha,
113 };
114
115 switch (output_format) {
116 case OutputFormat::RGBA8:
117 Color::EncodeRGBA8(col_vec, output);
118 output += 4;
119 break;
120 case OutputFormat::RGB8:
121 Color::EncodeRGB8(col_vec, output);
122 output += 3;
123 break;
124 case OutputFormat::RGB5A1:
125 Color::EncodeRGB5A1(col_vec, output);
126 output += 2;
127 break;
128 case OutputFormat::RGB565:
129 Color::EncodeRGB565(col_vec, output);
130 output += 2;
131 break;
132 }
133
134 amount_of_data -= 1;
135 }
136
137 output += buf.gap;
138 buf.address += buf.transfer_unit + buf.gap;
139 buf.image_size -= buf.transfer_unit;
140 }
141}
142
/// Identity remap table (entry i is i). PerformConversion selects it for `BlockAlignment::Linear`,
/// so pixels stay in row-major order inside a tile.
143static const u8 linear_lut[64] = {
144 0, 1, 2, 3, 4, 5, 6, 7,
145 8, 9, 10, 11, 12, 13, 14, 15,
146 16, 17, 18, 19, 20, 21, 22, 23,
147 24, 25, 26, 27, 28, 29, 30, 31,
148 32, 33, 34, 35, 36, 37, 38, 39,
149 40, 41, 42, 43, 44, 45, 46, 47,
150 48, 49, 50, 51, 52, 53, 54, 55,
151 56, 57, 58, 59, 60, 61, 62, 63,
152};
153
/// Remap table from a row-major position in an 8x8 tile (index y * 8 + x) to its position in
/// Z-order (Morton) layout. PerformConversion selects it for `BlockAlignment::Block8x8`.
154static const u8 morton_lut[64] = {
155 0, 1, 4, 5, 16, 17, 20, 21,
156 2, 3, 6, 7, 18, 19, 22, 23,
157 8, 9, 12, 13, 24, 25, 28, 29,
158 10, 11, 14, 15, 26, 27, 30, 31,
159 32, 33, 36, 37, 48, 49, 52, 53,
160 34, 35, 38, 39, 50, 51, 54, 55,
161 40, 41, 44, 45, 56, 57, 60, 61,
162 42, 43, 46, 47, 58, 59, 62, 63,
163};
164
165static void RotateTile0(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) {
166 for (int i = 0; i < height * 8; ++i) {
167 output[out_map[i]] = input[i];
168 }
169}
170
171static void RotateTile90(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) {
172 int out_i = 0;
173 for (int x = 0; x < 8; ++x) {
174 for (int y = height - 1; y >= 0; --y) {
175 output[out_map[out_i++]] = input[y * 8 + x];
176 }
177 }
178}
179
180static void RotateTile180(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) {
181 int out_i = 0;
182 for (int i = height * 8 - 1; i >= 0; --i) {
183 output[out_map[out_i++]] = input[i];
184 }
185}
186
187static void RotateTile270(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) {
188 int out_i = 0;
189 for (int x = 8-1; x >= 0; --x) {
190 for (int y = 0; y < height; ++y) {
191 output[out_map[out_i++]] = input[y * 8 + x];
192 }
193 }
194}
195
196static void WriteTileToOutput(u32* output, const ImageTile& tile, int height, int line_stride) {
197 for (int y = 0; y < height; ++y) {
198 for (int x = 0; x < 8; ++x) {
199 output[y * line_stride + x] = tile[y * 8 + x];
200 }
201 }
202}
203
204/**
205 * Performs a Y2R colorspace conversion.
206 *
207 * The Y2R hardware implements hardware-accelerated YUV to RGB colorspace conversions. It is most
208 * commonly used for video playback or to display camera input to the screen.
209 *
210 * The conversion process is quite configurable, and can be divided in distinct steps. From
211 * observation, it appears that the hardware buffers a single 8-pixel tall strip of image data
212 * internally and converts it in one go before writing to the output and loading the next strip.
213 *
214 * The steps taken to convert one strip of image data are:
215 *
216 * - The hardware receives data via CDMA (http://3dbrew.org/wiki/Corelink_DMA_Engines), which is
217 * presumably stored in one or more internal buffers. This process can be done in several separate
218 * transfers, as long as they don't exceed the size of the internal image buffer. This allows
219 * flexibility in input strides.
220 * - The input data is decoded into a YUV tuple. Several formats are suported, see the `InputFormat`
221 * enum.
222 * - The YUV tuple is converted, using fixed point calculations, to RGB. This step can be configured
223 * using a set of coefficients to support different colorspace standards. See `CoefficientSet`.
224 * - The strip can be optionally rotated 90, 180 or 270 degrees. Since each strip is processed
225 * independently, this notably rotates each *strip*, not the entire image. This means that for 90
226 * or 270 degree rotations, the output will be in terms of several 8 x height images, and for any
227 * non-zero rotation the strips will have to be re-arranged so that the parts of the image will
228 * not be shuffled together. This limitation makes this a feature of somewhat dubious utility. 90
229 * or 270 degree rotations in images with non-even height don't seem to work properly.
230 * - The data is converted to the output RGB format. See the `OutputFormat` enum.
231 * - The data can be output either linearly line-by-line or in the swizzled 8x8 tile format used by
232 * the PICA. This is decided by the `BlockAlignment` enum. If 8x8 alignment is used, then the
233 * image must have a height divisible by 8. The image width must always be divisible by 8.
234 * - The final data is then CDMAed out to main memory and the next image strip is processed. This
235 * offers the same flexibility as the input stage.
236 *
237 * In this implementation, to avoid the combinatorial explosion of parameter combinations, common
238 * intermediate formats are used and where possible tables or parameters are used instead of
239 * diverging code paths to keep the amount of branches in check. Some steps are also merged to
240 * increase efficiency.
241 *
242 * Output for all valid settings combinations matches hardware, however output in some edge-cases
243 * differs:
244 *
245 * - `Block8x8` alignment with non-mod8 height produces different garbage patterns on the last
246 * strip, especially when combined with rotation.
247 * - Hardware, when using `Linear` alignment with a non-even height and 90 or 270 degree rotation
248 * produces misaligned output on the last strip. This implmentation produces output with the
249 * correct "expected" alignment.
250 *
251 * Hardware behaves strangely (doesn't fire the completion interrupt, for example) in these cases,
252 * so they are believed to be invalid configurations anyway.
253 */
254void PerformConversion(ConversionConfiguration& cvt) {
255 ASSERT(cvt.input_line_width % 8 == 0);
256 ASSERT(cvt.block_alignment != BlockAlignment::Block8x8 || cvt.input_lines % 8 == 0);
257 // Tiles per row
258 size_t num_tiles = cvt.input_line_width / 8;
259 ASSERT(num_tiles < MAX_TILES);
260
261 // Buffer used as a CDMA source/target.
262 std::unique_ptr<u8[]> data_buffer(new u8[cvt.input_line_width * 8 * 4]);
263 // Intermediate storage for decoded 8x8 image tiles. Always stored as RGB32.
264 std::unique_ptr<ImageTile[]> tiles(new ImageTile[num_tiles]);
265 ImageTile tmp_tile;
266
267 // LUT used to remap writes to a tile. Used to allow linear or swizzled output without
268 // requiring two different code paths.
269 const u8* tile_remap;
270 switch (cvt.block_alignment) {
271 case BlockAlignment::Linear:
272 tile_remap = linear_lut; break;
273 case BlockAlignment::Block8x8:
274 tile_remap = morton_lut; break;
275 }
276
277 for (unsigned int y = 0; y < cvt.input_lines; y += 8) {
278 unsigned int row_height = std::min(cvt.input_lines - y, 8u);
279
280 // Total size in pixels of incoming data required for this strip.
281 const size_t row_data_size = row_height * cvt.input_line_width;
282
283 u8* input_Y = data_buffer.get();
284 u8* input_U = input_Y + 8 * cvt.input_line_width;
285 u8* input_V = input_U + 8 * cvt.input_line_width / 2;
286
287 switch (cvt.input_format) {
288 case InputFormat::YUV422_Indiv8:
289 ReceiveData<1>(input_Y, cvt.src_Y, row_data_size);
290 ReceiveData<1>(input_U, cvt.src_U, row_data_size / 2);
291 ReceiveData<1>(input_V, cvt.src_V, row_data_size / 2);
292 break;
293 case InputFormat::YUV420_Indiv8:
294 ReceiveData<1>(input_Y, cvt.src_Y, row_data_size);
295 ReceiveData<1>(input_U, cvt.src_U, row_data_size / 4);
296 ReceiveData<1>(input_V, cvt.src_V, row_data_size / 4);
297 break;
298 case InputFormat::YUV422_Indiv16:
299 ReceiveData<2>(input_Y, cvt.src_Y, row_data_size);
300 ReceiveData<2>(input_U, cvt.src_U, row_data_size / 2);
301 ReceiveData<2>(input_V, cvt.src_V, row_data_size / 2);
302 break;
303 case InputFormat::YUV420_Indiv16:
304 ReceiveData<2>(input_Y, cvt.src_Y, row_data_size);
305 ReceiveData<2>(input_U, cvt.src_U, row_data_size / 4);
306 ReceiveData<2>(input_V, cvt.src_V, row_data_size / 4);
307 break;
308 case InputFormat::YUYV422_Interleaved:
309 input_U = nullptr;
310 input_V = nullptr;
311 ReceiveData<1>(input_Y, cvt.src_YUYV, row_data_size * 2);
312 break;
313 }
314
315 // Note(yuriks): If additional optimization is required, input_format can be moved to a
316 // template parameter, so that its dispatch can be moved to outside the inner loop.
317 ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(),
318 cvt.input_line_width, row_height, cvt.coefficients);
319
320 u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get());
321
322 for (int i = 0; i < num_tiles; ++i) {
323 int image_strip_width, output_stride;
324
325 switch (cvt.rotation) {
326 case Rotation::None:
327 RotateTile0(tiles[i], tmp_tile, row_height, tile_remap);
328 image_strip_width = cvt.input_line_width;
329 output_stride = 8;
330 break;
331 case Rotation::Clockwise_90:
332 RotateTile90(tiles[i], tmp_tile, row_height, tile_remap);
333 image_strip_width = 8;
334 output_stride = 8 * row_height;
335 break;
336 case Rotation::Clockwise_180:
337 // For 180 and 270 degree rotations we also invert the order of tiles in the strip,
338 // since the rotates are done individually on each tile.
339 RotateTile180(tiles[num_tiles - i - 1], tmp_tile, row_height, tile_remap);
340 image_strip_width = cvt.input_line_width;
341 output_stride = 8;
342 break;
343 case Rotation::Clockwise_270:
344 RotateTile270(tiles[num_tiles - i - 1], tmp_tile, row_height, tile_remap);
345 image_strip_width = 8;
346 output_stride = 8 * row_height;
347 break;
348 }
349
350 switch (cvt.block_alignment) {
351 case BlockAlignment::Linear:
352 WriteTileToOutput(output_buffer, tmp_tile, row_height, image_strip_width);
353 output_buffer += output_stride;
354 break;
355 case BlockAlignment::Block8x8:
356 WriteTileToOutput(output_buffer, tmp_tile, 8, 8);
357 output_buffer += TILE_SIZE;
358 break;
359 }
360 }
361
362 // Note(yuriks): If additional optimization is required, output_format can be moved to a
363 // template parameter, so that its dispatch can be moved to outside the inner loop.
364 SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size, cvt.output_format, (u8)cvt.alpha);
365 }
366}
367
368}
369}
diff --git a/src/core/hw/y2r.h b/src/core/hw/y2r.h
new file mode 100644
index 000000000..729e1eee3
--- /dev/null
+++ b/src/core/hw/y2r.h
@@ -0,0 +1,15 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
#pragma once

namespace Y2R_U {
    // Forward declaration only; this header does not need the full definition.
    struct ConversionConfiguration;
}

namespace HW {
namespace Y2R {

/**
 * Performs the YUV to RGB conversion described by `cvt`.
 *
 * @param cvt Conversion parameters. Taken by non-const reference: the source and destination
 *            buffer descriptions it contains are advanced as data is transferred.
 */
void PerformConversion(Y2R_U::ConversionConfiguration& cvt);

}
}