diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/core/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/core/hle/service/y2r_u.cpp | 426 | ||||
| -rw-r--r-- | src/core/hle/service/y2r_u.h | 96 | ||||
| -rw-r--r-- | src/core/hw/y2r.cpp | 369 | ||||
| -rw-r--r-- | src/core/hw/y2r.h | 15 |
5 files changed, 734 insertions, 174 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 057b8ca0c..4fcda4874 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt | |||
| @@ -108,6 +108,7 @@ set(SRCS | |||
| 108 | hw/gpu.cpp | 108 | hw/gpu.cpp |
| 109 | hw/hw.cpp | 109 | hw/hw.cpp |
| 110 | hw/lcd.cpp | 110 | hw/lcd.cpp |
| 111 | hw/y2r.cpp | ||
| 111 | loader/3dsx.cpp | 112 | loader/3dsx.cpp |
| 112 | loader/elf.cpp | 113 | loader/elf.cpp |
| 113 | loader/loader.cpp | 114 | loader/loader.cpp |
| @@ -233,6 +234,7 @@ set(HEADERS | |||
| 233 | hw/gpu.h | 234 | hw/gpu.h |
| 234 | hw/hw.h | 235 | hw/hw.h |
| 235 | hw/lcd.h | 236 | hw/lcd.h |
| 237 | hw/y2r.h | ||
| 236 | loader/3dsx.h | 238 | loader/3dsx.h |
| 237 | loader/elf.h | 239 | loader/elf.h |
| 238 | loader/loader.h | 240 | loader/loader.h |
diff --git a/src/core/hle/service/y2r_u.cpp b/src/core/hle/service/y2r_u.cpp index 19bfde756..17cb4f0f0 100644 --- a/src/core/hle/service/y2r_u.cpp +++ b/src/core/hle/service/y2r_u.cpp | |||
| @@ -9,8 +9,8 @@ | |||
| 9 | #include "core/hle/hle.h" | 9 | #include "core/hle/hle.h" |
| 10 | #include "core/hle/kernel/event.h" | 10 | #include "core/hle/kernel/event.h" |
| 11 | #include "core/hle/service/y2r_u.h" | 11 | #include "core/hle/service/y2r_u.h" |
| 12 | #include "core/hw/y2r.h" | ||
| 12 | #include "core/mem_map.h" | 13 | #include "core/mem_map.h" |
| 13 | #include "core/memory.h" | ||
| 14 | 14 | ||
| 15 | #include "video_core/utils.h" | 15 | #include "video_core/utils.h" |
| 16 | #include "video_core/video_core.h" | 16 | #include "video_core/video_core.h" |
| @@ -20,75 +20,73 @@ | |||
| 20 | 20 | ||
| 21 | namespace Y2R_U { | 21 | namespace Y2R_U { |
| 22 | 22 | ||
| 23 | enum class InputFormat { | 23 | struct ConversionParameters { |
| 24 | /// 8-bit input, with YUV components in separate planes and using 4:2:2 subsampling. | 24 | InputFormat input_format; |
| 25 | YUV422_Indiv8 = 0, | 25 | OutputFormat output_format; |
| 26 | /// 8-bit input, with YUV components in separate planes and using 4:2:0 subsampling. | 26 | Rotation rotation; |
| 27 | YUV420_Indiv8 = 1, | 27 | BlockAlignment block_alignment; |
| 28 | 28 | u16 input_line_width; | |
| 29 | YUV422_INDIV_16 = 2, | 29 | u16 input_lines; |
| 30 | YUV420_INDIV_16 = 3, | 30 | StandardCoefficient standard_coefficient; |
| 31 | YUV422_BATCH = 4, | 31 | u8 reserved; |
| 32 | u16 alpha; | ||
| 32 | }; | 33 | }; |
| 34 | static_assert(sizeof(ConversionParameters) == 12, "ConversionParameters struct has incorrect size"); | ||
| 33 | 35 | ||
| 34 | enum class OutputFormat { | 36 | static Kernel::SharedPtr<Kernel::Event> completion_event; |
| 35 | Rgb32 = 0, | 37 | static ConversionConfiguration conversion; |
| 36 | Rgb24 = 1, | ||
| 37 | Rgb16_555 = 2, | ||
| 38 | Rgb16_565 = 3, | ||
| 39 | }; | ||
| 40 | 38 | ||
| 41 | enum class Rotation { | 39 | static const CoefficientSet standard_coefficients[4] = { |
| 42 | None = 0, | 40 | {{ 0x100, 0x166, 0xB6, 0x58, 0x1C5, -0x166F, 0x10EE, -0x1C5B }}, // ITU_Rec601 |
| 43 | Clockwise_90 = 1, | 41 | {{ 0x100, 0x193, 0x77, 0x2F, 0x1DB, -0x1933, 0xA7C, -0x1D51 }}, // ITU_Rec709 |
| 44 | Clockwise_180 = 2, | 42 | {{ 0x12A, 0x198, 0xD0, 0x64, 0x204, -0x1BDE, 0x10F2, -0x229B }}, // ITU_Rec601_Scaling |
| 45 | Clockwise_270 = 3, | 43 | {{ 0x12A, 0x1CA, 0x88, 0x36, 0x21C, -0x1F04, 0x99C, -0x2421 }}, // ITU_Rec709_Scaling |
| 46 | }; | 44 | }; |
| 47 | 45 | ||
| 48 | enum class BlockAlignment { | 46 | ResultCode ConversionConfiguration::SetInputLineWidth(u16 width) { |
| 49 | /// Image is output in linear format suitable for use as a framebuffer. | 47 | if (width == 0 || width > 1024 || width % 8 != 0) { |
| 50 | Linear = 0, | 48 | return ResultCode(ErrorDescription::OutOfRange, ErrorModule::CAM, |
| 51 | /// Image is output in tiled PICA format, suitable for use as a texture. | 49 | ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053FD |
| 52 | Block8x8 = 1, | 50 | } |
| 53 | }; | ||
| 54 | 51 | ||
| 55 | enum class StandardCoefficient { | 52 | // Note: The hardware uses the register value 0 to represent a width of 1024, so for a width of |
| 56 | ITU_Rec601 = 0, | 53 | // 1024 the `camera` module would set the value 0 here, but we don't need to emulate this |
| 57 | ITU_Rec709 = 1, | 54 | // internal detail. |
| 58 | ITU_Rec601_Scaling = 2, | 55 | this->input_line_width = width; |
| 59 | ITU_Rec709_Scaling = 3, | 56 | return RESULT_SUCCESS; |
| 60 | }; | 57 | } |
| 61 | 58 | ||
| 62 | static Kernel::SharedPtr<Kernel::Event> completion_event; | 59 | ResultCode ConversionConfiguration::SetInputLines(u16 lines) { |
| 60 | if (lines == 0 || lines > 1024) { | ||
| 61 | return ResultCode(ErrorDescription::OutOfRange, ErrorModule::CAM, | ||
| 62 | ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053FD | ||
| 63 | } | ||
| 63 | 64 | ||
| 64 | struct ConversionParameters { | 65 | // Note: In what appears to be a bug, the `camera` module does not set the hardware register at |
| 65 | InputFormat input_format; | 66 | // all if `lines` is 1024, so the conversion uses the last value that was set. The intention |
| 66 | OutputFormat output_format; | 67 | // was probably to set it to 0 like in SetInputLineWidth. |
| 67 | Rotation rotation; | 68 | if (lines != 1024) { |
| 68 | BlockAlignment alignment; | 69 | this->input_lines = lines; |
| 69 | u16 input_line_width; | 70 | } |
| 70 | u16 input_lines; | 71 | return RESULT_SUCCESS; |
| 72 | } | ||
| 71 | 73 | ||
| 72 | // Input parameters for the Y (luma) plane | 74 | ResultCode ConversionConfiguration::SetStandardCoefficient(StandardCoefficient standard_coefficient) { |
| 73 | VAddr srcY_address; | 75 | size_t index = static_cast<size_t>(standard_coefficient); |
| 74 | u32 srcY_image_size; | 76 | if (index >= 4) { |
| 75 | u16 srcY_transfer_unit; | 77 | return ResultCode(ErrorDescription::InvalidEnumValue, ErrorModule::CAM, |
| 76 | u16 srcY_stride; | 78 | ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053ED |
| 77 | 79 | } | |
| 78 | // Output parameters for the conversion results | ||
| 79 | VAddr dst_address; | ||
| 80 | u32 dst_image_size; | ||
| 81 | u16 dst_transfer_unit; | ||
| 82 | u16 dst_stride; | ||
| 83 | }; | ||
| 84 | 80 | ||
| 85 | static ConversionParameters conversion_params; | 81 | std::memcpy(coefficients.data(), standard_coefficients[index].data(), sizeof(coefficients)); |
| 82 | return RESULT_SUCCESS; | ||
| 83 | } | ||
| 86 | 84 | ||
| 87 | static void SetInputFormat(Service::Interface* self) { | 85 | static void SetInputFormat(Service::Interface* self) { |
| 88 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 86 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 89 | 87 | ||
| 90 | conversion_params.input_format = static_cast<InputFormat>(cmd_buff[1]); | 88 | conversion.input_format = static_cast<InputFormat>(cmd_buff[1]); |
| 91 | LOG_DEBUG(Service_Y2R, "called input_format=%u", conversion_params.input_format); | 89 | LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format); |
| 92 | 90 | ||
| 93 | cmd_buff[1] = RESULT_SUCCESS.raw; | 91 | cmd_buff[1] = RESULT_SUCCESS.raw; |
| 94 | } | 92 | } |
| @@ -96,8 +94,8 @@ static void SetInputFormat(Service::Interface* self) { | |||
| 96 | static void SetOutputFormat(Service::Interface* self) { | 94 | static void SetOutputFormat(Service::Interface* self) { |
| 97 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 95 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 98 | 96 | ||
| 99 | conversion_params.output_format = static_cast<OutputFormat>(cmd_buff[1]); | 97 | conversion.output_format = static_cast<OutputFormat>(cmd_buff[1]); |
| 100 | LOG_DEBUG(Service_Y2R, "called output_format=%u", conversion_params.output_format); | 98 | LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format); |
| 101 | 99 | ||
| 102 | cmd_buff[1] = RESULT_SUCCESS.raw; | 100 | cmd_buff[1] = RESULT_SUCCESS.raw; |
| 103 | } | 101 | } |
| @@ -105,8 +103,8 @@ static void SetOutputFormat(Service::Interface* self) { | |||
| 105 | static void SetRotation(Service::Interface* self) { | 103 | static void SetRotation(Service::Interface* self) { |
| 106 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 104 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 107 | 105 | ||
| 108 | conversion_params.rotation = static_cast<Rotation>(cmd_buff[1]); | 106 | conversion.rotation = static_cast<Rotation>(cmd_buff[1]); |
| 109 | LOG_DEBUG(Service_Y2R, "called rotation=%u", conversion_params.rotation); | 107 | LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation); |
| 110 | 108 | ||
| 111 | cmd_buff[1] = RESULT_SUCCESS.raw; | 109 | cmd_buff[1] = RESULT_SUCCESS.raw; |
| 112 | } | 110 | } |
| @@ -114,18 +112,26 @@ static void SetRotation(Service::Interface* self) { | |||
| 114 | static void SetBlockAlignment(Service::Interface* self) { | 112 | static void SetBlockAlignment(Service::Interface* self) { |
| 115 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 113 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 116 | 114 | ||
| 117 | conversion_params.alignment = static_cast<BlockAlignment>(cmd_buff[1]); | 115 | conversion.block_alignment = static_cast<BlockAlignment>(cmd_buff[1]); |
| 118 | LOG_DEBUG(Service_Y2R, "called alignment=%u", conversion_params.alignment); | 116 | LOG_DEBUG(Service_Y2R, "called alignment=%hhu", conversion.block_alignment); |
| 119 | 117 | ||
| 120 | cmd_buff[1] = RESULT_SUCCESS.raw; | 118 | cmd_buff[1] = RESULT_SUCCESS.raw; |
| 121 | } | 119 | } |
| 122 | 120 | ||
| 121 | static void SetTransferEndInterrupt(Service::Interface* self) { | ||
| 122 | u32* cmd_buff = Kernel::GetCommandBuffer(); | ||
| 123 | |||
| 124 | cmd_buff[0] = 0x000D0040; | ||
| 125 | cmd_buff[1] = RESULT_SUCCESS.raw; | ||
| 126 | LOG_DEBUG(Service_Y2R, "(STUBBED) called"); | ||
| 127 | } | ||
| 128 | |||
| 123 | /** | 129 | /** |
| 124 | * Y2R_U::GetTransferEndEvent service function | 130 | * Y2R_U::GetTransferEndEvent service function |
| 125 | * Outputs: | 131 | * Outputs: |
| 126 | * 1 : Result of function, 0 on success, otherwise error code | 132 | * 1 : Result of function, 0 on success, otherwise error code |
| 127 | * 3 : The handle of the completion event | 133 | * 3 : The handle of the completion event |
| 128 | */ | 134 | */ |
| 129 | static void GetTransferEndEvent(Service::Interface* self) { | 135 | static void GetTransferEndEvent(Service::Interface* self) { |
| 130 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 136 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 131 | 137 | ||
| @@ -137,138 +143,132 @@ static void GetTransferEndEvent(Service::Interface* self) { | |||
| 137 | static void SetSendingY(Service::Interface* self) { | 143 | static void SetSendingY(Service::Interface* self) { |
| 138 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 144 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 139 | 145 | ||
| 140 | conversion_params.srcY_address = cmd_buff[1]; | 146 | conversion.src_Y.address = cmd_buff[1]; |
| 141 | conversion_params.srcY_image_size = cmd_buff[2]; | 147 | conversion.src_Y.image_size = cmd_buff[2]; |
| 142 | conversion_params.srcY_transfer_unit = cmd_buff[3]; | 148 | conversion.src_Y.transfer_unit = cmd_buff[3]; |
| 143 | conversion_params.srcY_stride = cmd_buff[4]; | 149 | conversion.src_Y.gap = cmd_buff[4]; |
| 144 | u32 src_process_handle = cmd_buff[6]; | 150 | u32 src_process_handle = cmd_buff[6]; |
| 145 | LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, " | 151 | LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, " |
| 146 | "src_process_handle=0x%08X", conversion_params.srcY_image_size, | 152 | "src_process_handle=0x%08X", conversion.src_Y.image_size, |
| 147 | conversion_params.srcY_transfer_unit, conversion_params.srcY_stride, src_process_handle); | 153 | conversion.src_Y.transfer_unit, conversion.src_Y.gap, src_process_handle); |
| 148 | 154 | ||
| 149 | cmd_buff[1] = RESULT_SUCCESS.raw; | 155 | cmd_buff[1] = RESULT_SUCCESS.raw; |
| 150 | } | 156 | } |
| 151 | 157 | ||
| 152 | static void SetReceiving(Service::Interface* self) { | 158 | static void SetSendingU(Service::Interface* self) { |
| 153 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 159 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 154 | 160 | ||
| 155 | conversion_params.dst_address = cmd_buff[1]; | 161 | conversion.src_U.address = cmd_buff[1]; |
| 156 | conversion_params.dst_image_size = cmd_buff[2]; | 162 | conversion.src_U.image_size = cmd_buff[2]; |
| 157 | conversion_params.dst_transfer_unit = cmd_buff[3]; | 163 | conversion.src_U.transfer_unit = cmd_buff[3]; |
| 158 | conversion_params.dst_stride = cmd_buff[4]; | 164 | conversion.src_U.gap = cmd_buff[4]; |
| 159 | u32 dst_process_handle = cmd_buff[6]; | 165 | u32 src_process_handle = cmd_buff[6]; |
| 160 | LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, " | 166 | LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, " |
| 161 | "dst_process_handle=0x%08X", conversion_params.dst_image_size, | 167 | "src_process_handle=0x%08X", conversion.src_U.image_size, |
| 162 | conversion_params.dst_transfer_unit, conversion_params.dst_stride, | 168 | conversion.src_U.transfer_unit, conversion.src_U.gap, src_process_handle); |
| 163 | dst_process_handle); | ||
| 164 | 169 | ||
| 165 | cmd_buff[1] = RESULT_SUCCESS.raw; | 170 | cmd_buff[1] = RESULT_SUCCESS.raw; |
| 166 | } | 171 | } |
| 167 | 172 | ||
| 168 | static void SetInputLineWidth(Service::Interface* self) { | 173 | static void SetSendingV(Service::Interface* self) { |
| 169 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 174 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 170 | 175 | ||
| 171 | conversion_params.input_line_width = cmd_buff[1]; | 176 | conversion.src_V.address = cmd_buff[1]; |
| 172 | LOG_DEBUG(Service_Y2R, "input_line_width=%u", conversion_params.input_line_width); | 177 | conversion.src_V.image_size = cmd_buff[2]; |
| 178 | conversion.src_V.transfer_unit = cmd_buff[3]; | ||
| 179 | conversion.src_V.gap = cmd_buff[4]; | ||
| 180 | u32 src_process_handle = cmd_buff[6]; | ||
| 181 | LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, " | ||
| 182 | "src_process_handle=0x%08X", conversion.src_V.image_size, | ||
| 183 | conversion.src_V.transfer_unit, conversion.src_V.gap, src_process_handle); | ||
| 173 | 184 | ||
| 174 | cmd_buff[1] = RESULT_SUCCESS.raw; | 185 | cmd_buff[1] = RESULT_SUCCESS.raw; |
| 175 | } | 186 | } |
| 176 | 187 | ||
| 177 | static void SetInputLines(Service::Interface* self) { | 188 | static void SetSendingYUYV(Service::Interface* self) { |
| 178 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 189 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 179 | 190 | ||
| 180 | conversion_params.input_lines = cmd_buff[1]; | 191 | conversion.src_YUYV.address = cmd_buff[1]; |
| 181 | LOG_DEBUG(Service_Y2R, "input_line_number=%u", conversion_params.input_lines); | 192 | conversion.src_YUYV.image_size = cmd_buff[2]; |
| 193 | conversion.src_YUYV.transfer_unit = cmd_buff[3]; | ||
| 194 | conversion.src_YUYV.gap = cmd_buff[4]; | ||
| 195 | u32 src_process_handle = cmd_buff[6]; | ||
| 196 | LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, " | ||
| 197 | "src_process_handle=0x%08X", conversion.src_YUYV.image_size, | ||
| 198 | conversion.src_YUYV.transfer_unit, conversion.src_YUYV.gap, src_process_handle); | ||
| 182 | 199 | ||
| 183 | cmd_buff[1] = RESULT_SUCCESS.raw; | 200 | cmd_buff[1] = RESULT_SUCCESS.raw; |
| 184 | } | 201 | } |
| 185 | 202 | ||
| 186 | static void StartConversion(Service::Interface* self) { | 203 | static void SetReceiving(Service::Interface* self) { |
| 187 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 204 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 188 | 205 | ||
| 189 | const u8* srcY_buffer = Memory::GetPointer(conversion_params.srcY_address); | 206 | conversion.dst.address = cmd_buff[1]; |
| 190 | u8* dst_buffer = Memory::GetPointer(conversion_params.dst_address); | 207 | conversion.dst.image_size = cmd_buff[2]; |
| 191 | 208 | conversion.dst.transfer_unit = cmd_buff[3]; | |
| 192 | // TODO: support color and other kinds of conversions | 209 | conversion.dst.gap = cmd_buff[4]; |
| 193 | ASSERT(conversion_params.input_format == InputFormat::YUV422_Indiv8 | 210 | u32 dst_process_handle = cmd_buff[6]; |
| 194 | || conversion_params.input_format == InputFormat::YUV420_Indiv8); | 211 | LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, " |
| 195 | ASSERT(conversion_params.output_format == OutputFormat::Rgb24); | 212 | "dst_process_handle=0x%08X", conversion.dst.image_size, |
| 196 | ASSERT(conversion_params.rotation == Rotation::None); | 213 | conversion.dst.transfer_unit, conversion.dst.gap, |
| 197 | const int bpp = 3; | 214 | dst_process_handle); |
| 198 | |||
| 199 | switch (conversion_params.alignment) { | ||
| 200 | case BlockAlignment::Linear: | ||
| 201 | { | ||
| 202 | const size_t input_lines = conversion_params.input_lines; | ||
| 203 | const size_t input_line_width = conversion_params.input_line_width; | ||
| 204 | const size_t srcY_stride = conversion_params.srcY_stride; | ||
| 205 | const size_t dst_stride = conversion_params.dst_stride; | ||
| 206 | |||
| 207 | size_t srcY_offset = 0; | ||
| 208 | size_t dst_offset = 0; | ||
| 209 | |||
| 210 | for (size_t line = 0; line < input_lines; ++line) { | ||
| 211 | for (size_t i = 0; i < input_line_width; ++i) { | ||
| 212 | u8 Y = srcY_buffer[srcY_offset]; | ||
| 213 | dst_buffer[dst_offset + 0] = Y; | ||
| 214 | dst_buffer[dst_offset + 1] = Y; | ||
| 215 | dst_buffer[dst_offset + 2] = Y; | ||
| 216 | |||
| 217 | srcY_offset += 1; | ||
| 218 | dst_offset += bpp; | ||
| 219 | } | ||
| 220 | srcY_offset += srcY_stride; | ||
| 221 | dst_offset += dst_stride; | ||
| 222 | } | ||
| 223 | break; | ||
| 224 | } | ||
| 225 | case BlockAlignment::Block8x8: | ||
| 226 | { | ||
| 227 | const size_t input_lines = conversion_params.input_lines; | ||
| 228 | const size_t input_line_width = conversion_params.input_line_width; | ||
| 229 | const size_t srcY_stride = conversion_params.srcY_stride; | ||
| 230 | const size_t dst_transfer_unit = conversion_params.dst_transfer_unit; | ||
| 231 | const size_t dst_stride = conversion_params.dst_stride; | ||
| 232 | 215 | ||
| 233 | size_t srcY_offset = 0; | 216 | cmd_buff[1] = RESULT_SUCCESS.raw; |
| 234 | size_t dst_tile_line_offs = 0; | 217 | } |
| 235 | 218 | ||
| 236 | const size_t tile_size = 8 * 8 * bpp; | 219 | static void SetInputLineWidth(Service::Interface* self) { |
| 220 | u32* cmd_buff = Kernel::GetCommandBuffer(); | ||
| 237 | 221 | ||
| 238 | for (size_t line = 0; line < input_lines;) { | 222 | LOG_DEBUG(Service_Y2R, "called input_line_width=%u", cmd_buff[1]); |
| 239 | size_t max_line = line + 8; | 223 | cmd_buff[1] = conversion.SetInputLineWidth(cmd_buff[1]).raw; |
| 224 | } | ||
| 240 | 225 | ||
| 241 | for (; line < max_line; ++line) { | 226 | static void SetInputLines(Service::Interface* self) { |
| 242 | for (size_t x = 0; x < input_line_width; ++x) { | 227 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 243 | size_t tile_x = x / 8; | ||
| 244 | 228 | ||
| 245 | size_t dst_tile_offs = dst_tile_line_offs + tile_x * tile_size; | 229 | LOG_DEBUG(Service_Y2R, "called input_line_number=%u", cmd_buff[1]); |
| 246 | size_t tile_i = VideoCore::MortonInterleave((u32)x, (u32)line); | 230 | cmd_buff[1] = conversion.SetInputLines(cmd_buff[1]).raw; |
| 231 | } | ||
| 247 | 232 | ||
| 248 | size_t dst_offset = dst_tile_offs + tile_i * bpp; | 233 | static void SetCoefficient(Service::Interface* self) { |
| 234 | u32* cmd_buff = Kernel::GetCommandBuffer(); | ||
| 249 | 235 | ||
| 250 | u8 Y = srcY_buffer[srcY_offset]; | 236 | const u16* coefficients = reinterpret_cast<const u16*>(&cmd_buff[1]); |
| 251 | dst_buffer[dst_offset + 0] = Y; | 237 | std::memcpy(conversion.coefficients.data(), coefficients, sizeof(CoefficientSet)); |
| 252 | dst_buffer[dst_offset + 1] = Y; | 238 | LOG_DEBUG(Service_Y2R, "called coefficients=[%hX, %hX, %hX, %hX, %hX, %hX, %hX, %hX]", |
| 253 | dst_buffer[dst_offset + 2] = Y; | 239 | coefficients[0], coefficients[1], coefficients[2], coefficients[3], |
| 240 | coefficients[4], coefficients[5], coefficients[6], coefficients[7]); | ||
| 254 | 241 | ||
| 255 | srcY_offset += 1; | 242 | cmd_buff[1] = RESULT_SUCCESS.raw; |
| 256 | } | 243 | } |
| 257 | 244 | ||
| 258 | srcY_offset += srcY_stride; | 245 | static void SetStandardCoefficient(Service::Interface* self) { |
| 259 | } | 246 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 260 | 247 | ||
| 261 | dst_tile_line_offs += dst_transfer_unit + dst_stride; | 248 | LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u", cmd_buff[1]); |
| 262 | } | 249 | |
| 263 | break; | 250 | cmd_buff[1] = conversion.SetStandardCoefficient((StandardCoefficient)cmd_buff[1]).raw; |
| 264 | } | 251 | } |
| 265 | } | ||
| 266 | 252 | ||
| 267 | // dst_image_size would seem to be perfect for this, but it doesn't include the stride :( | 253 | static void SetAlpha(Service::Interface* self) { |
| 268 | u32 total_output_size = conversion_params.input_lines * | 254 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 269 | (conversion_params.dst_transfer_unit + conversion_params.dst_stride); | 255 | |
| 256 | conversion.alpha = cmd_buff[1]; | ||
| 257 | LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha); | ||
| 258 | |||
| 259 | cmd_buff[1] = RESULT_SUCCESS.raw; | ||
| 260 | } | ||
| 261 | |||
| 262 | static void StartConversion(Service::Interface* self) { | ||
| 263 | u32* cmd_buff = Kernel::GetCommandBuffer(); | ||
| 264 | |||
| 265 | HW::Y2R::PerformConversion(conversion); | ||
| 266 | |||
| 267 | // dst_image_size would seem to be perfect for this, but it doesn't include the gap :( | ||
| 268 | u32 total_output_size = conversion.input_lines * | ||
| 269 | (conversion.dst.transfer_unit + conversion.dst.gap); | ||
| 270 | VideoCore::g_renderer->hw_rasterizer->NotifyFlush( | 270 | VideoCore::g_renderer->hw_rasterizer->NotifyFlush( |
| 271 | Memory::VirtualToPhysicalAddress(conversion_params.dst_address), total_output_size); | 271 | Memory::VirtualToPhysicalAddress(conversion.dst.address), total_output_size); |
| 272 | 272 | ||
| 273 | LOG_DEBUG(Service_Y2R, "called"); | 273 | LOG_DEBUG(Service_Y2R, "called"); |
| 274 | completion_event->Signal(); | 274 | completion_event->Signal(); |
| @@ -276,12 +276,20 @@ static void StartConversion(Service::Interface* self) { | |||
| 276 | cmd_buff[1] = RESULT_SUCCESS.raw; | 276 | cmd_buff[1] = RESULT_SUCCESS.raw; |
| 277 | } | 277 | } |
| 278 | 278 | ||
| 279 | static void StopConversion(Service::Interface* self) { | ||
| 280 | u32* cmd_buff = Kernel::GetCommandBuffer(); | ||
| 281 | |||
| 282 | cmd_buff[0] = 0x00270040; | ||
| 283 | cmd_buff[1] = RESULT_SUCCESS.raw; | ||
| 284 | LOG_DEBUG(Service_Y2R, "called"); | ||
| 285 | } | ||
| 286 | |||
| 279 | /** | 287 | /** |
| 280 | * Y2R_U::IsBusyConversion service function | 288 | * Y2R_U::IsBusyConversion service function |
| 281 | * Outputs: | 289 | * Outputs: |
| 282 | * 1 : Result of function, 0 on success, otherwise error code | 290 | * 1 : Result of function, 0 on success, otherwise error code |
| 283 | * 2 : 1 if there's a conversion running, otherwise 0. | 291 | * 2 : 1 if there's a conversion running, otherwise 0. |
| 284 | */ | 292 | */ |
| 285 | static void IsBusyConversion(Service::Interface* self) { | 293 | static void IsBusyConversion(Service::Interface* self) { |
| 286 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 294 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 287 | 295 | ||
| @@ -290,6 +298,40 @@ static void IsBusyConversion(Service::Interface* self) { | |||
| 290 | LOG_DEBUG(Service_Y2R, "called"); | 298 | LOG_DEBUG(Service_Y2R, "called"); |
| 291 | } | 299 | } |
| 292 | 300 | ||
| 301 | /** | ||
| 302 | * Y2R_U::SetConversionParams service function | ||
| 303 | */ | ||
| 304 | static void SetConversionParams(Service::Interface* self) { | ||
| 305 | u32* cmd_buff = Kernel::GetCommandBuffer(); | ||
| 306 | |||
| 307 | auto params = reinterpret_cast<const ConversionParameters*>(&cmd_buff[1]); | ||
| 308 | LOG_DEBUG(Service_Y2R, | ||
| 309 | "called input_format=%hhu output_format=%hhu rotation=%hhu block_alignment=%hhu " | ||
| 310 | "input_line_width=%hu input_lines=%hu standard_coefficient=%hhu " | ||
| 311 | "reserved=%hhu alpha=%hX", | ||
| 312 | params->input_format, params->output_format, params->rotation, params->block_alignment, | ||
| 313 | params->input_line_width, params->input_lines, params->standard_coefficient, | ||
| 314 | params->reserved, params->alpha); | ||
| 315 | |||
| 316 | ResultCode result = RESULT_SUCCESS; | ||
| 317 | |||
| 318 | conversion.input_format = params->input_format; | ||
| 319 | conversion.output_format = params->output_format; | ||
| 320 | conversion.rotation = params->rotation; | ||
| 321 | conversion.block_alignment = params->block_alignment; | ||
| 322 | result = conversion.SetInputLineWidth(params->input_line_width); | ||
| 323 | if (result.IsError()) goto cleanup; | ||
| 324 | result = conversion.SetInputLines(params->input_lines); | ||
| 325 | if (result.IsError()) goto cleanup; | ||
| 326 | result = conversion.SetStandardCoefficient(params->standard_coefficient); | ||
| 327 | if (result.IsError()) goto cleanup; | ||
| 328 | conversion.alpha = params->alpha; | ||
| 329 | |||
| 330 | cleanup: | ||
| 331 | cmd_buff[0] = 0x00290040; // TODO verify | ||
| 332 | cmd_buff[1] = result.raw; | ||
| 333 | } | ||
| 334 | |||
| 293 | static void PingProcess(Service::Interface* self) { | 335 | static void PingProcess(Service::Interface* self) { |
| 294 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 336 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 295 | 337 | ||
| @@ -298,27 +340,63 @@ static void PingProcess(Service::Interface* self) { | |||
| 298 | LOG_WARNING(Service_Y2R, "(STUBBED) called"); | 340 | LOG_WARNING(Service_Y2R, "(STUBBED) called"); |
| 299 | } | 341 | } |
| 300 | 342 | ||
| 343 | static void DriverInitialize(Service::Interface* self) { | ||
| 344 | u32* cmd_buff = Kernel::GetCommandBuffer(); | ||
| 345 | |||
| 346 | conversion.input_format = InputFormat::YUV422_Indiv8; | ||
| 347 | conversion.output_format = OutputFormat::RGBA8; | ||
| 348 | conversion.rotation = Rotation::None; | ||
| 349 | conversion.block_alignment = BlockAlignment::Linear; | ||
| 350 | conversion.coefficients.fill(0); | ||
| 351 | conversion.SetInputLineWidth(1024); | ||
| 352 | conversion.SetInputLines(1024); | ||
| 353 | conversion.alpha = 0; | ||
| 354 | |||
| 355 | ConversionBuffer zero_buffer = {}; | ||
| 356 | conversion.src_Y = zero_buffer; | ||
| 357 | conversion.src_U = zero_buffer; | ||
| 358 | conversion.src_V = zero_buffer; | ||
| 359 | conversion.dst = zero_buffer; | ||
| 360 | |||
| 361 | completion_event->Clear(); | ||
| 362 | |||
| 363 | cmd_buff[0] = 0x002B0040; | ||
| 364 | cmd_buff[1] = RESULT_SUCCESS.raw; | ||
| 365 | LOG_DEBUG(Service_Y2R, "called"); | ||
| 366 | } | ||
| 367 | |||
| 368 | static void DriverFinalize(Service::Interface* self) { | ||
| 369 | u32* cmd_buff = Kernel::GetCommandBuffer(); | ||
| 370 | |||
| 371 | cmd_buff[0] = 0x002C0040; | ||
| 372 | cmd_buff[1] = RESULT_SUCCESS.raw; | ||
| 373 | LOG_DEBUG(Service_Y2R, "called"); | ||
| 374 | } | ||
| 375 | |||
| 301 | const Interface::FunctionInfo FunctionTable[] = { | 376 | const Interface::FunctionInfo FunctionTable[] = { |
| 302 | {0x00010040, SetInputFormat, "SetInputFormat"}, | 377 | {0x00010040, SetInputFormat, "SetInputFormat"}, |
| 303 | {0x00030040, SetOutputFormat, "SetOutputFormat"}, | 378 | {0x00030040, SetOutputFormat, "SetOutputFormat"}, |
| 304 | {0x00050040, SetRotation, "SetRotation"}, | 379 | {0x00050040, SetRotation, "SetRotation"}, |
| 305 | {0x00070040, SetBlockAlignment, "SetBlockAlignment"}, | 380 | {0x00070040, SetBlockAlignment, "SetBlockAlignment"}, |
| 306 | {0x000D0040, nullptr, "SetTransferEndInterrupt"}, | 381 | {0x000D0040, SetTransferEndInterrupt, "SetTransferEndInterrupt"}, |
| 307 | {0x000F0000, GetTransferEndEvent, "GetTransferEndEvent"}, | 382 | {0x000F0000, GetTransferEndEvent, "GetTransferEndEvent"}, |
| 308 | {0x00100102, SetSendingY, "SetSendingY"}, | 383 | {0x00100102, SetSendingY, "SetSendingY"}, |
| 309 | {0x00110102, nullptr, "SetSendingU"}, | 384 | {0x00110102, SetSendingU, "SetSendingU"}, |
| 310 | {0x00120102, nullptr, "SetSendingV"}, | 385 | {0x00120102, SetSendingV, "SetSendingV"}, |
| 386 | {0x00130102, SetSendingYUYV, "SetSendingYUYV"}, | ||
| 311 | {0x00180102, SetReceiving, "SetReceiving"}, | 387 | {0x00180102, SetReceiving, "SetReceiving"}, |
| 312 | {0x001A0040, SetInputLineWidth, "SetInputLineWidth"}, | 388 | {0x001A0040, SetInputLineWidth, "SetInputLineWidth"}, |
| 313 | {0x001C0040, SetInputLines, "SetInputLines"}, | 389 | {0x001C0040, SetInputLines, "SetInputLines"}, |
| 314 | {0x00200040, nullptr, "SetStandardCoefficient"}, | 390 | {0x001E0100, SetCoefficient, "SetCoefficient"}, |
| 315 | {0x00220040, nullptr, "SetAlpha"}, | 391 | {0x00200040, SetStandardCoefficient, "SetStandardCoefficient"}, |
| 392 | {0x00220040, SetAlpha, "SetAlpha"}, | ||
| 316 | {0x00260000, StartConversion, "StartConversion"}, | 393 | {0x00260000, StartConversion, "StartConversion"}, |
| 317 | {0x00270000, nullptr, "StopConversion"}, | 394 | {0x00270000, StopConversion, "StopConversion"}, |
| 318 | {0x00280000, IsBusyConversion, "IsBusyConversion"}, | 395 | {0x00280000, IsBusyConversion, "IsBusyConversion"}, |
| 396 | {0x002901C0, SetConversionParams, "SetConversionParams"}, | ||
| 319 | {0x002A0000, PingProcess, "PingProcess"}, | 397 | {0x002A0000, PingProcess, "PingProcess"}, |
| 320 | {0x002B0000, nullptr, "DriverInitialize"}, | 398 | {0x002B0000, DriverInitialize, "DriverInitialize"}, |
| 321 | {0x002C0000, nullptr, "DriverFinalize"}, | 399 | {0x002C0000, DriverFinalize, "DriverFinalize"}, |
| 322 | }; | 400 | }; |
| 323 | 401 | ||
| 324 | //////////////////////////////////////////////////////////////////////////////////////////////////// | 402 | //////////////////////////////////////////////////////////////////////////////////////////////////// |
| @@ -326,7 +404,7 @@ const Interface::FunctionInfo FunctionTable[] = { | |||
| 326 | 404 | ||
| 327 | Interface::Interface() { | 405 | Interface::Interface() { |
| 328 | completion_event = Kernel::Event::Create(RESETTYPE_ONESHOT, "Y2R:Completed"); | 406 | completion_event = Kernel::Event::Create(RESETTYPE_ONESHOT, "Y2R:Completed"); |
| 329 | std::memset(&conversion_params, 0, sizeof(conversion_params)); | 407 | std::memset(&conversion, 0, sizeof(conversion)); |
| 330 | 408 | ||
| 331 | Register(FunctionTable); | 409 | Register(FunctionTable); |
| 332 | } | 410 | } |
diff --git a/src/core/hle/service/y2r_u.h b/src/core/hle/service/y2r_u.h index 171aecfd1..7df47fcb9 100644 --- a/src/core/hle/service/y2r_u.h +++ b/src/core/hle/service/y2r_u.h | |||
| @@ -4,6 +4,10 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | |||
| 7 | #include "core/hle/service/service.h" | 11 | #include "core/hle/service/service.h" |
| 8 | 12 | ||
| 9 | //////////////////////////////////////////////////////////////////////////////////////////////////// | 13 | //////////////////////////////////////////////////////////////////////////////////////////////////// |
| @@ -11,6 +15,98 @@ | |||
| 11 | 15 | ||
| 12 | namespace Y2R_U { | 16 | namespace Y2R_U { |
| 13 | 17 | ||
| 18 | enum class InputFormat : u8 { | ||
| 19 | /// 8-bit input, with YUV components in separate planes and 4:2:2 subsampling. | ||
| 20 | YUV422_Indiv8 = 0, | ||
| 21 | /// 8-bit input, with YUV components in separate planes and 4:2:0 subsampling. | ||
| 22 | YUV420_Indiv8 = 1, | ||
| 23 | |||
| 24 | /// 16-bit input (only LSB used), with YUV components in separate planes and 4:2:2 subsampling. | ||
| 25 | YUV422_Indiv16 = 2, | ||
| 26 | /// 16-bit input (only LSB used), with YUV components in separate planes and 4:2:0 subsampling. | ||
| 27 | YUV420_Indiv16 = 3, | ||
| 28 | |||
| 29 | /// 8-bit input, with a single interleaved stream in YUYV format and 4:2:2 subsampling. | ||
| 30 | YUYV422_Interleaved = 4, | ||
| 31 | }; | ||
| 32 | |||
| 33 | enum class OutputFormat : u8 { | ||
| 34 | RGBA8 = 0, | ||
| 35 | RGB8 = 1, | ||
| 36 | RGB5A1 = 2, | ||
| 37 | RGB565 = 3, | ||
| 38 | }; | ||
| 39 | |||
| 40 | enum class Rotation : u8 { | ||
| 41 | None = 0, | ||
| 42 | Clockwise_90 = 1, | ||
| 43 | Clockwise_180 = 2, | ||
| 44 | Clockwise_270 = 3, | ||
| 45 | }; | ||
| 46 | |||
| 47 | enum class BlockAlignment : u8 { | ||
| 48 | /// Image is output in linear format suitable for use as a framebuffer. | ||
| 49 | Linear = 0, | ||
| 50 | /// Image is output in tiled PICA format, suitable for use as a texture. | ||
| 51 | Block8x8 = 1, | ||
| 52 | }; | ||
| 53 | |||
| 54 | enum class StandardCoefficient : u8 { | ||
| 55 | /// ITU Rec. BT.601 primaries, with PC ranges. | ||
| 56 | ITU_Rec601 = 0, | ||
| 57 | /// ITU Rec. BT.709 primaries, with PC ranges. | ||
| 58 | ITU_Rec709 = 1, | ||
| 59 | /// ITU Rec. BT.601 primaries, with TV ranges. | ||
| 60 | ITU_Rec601_Scaling = 2, | ||
| 61 | /// ITU Rec. BT.709 primaries, with TV ranges. | ||
| 62 | ITU_Rec709_Scaling = 3, | ||
| 63 | }; | ||
| 64 | |||
| 65 | /** | ||
| 66 | * A set of coefficients configuring the YUV to RGB conversion. Coefficients 0-4 are unsigned 2.8 | ||
| 67 | * fixed point numbers representing entries on the conversion matrix, while coefficients 5-7 are | ||
| 68 | * signed 11.5 fixed point numbers added as offsets to the RGB result. | ||
| 69 | * | ||
| 70 | * The overall conversion process formula is: | ||
| 71 | * ``` | ||
| 72 | * R = trunc((c_0 * Y + c_1 * V) + c_5 + 0.75) | ||
| 73 | * G = trunc((c_0 * Y - c_3 * U - c_2 * V) + c_6 + 0.75) | ||
| 74 | * B = trunc((c_0 * Y + c_4 * U ) + c_7 + 0.75) | ||
| 75 | * ``` | ||
| 76 | */ | ||
| 77 | using CoefficientSet = std::array<s16, 8>; | ||
| 78 | |||
| 79 | struct ConversionBuffer { | ||
| 80 | /// Current reading/writing address of this buffer. | ||
| 81 | VAddr address; | ||
| 82 | /// Remaining amount of bytes to be DMAed, does not include the inter-transfer gap. | ||
| 83 | u32 image_size; | ||
| 84 | /// Size of a single DMA transfer. | ||
| 85 | u16 transfer_unit; | ||
| 86 | /// Amount of bytes to be skipped between copying each `transfer_unit` bytes. | ||
| 87 | u16 gap; | ||
| 88 | }; | ||
| 89 | |||
| 90 | struct ConversionConfiguration { | ||
| 91 | InputFormat input_format; | ||
| 92 | OutputFormat output_format; | ||
| 93 | Rotation rotation; | ||
| 94 | BlockAlignment block_alignment; | ||
| 95 | u16 input_line_width; | ||
| 96 | u16 input_lines; | ||
| 97 | CoefficientSet coefficients; | ||
| 98 | u16 alpha; | ||
| 99 | |||
| 100 | /// Input parameters for the Y (luma), U and V (chroma) planes and the interleaved YUYV stream | ||
| 101 | ConversionBuffer src_Y, src_U, src_V, src_YUYV; | ||
| 102 | /// Output parameters for the conversion results | ||
| 103 | ConversionBuffer dst; | ||
| 104 | |||
| 105 | ResultCode SetInputLineWidth(u16 width); | ||
| 106 | ResultCode SetInputLines(u16 lines); | ||
| 107 | ResultCode SetStandardCoefficient(StandardCoefficient standard_coefficient); | ||
| 108 | }; | ||
| 109 | |||
| 14 | class Interface : public Service::Interface { | 110 | class Interface : public Service::Interface { |
| 15 | public: | 111 | public: |
| 16 | Interface(); | 112 | Interface(); |
diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp new file mode 100644 index 000000000..5b7fb39e1 --- /dev/null +++ b/src/core/hw/y2r.cpp | |||
| @@ -0,0 +1,369 @@ | |||
| 1 | // Copyright 2015 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <numeric> | ||
| 7 | |||
| 8 | #include "common/assert.h" | ||
| 9 | #include "common/color.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "common/math_util.h" | ||
| 12 | #include "common/vector_math.h" | ||
| 13 | |||
| 14 | #include "core/hle/service/y2r_u.h" | ||
| 15 | #include "core/memory.h" | ||
| 16 | |||
| 17 | namespace HW { | ||
| 18 | namespace Y2R { | ||
| 19 | |||
| 20 | using namespace Y2R_U; | ||
| 21 | |||
| 22 | static const size_t MAX_TILES = 1024 / 8; | ||
| 23 | static const size_t TILE_SIZE = 8 * 8; | ||
| 24 | using ImageTile = std::array<u32, TILE_SIZE>; | ||
| 25 | |||
| 26 | /// Converts an image strip from the source YUV format into individual 8x8 RGB32 tiles. | ||
| 27 | static void ConvertYUVToRGB(InputFormat input_format, | ||
| 28 | const u8* input_Y, const u8* input_U, const u8* input_V, ImageTile output[], | ||
| 29 | unsigned int width, unsigned int height, const CoefficientSet& coefficients) { | ||
| 30 | |||
| 31 | for (unsigned int y = 0; y < height; ++y) { | ||
| 32 | for (unsigned int x = 0; x < width; ++x) { | ||
| 33 | s32 Y, U, V; | ||
| 34 | switch (input_format) { | ||
| 35 | case InputFormat::YUV422_Indiv8: | ||
| 36 | case InputFormat::YUV422_Indiv16: | ||
| 37 | Y = input_Y[y * width + x]; | ||
| 38 | U = input_U[(y * width + x) / 2]; | ||
| 39 | V = input_V[(y * width + x) / 2]; | ||
| 40 | break; | ||
| 41 | case InputFormat::YUV420_Indiv8: | ||
| 42 | case InputFormat::YUV420_Indiv16: | ||
| 43 | Y = input_Y[y * width + x]; | ||
| 44 | U = input_U[((y / 2) * width + x) / 2]; | ||
| 45 | V = input_V[((y / 2) * width + x) / 2]; | ||
| 46 | break; | ||
| 47 | case InputFormat::YUYV422_Interleaved: | ||
| 48 | Y = input_Y[(y * width + x) * 2]; | ||
| 49 | U = input_Y[(y * width + (x / 2) * 2) * 2 + 1]; | ||
| 50 | V = input_Y[(y * width + (x / 2) * 2) * 2 + 3]; | ||
| 51 | break; | ||
| 52 | } | ||
| 53 | |||
| 54 | // This conversion process is bit-exact with hardware, as far as could be tested. | ||
| 55 | auto& c = coefficients; | ||
| 56 | s32 cY = c[0]*Y; | ||
| 57 | |||
| 58 | s32 r = cY + c[1]*V; | ||
| 59 | s32 g = cY - c[3]*U - c[2]*V; | ||
| 60 | s32 b = cY + c[4]*U; | ||
| 61 | |||
| 62 | const s32 rounding_offset = 0x18; | ||
| 63 | r = (r >> 3) + c[5] + rounding_offset; | ||
| 64 | g = (g >> 3) + c[6] + rounding_offset; | ||
| 65 | b = (b >> 3) + c[7] + rounding_offset; | ||
| 66 | |||
| 67 | unsigned int tile = x / 8; | ||
| 68 | unsigned int tile_x = x % 8; | ||
| 69 | u32* out = &output[tile][y * 8 + tile_x]; | ||
| 70 | |||
| 71 | using MathUtil::Clamp; | ||
| 72 | *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) | | ||
| 73 | ((u32)Clamp(g >> 5, 0, 0xFF) << 16) | | ||
| 74 | ((u32)Clamp(b >> 5, 0, 0xFF) << 8); | ||
| 75 | } | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 79 | /// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit formats to 8-bit. | ||
| 80 | template <size_t N> | ||
| 81 | static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) { | ||
| 82 | const u8* input = Memory::GetPointer(buf.address); | ||
| 83 | |||
| 84 | size_t output_unit = buf.transfer_unit / N; | ||
| 85 | ASSERT(amount_of_data % output_unit == 0); | ||
| 86 | |||
| 87 | while (amount_of_data > 0) { | ||
| 88 | for (size_t i = 0; i < output_unit; ++i) { | ||
| 89 | output[i] = input[i * N]; | ||
| 90 | } | ||
| 91 | |||
| 92 | output += output_unit; | ||
| 93 | input += buf.transfer_unit + buf.gap; | ||
| 94 | |||
| 95 | buf.address += buf.transfer_unit + buf.gap; | ||
| 96 | buf.image_size -= buf.transfer_unit; | ||
| 97 | amount_of_data -= output_unit; | ||
| 98 | } | ||
| 99 | } | ||
| 100 | |||
| 101 | /// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA transfer. | ||
| 102 | static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data, | ||
| 103 | OutputFormat output_format, u8 alpha) { | ||
| 104 | |||
| 105 | u8* output = Memory::GetPointer(buf.address); | ||
| 106 | |||
| 107 | while (amount_of_data > 0) { | ||
| 108 | u8* unit_end = output + buf.transfer_unit; | ||
| 109 | while (output < unit_end) { | ||
| 110 | u32 color = *input++; | ||
| 111 | Math::Vec4<u8> col_vec{ | ||
| 112 | (color >> 24) & 0xFF, (color >> 16) & 0xFF, (color >> 8) & 0xFF, alpha, | ||
| 113 | }; | ||
| 114 | |||
| 115 | switch (output_format) { | ||
| 116 | case OutputFormat::RGBA8: | ||
| 117 | Color::EncodeRGBA8(col_vec, output); | ||
| 118 | output += 4; | ||
| 119 | break; | ||
| 120 | case OutputFormat::RGB8: | ||
| 121 | Color::EncodeRGB8(col_vec, output); | ||
| 122 | output += 3; | ||
| 123 | break; | ||
| 124 | case OutputFormat::RGB5A1: | ||
| 125 | Color::EncodeRGB5A1(col_vec, output); | ||
| 126 | output += 2; | ||
| 127 | break; | ||
| 128 | case OutputFormat::RGB565: | ||
| 129 | Color::EncodeRGB565(col_vec, output); | ||
| 130 | output += 2; | ||
| 131 | break; | ||
| 132 | } | ||
| 133 | |||
| 134 | amount_of_data -= 1; | ||
| 135 | } | ||
| 136 | |||
| 137 | output += buf.gap; | ||
| 138 | buf.address += buf.transfer_unit + buf.gap; | ||
| 139 | buf.image_size -= buf.transfer_unit; | ||
| 140 | } | ||
| 141 | } | ||
| 142 | |||
| 143 | static const u8 linear_lut[64] = { | ||
| 144 | 0, 1, 2, 3, 4, 5, 6, 7, | ||
| 145 | 8, 9, 10, 11, 12, 13, 14, 15, | ||
| 146 | 16, 17, 18, 19, 20, 21, 22, 23, | ||
| 147 | 24, 25, 26, 27, 28, 29, 30, 31, | ||
| 148 | 32, 33, 34, 35, 36, 37, 38, 39, | ||
| 149 | 40, 41, 42, 43, 44, 45, 46, 47, | ||
| 150 | 48, 49, 50, 51, 52, 53, 54, 55, | ||
| 151 | 56, 57, 58, 59, 60, 61, 62, 63, | ||
| 152 | }; | ||
| 153 | |||
| 154 | static const u8 morton_lut[64] = { | ||
| 155 | 0, 1, 4, 5, 16, 17, 20, 21, | ||
| 156 | 2, 3, 6, 7, 18, 19, 22, 23, | ||
| 157 | 8, 9, 12, 13, 24, 25, 28, 29, | ||
| 158 | 10, 11, 14, 15, 26, 27, 30, 31, | ||
| 159 | 32, 33, 36, 37, 48, 49, 52, 53, | ||
| 160 | 34, 35, 38, 39, 50, 51, 54, 55, | ||
| 161 | 40, 41, 44, 45, 56, 57, 60, 61, | ||
| 162 | 42, 43, 46, 47, 58, 59, 62, 63, | ||
| 163 | }; | ||
| 164 | |||
| 165 | static void RotateTile0(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { | ||
| 166 | for (int i = 0; i < height * 8; ++i) { | ||
| 167 | output[out_map[i]] = input[i]; | ||
| 168 | } | ||
| 169 | } | ||
| 170 | |||
| 171 | static void RotateTile90(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { | ||
| 172 | int out_i = 0; | ||
| 173 | for (int x = 0; x < 8; ++x) { | ||
| 174 | for (int y = height - 1; y >= 0; --y) { | ||
| 175 | output[out_map[out_i++]] = input[y * 8 + x]; | ||
| 176 | } | ||
| 177 | } | ||
| 178 | } | ||
| 179 | |||
| 180 | static void RotateTile180(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { | ||
| 181 | int out_i = 0; | ||
| 182 | for (int i = height * 8 - 1; i >= 0; --i) { | ||
| 183 | output[out_map[out_i++]] = input[i]; | ||
| 184 | } | ||
| 185 | } | ||
| 186 | |||
| 187 | static void RotateTile270(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { | ||
| 188 | int out_i = 0; | ||
| 189 | for (int x = 8-1; x >= 0; --x) { | ||
| 190 | for (int y = 0; y < height; ++y) { | ||
| 191 | output[out_map[out_i++]] = input[y * 8 + x]; | ||
| 192 | } | ||
| 193 | } | ||
| 194 | } | ||
| 195 | |||
| 196 | static void WriteTileToOutput(u32* output, const ImageTile& tile, int height, int line_stride) { | ||
| 197 | for (int y = 0; y < height; ++y) { | ||
| 198 | for (int x = 0; x < 8; ++x) { | ||
| 199 | output[y * line_stride + x] = tile[y * 8 + x]; | ||
| 200 | } | ||
| 201 | } | ||
| 202 | } | ||
| 203 | |||
| 204 | /** | ||
| 205 | * Performs a Y2R colorspace conversion. | ||
| 206 | * | ||
| 207 | * The Y2R hardware implements hardware-accelerated YUV to RGB colorspace conversions. It is most | ||
| 208 | * commonly used for video playback or to display camera input to the screen. | ||
| 209 | * | ||
| 210 | * The conversion process is quite configurable, and can be divided into distinct steps. From | ||
| 211 | * observation, it appears that the hardware buffers a single 8-pixel tall strip of image data | ||
| 212 | * internally and converts it in one go before writing to the output and loading the next strip. | ||
| 213 | * | ||
| 214 | * The steps taken to convert one strip of image data are: | ||
| 215 | * | ||
| 216 | * - The hardware receives data via CDMA (http://3dbrew.org/wiki/Corelink_DMA_Engines), which is | ||
| 217 | * presumably stored in one or more internal buffers. This process can be done in several separate | ||
| 218 | * transfers, as long as they don't exceed the size of the internal image buffer. This allows | ||
| 219 | * flexibility in input strides. | ||
| 220 | * - The input data is decoded into a YUV tuple. Several formats are supported, see the `InputFormat` | ||
| 221 | * enum. | ||
| 222 | * - The YUV tuple is converted, using fixed point calculations, to RGB. This step can be configured | ||
| 223 | * using a set of coefficients to support different colorspace standards. See `CoefficientSet`. | ||
| 224 | * - The strip can be optionally rotated 90, 180 or 270 degrees. Since each strip is processed | ||
| 225 | * independently, this notably rotates each *strip*, not the entire image. This means that for 90 | ||
| 226 | * or 270 degree rotations, the output will be in terms of several 8 x height images, and for any | ||
| 227 | * non-zero rotation the strips will have to be re-arranged so that the parts of the image will | ||
| 228 | * not be shuffled together. This limitation makes this a feature of somewhat dubious utility. 90 | ||
| 229 | * or 270 degree rotations in images with non-even height don't seem to work properly. | ||
| 230 | * - The data is converted to the output RGB format. See the `OutputFormat` enum. | ||
| 231 | * - The data can be output either linearly line-by-line or in the swizzled 8x8 tile format used by | ||
| 232 | * the PICA. This is decided by the `BlockAlignment` enum. If 8x8 alignment is used, then the | ||
| 233 | * image must have a height divisible by 8. The image width must always be divisible by 8. | ||
| 234 | * - The final data is then CDMAed out to main memory and the next image strip is processed. This | ||
| 235 | * offers the same flexibility as the input stage. | ||
| 236 | * | ||
| 237 | * In this implementation, to avoid the combinatorial explosion of parameter combinations, common | ||
| 238 | * intermediate formats are used and where possible tables or parameters are used instead of | ||
| 239 | * diverging code paths to keep the amount of branches in check. Some steps are also merged to | ||
| 240 | * increase efficiency. | ||
| 241 | * | ||
| 242 | * Output for all valid settings combinations matches hardware, however output in some edge-cases | ||
| 243 | * differs: | ||
| 244 | * | ||
| 245 | * - `Block8x8` alignment with non-mod8 height produces different garbage patterns on the last | ||
| 246 | * strip, especially when combined with rotation. | ||
| 247 | * - Hardware, when using `Linear` alignment with a non-even height and 90 or 270 degree rotation | ||
| 248 | * produces misaligned output on the last strip. This implementation produces output with the | ||
| 249 | * correct "expected" alignment. | ||
| 250 | * | ||
| 251 | * Hardware behaves strangely (doesn't fire the completion interrupt, for example) in these cases, | ||
| 252 | * so they are believed to be invalid configurations anyway. | ||
| 253 | */ | ||
| 254 | void PerformConversion(ConversionConfiguration& cvt) { | ||
| 255 | ASSERT(cvt.input_line_width % 8 == 0); | ||
| 256 | ASSERT(cvt.block_alignment != BlockAlignment::Block8x8 || cvt.input_lines % 8 == 0); | ||
| 257 | // Tiles per row | ||
| 258 | size_t num_tiles = cvt.input_line_width / 8; | ||
| 259 | ASSERT(num_tiles < MAX_TILES); | ||
| 260 | |||
| 261 | // Buffer used as a CDMA source/target. | ||
| 262 | std::unique_ptr<u8[]> data_buffer(new u8[cvt.input_line_width * 8 * 4]); | ||
| 263 | // Intermediate storage for decoded 8x8 image tiles. Always stored as RGB32. | ||
| 264 | std::unique_ptr<ImageTile[]> tiles(new ImageTile[num_tiles]); | ||
| 265 | ImageTile tmp_tile; | ||
| 266 | |||
| 267 | // LUT used to remap writes to a tile. Used to allow linear or swizzled output without | ||
| 268 | // requiring two different code paths. | ||
| 269 | const u8* tile_remap; | ||
| 270 | switch (cvt.block_alignment) { | ||
| 271 | case BlockAlignment::Linear: | ||
| 272 | tile_remap = linear_lut; break; | ||
| 273 | case BlockAlignment::Block8x8: | ||
| 274 | tile_remap = morton_lut; break; | ||
| 275 | } | ||
| 276 | |||
| 277 | for (unsigned int y = 0; y < cvt.input_lines; y += 8) { | ||
| 278 | unsigned int row_height = std::min(cvt.input_lines - y, 8u); | ||
| 279 | |||
| 280 | // Total size in pixels of incoming data required for this strip. | ||
| 281 | const size_t row_data_size = row_height * cvt.input_line_width; | ||
| 282 | |||
| 283 | u8* input_Y = data_buffer.get(); | ||
| 284 | u8* input_U = input_Y + 8 * cvt.input_line_width; | ||
| 285 | u8* input_V = input_U + 8 * cvt.input_line_width / 2; | ||
| 286 | |||
| 287 | switch (cvt.input_format) { | ||
| 288 | case InputFormat::YUV422_Indiv8: | ||
| 289 | ReceiveData<1>(input_Y, cvt.src_Y, row_data_size); | ||
| 290 | ReceiveData<1>(input_U, cvt.src_U, row_data_size / 2); | ||
| 291 | ReceiveData<1>(input_V, cvt.src_V, row_data_size / 2); | ||
| 292 | break; | ||
| 293 | case InputFormat::YUV420_Indiv8: | ||
| 294 | ReceiveData<1>(input_Y, cvt.src_Y, row_data_size); | ||
| 295 | ReceiveData<1>(input_U, cvt.src_U, row_data_size / 4); | ||
| 296 | ReceiveData<1>(input_V, cvt.src_V, row_data_size / 4); | ||
| 297 | break; | ||
| 298 | case InputFormat::YUV422_Indiv16: | ||
| 299 | ReceiveData<2>(input_Y, cvt.src_Y, row_data_size); | ||
| 300 | ReceiveData<2>(input_U, cvt.src_U, row_data_size / 2); | ||
| 301 | ReceiveData<2>(input_V, cvt.src_V, row_data_size / 2); | ||
| 302 | break; | ||
| 303 | case InputFormat::YUV420_Indiv16: | ||
| 304 | ReceiveData<2>(input_Y, cvt.src_Y, row_data_size); | ||
| 305 | ReceiveData<2>(input_U, cvt.src_U, row_data_size / 4); | ||
| 306 | ReceiveData<2>(input_V, cvt.src_V, row_data_size / 4); | ||
| 307 | break; | ||
| 308 | case InputFormat::YUYV422_Interleaved: | ||
| 309 | input_U = nullptr; | ||
| 310 | input_V = nullptr; | ||
| 311 | ReceiveData<1>(input_Y, cvt.src_YUYV, row_data_size * 2); | ||
| 312 | break; | ||
| 313 | } | ||
| 314 | |||
| 315 | // Note(yuriks): If additional optimization is required, input_format can be moved to a | ||
| 316 | // template parameter, so that its dispatch can be moved to outside the inner loop. | ||
| 317 | ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(), | ||
| 318 | cvt.input_line_width, row_height, cvt.coefficients); | ||
| 319 | |||
| 320 | u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get()); | ||
| 321 | |||
| 322 | for (int i = 0; i < num_tiles; ++i) { | ||
| 323 | int image_strip_width, output_stride; | ||
| 324 | |||
| 325 | switch (cvt.rotation) { | ||
| 326 | case Rotation::None: | ||
| 327 | RotateTile0(tiles[i], tmp_tile, row_height, tile_remap); | ||
| 328 | image_strip_width = cvt.input_line_width; | ||
| 329 | output_stride = 8; | ||
| 330 | break; | ||
| 331 | case Rotation::Clockwise_90: | ||
| 332 | RotateTile90(tiles[i], tmp_tile, row_height, tile_remap); | ||
| 333 | image_strip_width = 8; | ||
| 334 | output_stride = 8 * row_height; | ||
| 335 | break; | ||
| 336 | case Rotation::Clockwise_180: | ||
| 337 | // For 180 and 270 degree rotations we also invert the order of tiles in the strip, | ||
| 338 | // since the rotates are done individually on each tile. | ||
| 339 | RotateTile180(tiles[num_tiles - i - 1], tmp_tile, row_height, tile_remap); | ||
| 340 | image_strip_width = cvt.input_line_width; | ||
| 341 | output_stride = 8; | ||
| 342 | break; | ||
| 343 | case Rotation::Clockwise_270: | ||
| 344 | RotateTile270(tiles[num_tiles - i - 1], tmp_tile, row_height, tile_remap); | ||
| 345 | image_strip_width = 8; | ||
| 346 | output_stride = 8 * row_height; | ||
| 347 | break; | ||
| 348 | } | ||
| 349 | |||
| 350 | switch (cvt.block_alignment) { | ||
| 351 | case BlockAlignment::Linear: | ||
| 352 | WriteTileToOutput(output_buffer, tmp_tile, row_height, image_strip_width); | ||
| 353 | output_buffer += output_stride; | ||
| 354 | break; | ||
| 355 | case BlockAlignment::Block8x8: | ||
| 356 | WriteTileToOutput(output_buffer, tmp_tile, 8, 8); | ||
| 357 | output_buffer += TILE_SIZE; | ||
| 358 | break; | ||
| 359 | } | ||
| 360 | } | ||
| 361 | |||
| 362 | // Note(yuriks): If additional optimization is required, output_format can be moved to a | ||
| 363 | // template parameter, so that its dispatch can be moved to outside the inner loop. | ||
| 364 | SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size, cvt.output_format, (u8)cvt.alpha); | ||
| 365 | } | ||
| 366 | } | ||
| 367 | |||
| 368 | } | ||
| 369 | } | ||
diff --git a/src/core/hw/y2r.h b/src/core/hw/y2r.h new file mode 100644 index 000000000..729e1eee3 --- /dev/null +++ b/src/core/hw/y2r.h | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | // Copyright 2015 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | namespace Y2R_U { | ||
| 6 | struct ConversionConfiguration; | ||
| 7 | } | ||
| 8 | |||
| 9 | namespace HW { | ||
| 10 | namespace Y2R { | ||
| 11 | |||
| 12 | void PerformConversion(Y2R_U::ConversionConfiguration& cvt); | ||
| 13 | |||
| 14 | } | ||
| 15 | } | ||