diff options
Diffstat (limited to 'src/core/hw/y2r.cpp')
| -rw-r--r-- | src/core/hw/y2r.cpp | 77 |
1 files changed, 36 insertions, 41 deletions
diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp index 083391e83..5a68d7e65 100644 --- a/src/core/hw/y2r.cpp +++ b/src/core/hw/y2r.cpp | |||
| @@ -27,9 +27,9 @@ static const size_t TILE_SIZE = 8 * 8; | |||
| 27 | using ImageTile = std::array<u32, TILE_SIZE>; | 27 | using ImageTile = std::array<u32, TILE_SIZE>; |
| 28 | 28 | ||
| 29 | /// Converts a image strip from the source YUV format into individual 8x8 RGB32 tiles. | 29 | /// Converts a image strip from the source YUV format into individual 8x8 RGB32 tiles. |
| 30 | static void ConvertYUVToRGB(InputFormat input_format, | 30 | static void ConvertYUVToRGB(InputFormat input_format, const u8* input_Y, const u8* input_U, |
| 31 | const u8* input_Y, const u8* input_U, const u8* input_V, ImageTile output[], | 31 | const u8* input_V, ImageTile output[], unsigned int width, |
| 32 | unsigned int width, unsigned int height, const CoefficientSet& coefficients) { | 32 | unsigned int height, const CoefficientSet& coefficients) { |
| 33 | 33 | ||
| 34 | for (unsigned int y = 0; y < height; ++y) { | 34 | for (unsigned int y = 0; y < height; ++y) { |
| 35 | for (unsigned int x = 0; x < width; ++x) { | 35 | for (unsigned int x = 0; x < width; ++x) { |
| @@ -58,11 +58,11 @@ static void ConvertYUVToRGB(InputFormat input_format, | |||
| 58 | 58 | ||
| 59 | // This conversion process is bit-exact with hardware, as far as could be tested. | 59 | // This conversion process is bit-exact with hardware, as far as could be tested. |
| 60 | auto& c = coefficients; | 60 | auto& c = coefficients; |
| 61 | s32 cY = c[0]*Y; | 61 | s32 cY = c[0] * Y; |
| 62 | 62 | ||
| 63 | s32 r = cY + c[1]*V; | 63 | s32 r = cY + c[1] * V; |
| 64 | s32 g = cY - c[3]*U - c[2]*V; | 64 | s32 g = cY - c[3] * U - c[2] * V; |
| 65 | s32 b = cY + c[4]*U; | 65 | s32 b = cY + c[4] * U; |
| 66 | 66 | ||
| 67 | const s32 rounding_offset = 0x18; | 67 | const s32 rounding_offset = 0x18; |
| 68 | r = (r >> 3) + c[5] + rounding_offset; | 68 | r = (r >> 3) + c[5] + rounding_offset; |
| @@ -74,14 +74,14 @@ static void ConvertYUVToRGB(InputFormat input_format, | |||
| 74 | u32* out = &output[tile][y * 8 + tile_x]; | 74 | u32* out = &output[tile][y * 8 + tile_x]; |
| 75 | 75 | ||
| 76 | using MathUtil::Clamp; | 76 | using MathUtil::Clamp; |
| 77 | *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) | | 77 | *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) | ((u32)Clamp(g >> 5, 0, 0xFF) << 16) | |
| 78 | ((u32)Clamp(g >> 5, 0, 0xFF) << 16) | | ||
| 79 | ((u32)Clamp(b >> 5, 0, 0xFF) << 8); | 78 | ((u32)Clamp(b >> 5, 0, 0xFF) << 8); |
| 80 | } | 79 | } |
| 81 | } | 80 | } |
| 82 | } | 81 | } |
| 83 | 82 | ||
| 84 | /// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit formats to 8-bit. | 83 | /// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit |
| 84 | /// formats to 8-bit. | ||
| 85 | template <size_t N> | 85 | template <size_t N> |
| 86 | static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) { | 86 | static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) { |
| 87 | const u8* input = Memory::GetPointer(buf.address); | 87 | const u8* input = Memory::GetPointer(buf.address); |
| @@ -103,9 +103,10 @@ static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data | |||
| 103 | } | 103 | } |
| 104 | } | 104 | } |
| 105 | 105 | ||
| 106 | /// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA transfer. | 106 | /// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA |
| 107 | /// transfer. | ||
| 107 | static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data, | 108 | static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data, |
| 108 | OutputFormat output_format, u8 alpha) { | 109 | OutputFormat output_format, u8 alpha) { |
| 109 | 110 | ||
| 110 | u8* output = Memory::GetPointer(buf.address); | 111 | u8* output = Memory::GetPointer(buf.address); |
| 111 | 112 | ||
| @@ -113,9 +114,7 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data | |||
| 113 | u8* unit_end = output + buf.transfer_unit; | 114 | u8* unit_end = output + buf.transfer_unit; |
| 114 | while (output < unit_end) { | 115 | while (output < unit_end) { |
| 115 | u32 color = *input++; | 116 | u32 color = *input++; |
| 116 | Math::Vec4<u8> col_vec{ | 117 | Math::Vec4<u8> col_vec{(u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha}; |
| 117 | (u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha | ||
| 118 | }; | ||
| 119 | 118 | ||
| 120 | switch (output_format) { | 119 | switch (output_format) { |
| 121 | case OutputFormat::RGBA8: | 120 | case OutputFormat::RGBA8: |
| @@ -146,34 +145,26 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data | |||
| 146 | } | 145 | } |
| 147 | 146 | ||
| 148 | static const u8 linear_lut[64] = { | 147 | static const u8 linear_lut[64] = { |
| 149 | 0, 1, 2, 3, 4, 5, 6, 7, | 148 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, |
| 150 | 8, 9, 10, 11, 12, 13, 14, 15, | 149 | 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, |
| 151 | 16, 17, 18, 19, 20, 21, 22, 23, | 150 | 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, |
| 152 | 24, 25, 26, 27, 28, 29, 30, 31, | ||
| 153 | 32, 33, 34, 35, 36, 37, 38, 39, | ||
| 154 | 40, 41, 42, 43, 44, 45, 46, 47, | ||
| 155 | 48, 49, 50, 51, 52, 53, 54, 55, | ||
| 156 | 56, 57, 58, 59, 60, 61, 62, 63, | ||
| 157 | }; | 151 | }; |
| 158 | 152 | ||
| 159 | static const u8 morton_lut[64] = { | 153 | static const u8 morton_lut[64] = { |
| 160 | 0, 1, 4, 5, 16, 17, 20, 21, | 154 | 0, 1, 4, 5, 16, 17, 20, 21, 2, 3, 6, 7, 18, 19, 22, 23, 8, 9, 12, 13, 24, 25, |
| 161 | 2, 3, 6, 7, 18, 19, 22, 23, | 155 | 28, 29, 10, 11, 14, 15, 26, 27, 30, 31, 32, 33, 36, 37, 48, 49, 52, 53, 34, 35, 38, 39, |
| 162 | 8, 9, 12, 13, 24, 25, 28, 29, | 156 | 50, 51, 54, 55, 40, 41, 44, 45, 56, 57, 60, 61, 42, 43, 46, 47, 58, 59, 62, 63, |
| 163 | 10, 11, 14, 15, 26, 27, 30, 31, | ||
| 164 | 32, 33, 36, 37, 48, 49, 52, 53, | ||
| 165 | 34, 35, 38, 39, 50, 51, 54, 55, | ||
| 166 | 40, 41, 44, 45, 56, 57, 60, 61, | ||
| 167 | 42, 43, 46, 47, 58, 59, 62, 63, | ||
| 168 | }; | 157 | }; |
| 169 | 158 | ||
| 170 | static void RotateTile0(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { | 159 | static void RotateTile0(const ImageTile& input, ImageTile& output, int height, |
| 160 | const u8 out_map[64]) { | ||
| 171 | for (int i = 0; i < height * 8; ++i) { | 161 | for (int i = 0; i < height * 8; ++i) { |
| 172 | output[out_map[i]] = input[i]; | 162 | output[out_map[i]] = input[i]; |
| 173 | } | 163 | } |
| 174 | } | 164 | } |
| 175 | 165 | ||
| 176 | static void RotateTile90(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { | 166 | static void RotateTile90(const ImageTile& input, ImageTile& output, int height, |
| 167 | const u8 out_map[64]) { | ||
| 177 | int out_i = 0; | 168 | int out_i = 0; |
| 178 | for (int x = 0; x < 8; ++x) { | 169 | for (int x = 0; x < 8; ++x) { |
| 179 | for (int y = height - 1; y >= 0; --y) { | 170 | for (int y = height - 1; y >= 0; --y) { |
| @@ -182,16 +173,18 @@ static void RotateTile90(const ImageTile& input, ImageTile& output, int height, | |||
| 182 | } | 173 | } |
| 183 | } | 174 | } |
| 184 | 175 | ||
| 185 | static void RotateTile180(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { | 176 | static void RotateTile180(const ImageTile& input, ImageTile& output, int height, |
| 177 | const u8 out_map[64]) { | ||
| 186 | int out_i = 0; | 178 | int out_i = 0; |
| 187 | for (int i = height * 8 - 1; i >= 0; --i) { | 179 | for (int i = height * 8 - 1; i >= 0; --i) { |
| 188 | output[out_map[out_i++]] = input[i]; | 180 | output[out_map[out_i++]] = input[i]; |
| 189 | } | 181 | } |
| 190 | } | 182 | } |
| 191 | 183 | ||
| 192 | static void RotateTile270(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { | 184 | static void RotateTile270(const ImageTile& input, ImageTile& output, int height, |
| 185 | const u8 out_map[64]) { | ||
| 193 | int out_i = 0; | 186 | int out_i = 0; |
| 194 | for (int x = 8-1; x >= 0; --x) { | 187 | for (int x = 8 - 1; x >= 0; --x) { |
| 195 | for (int y = 0; y < height; ++y) { | 188 | for (int y = 0; y < height; ++y) { |
| 196 | output[out_map[out_i++]] = input[y * 8 + x]; | 189 | output[out_map[out_i++]] = input[y * 8 + x]; |
| 197 | } | 190 | } |
| @@ -274,9 +267,11 @@ void PerformConversion(ConversionConfiguration& cvt) { | |||
| 274 | const u8* tile_remap = nullptr; | 267 | const u8* tile_remap = nullptr; |
| 275 | switch (cvt.block_alignment) { | 268 | switch (cvt.block_alignment) { |
| 276 | case BlockAlignment::Linear: | 269 | case BlockAlignment::Linear: |
| 277 | tile_remap = linear_lut; break; | 270 | tile_remap = linear_lut; |
| 271 | break; | ||
| 278 | case BlockAlignment::Block8x8: | 272 | case BlockAlignment::Block8x8: |
| 279 | tile_remap = morton_lut; break; | 273 | tile_remap = morton_lut; |
| 274 | break; | ||
| 280 | } | 275 | } |
| 281 | 276 | ||
| 282 | for (unsigned int y = 0; y < cvt.input_lines; y += 8) { | 277 | for (unsigned int y = 0; y < cvt.input_lines; y += 8) { |
| @@ -320,7 +315,7 @@ void PerformConversion(ConversionConfiguration& cvt) { | |||
| 320 | // Note(yuriks): If additional optimization is required, input_format can be moved to a | 315 | // Note(yuriks): If additional optimization is required, input_format can be moved to a |
| 321 | // template parameter, so that its dispatch can be moved to outside the inner loop. | 316 | // template parameter, so that its dispatch can be moved to outside the inner loop. |
| 322 | ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(), | 317 | ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(), |
| 323 | cvt.input_line_width, row_height, cvt.coefficients); | 318 | cvt.input_line_width, row_height, cvt.coefficients); |
| 324 | 319 | ||
| 325 | u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get()); | 320 | u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get()); |
| 326 | 321 | ||
| @@ -367,9 +362,9 @@ void PerformConversion(ConversionConfiguration& cvt) { | |||
| 367 | 362 | ||
| 368 | // Note(yuriks): If additional optimization is required, output_format can be moved to a | 363 | // Note(yuriks): If additional optimization is required, output_format can be moved to a |
| 369 | // template parameter, so that its dispatch can be moved to outside the inner loop. | 364 | // template parameter, so that its dispatch can be moved to outside the inner loop. |
| 370 | SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size, cvt.output_format, (u8)cvt.alpha); | 365 | SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size, |
| 366 | cvt.output_format, (u8)cvt.alpha); | ||
| 371 | } | 367 | } |
| 372 | } | 368 | } |
| 373 | |||
| 374 | } | 369 | } |
| 375 | } | 370 | } |