diff options
Diffstat (limited to 'src/core/hw/y2r.cpp')
| -rw-r--r-- | src/core/hw/y2r.cpp | 65 |
1 files changed, 36 insertions, 29 deletions
diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp index 083391e83..6a6c707a2 100644 --- a/src/core/hw/y2r.cpp +++ b/src/core/hw/y2r.cpp | |||
| @@ -6,13 +6,11 @@ | |||
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <cstddef> | 7 | #include <cstddef> |
| 8 | #include <memory> | 8 | #include <memory> |
| 9 | |||
| 10 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 11 | #include "common/color.h" | 10 | #include "common/color.h" |
| 12 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 13 | #include "common/math_util.h" | 12 | #include "common/math_util.h" |
| 14 | #include "common/vector_math.h" | 13 | #include "common/vector_math.h" |
| 15 | |||
| 16 | #include "core/hle/service/y2r_u.h" | 14 | #include "core/hle/service/y2r_u.h" |
| 17 | #include "core/hw/y2r.h" | 15 | #include "core/hw/y2r.h" |
| 18 | #include "core/memory.h" | 16 | #include "core/memory.h" |
| @@ -27,9 +25,9 @@ static const size_t TILE_SIZE = 8 * 8; | |||
| 27 | using ImageTile = std::array<u32, TILE_SIZE>; | 25 | using ImageTile = std::array<u32, TILE_SIZE>; |
| 28 | 26 | ||
| 29 | /// Converts a image strip from the source YUV format into individual 8x8 RGB32 tiles. | 27 | /// Converts a image strip from the source YUV format into individual 8x8 RGB32 tiles. |
| 30 | static void ConvertYUVToRGB(InputFormat input_format, | 28 | static void ConvertYUVToRGB(InputFormat input_format, const u8* input_Y, const u8* input_U, |
| 31 | const u8* input_Y, const u8* input_U, const u8* input_V, ImageTile output[], | 29 | const u8* input_V, ImageTile output[], unsigned int width, |
| 32 | unsigned int width, unsigned int height, const CoefficientSet& coefficients) { | 30 | unsigned int height, const CoefficientSet& coefficients) { |
| 33 | 31 | ||
| 34 | for (unsigned int y = 0; y < height; ++y) { | 32 | for (unsigned int y = 0; y < height; ++y) { |
| 35 | for (unsigned int x = 0; x < width; ++x) { | 33 | for (unsigned int x = 0; x < width; ++x) { |
| @@ -58,11 +56,11 @@ static void ConvertYUVToRGB(InputFormat input_format, | |||
| 58 | 56 | ||
| 59 | // This conversion process is bit-exact with hardware, as far as could be tested. | 57 | // This conversion process is bit-exact with hardware, as far as could be tested. |
| 60 | auto& c = coefficients; | 58 | auto& c = coefficients; |
| 61 | s32 cY = c[0]*Y; | 59 | s32 cY = c[0] * Y; |
| 62 | 60 | ||
| 63 | s32 r = cY + c[1]*V; | 61 | s32 r = cY + c[1] * V; |
| 64 | s32 g = cY - c[3]*U - c[2]*V; | 62 | s32 g = cY - c[2] * V - c[3] * U; |
| 65 | s32 b = cY + c[4]*U; | 63 | s32 b = cY + c[4] * U; |
| 66 | 64 | ||
| 67 | const s32 rounding_offset = 0x18; | 65 | const s32 rounding_offset = 0x18; |
| 68 | r = (r >> 3) + c[5] + rounding_offset; | 66 | r = (r >> 3) + c[5] + rounding_offset; |
| @@ -74,14 +72,14 @@ static void ConvertYUVToRGB(InputFormat input_format, | |||
| 74 | u32* out = &output[tile][y * 8 + tile_x]; | 72 | u32* out = &output[tile][y * 8 + tile_x]; |
| 75 | 73 | ||
| 76 | using MathUtil::Clamp; | 74 | using MathUtil::Clamp; |
| 77 | *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) | | 75 | *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) | ((u32)Clamp(g >> 5, 0, 0xFF) << 16) | |
| 78 | ((u32)Clamp(g >> 5, 0, 0xFF) << 16) | | ||
| 79 | ((u32)Clamp(b >> 5, 0, 0xFF) << 8); | 76 | ((u32)Clamp(b >> 5, 0, 0xFF) << 8); |
| 80 | } | 77 | } |
| 81 | } | 78 | } |
| 82 | } | 79 | } |
| 83 | 80 | ||
| 84 | /// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit formats to 8-bit. | 81 | /// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit |
| 82 | /// formats to 8-bit. | ||
| 85 | template <size_t N> | 83 | template <size_t N> |
| 86 | static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) { | 84 | static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) { |
| 87 | const u8* input = Memory::GetPointer(buf.address); | 85 | const u8* input = Memory::GetPointer(buf.address); |
| @@ -103,9 +101,10 @@ static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data | |||
| 103 | } | 101 | } |
| 104 | } | 102 | } |
| 105 | 103 | ||
| 106 | /// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA transfer. | 104 | /// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA |
| 105 | /// transfer. | ||
| 107 | static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data, | 106 | static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data, |
| 108 | OutputFormat output_format, u8 alpha) { | 107 | OutputFormat output_format, u8 alpha) { |
| 109 | 108 | ||
| 110 | u8* output = Memory::GetPointer(buf.address); | 109 | u8* output = Memory::GetPointer(buf.address); |
| 111 | 110 | ||
| @@ -113,9 +112,7 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data | |||
| 113 | u8* unit_end = output + buf.transfer_unit; | 112 | u8* unit_end = output + buf.transfer_unit; |
| 114 | while (output < unit_end) { | 113 | while (output < unit_end) { |
| 115 | u32 color = *input++; | 114 | u32 color = *input++; |
| 116 | Math::Vec4<u8> col_vec{ | 115 | Math::Vec4<u8> col_vec{(u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha}; |
| 117 | (u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha | ||
| 118 | }; | ||
| 119 | 116 | ||
| 120 | switch (output_format) { | 117 | switch (output_format) { |
| 121 | case OutputFormat::RGBA8: | 118 | case OutputFormat::RGBA8: |
| @@ -145,7 +142,8 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data | |||
| 145 | } | 142 | } |
| 146 | } | 143 | } |
| 147 | 144 | ||
| 148 | static const u8 linear_lut[64] = { | 145 | static const u8 linear_lut[TILE_SIZE] = { |
| 146 | // clang-format off | ||
| 149 | 0, 1, 2, 3, 4, 5, 6, 7, | 147 | 0, 1, 2, 3, 4, 5, 6, 7, |
| 150 | 8, 9, 10, 11, 12, 13, 14, 15, | 148 | 8, 9, 10, 11, 12, 13, 14, 15, |
| 151 | 16, 17, 18, 19, 20, 21, 22, 23, | 149 | 16, 17, 18, 19, 20, 21, 22, 23, |
| @@ -154,9 +152,11 @@ static const u8 linear_lut[64] = { | |||
| 154 | 40, 41, 42, 43, 44, 45, 46, 47, | 152 | 40, 41, 42, 43, 44, 45, 46, 47, |
| 155 | 48, 49, 50, 51, 52, 53, 54, 55, | 153 | 48, 49, 50, 51, 52, 53, 54, 55, |
| 156 | 56, 57, 58, 59, 60, 61, 62, 63, | 154 | 56, 57, 58, 59, 60, 61, 62, 63, |
| 155 | // clang-format on | ||
| 157 | }; | 156 | }; |
| 158 | 157 | ||
| 159 | static const u8 morton_lut[64] = { | 158 | static const u8 morton_lut[TILE_SIZE] = { |
| 159 | // clang-format off | ||
| 160 | 0, 1, 4, 5, 16, 17, 20, 21, | 160 | 0, 1, 4, 5, 16, 17, 20, 21, |
| 161 | 2, 3, 6, 7, 18, 19, 22, 23, | 161 | 2, 3, 6, 7, 18, 19, 22, 23, |
| 162 | 8, 9, 12, 13, 24, 25, 28, 29, | 162 | 8, 9, 12, 13, 24, 25, 28, 29, |
| @@ -165,15 +165,18 @@ static const u8 morton_lut[64] = { | |||
| 165 | 34, 35, 38, 39, 50, 51, 54, 55, | 165 | 34, 35, 38, 39, 50, 51, 54, 55, |
| 166 | 40, 41, 44, 45, 56, 57, 60, 61, | 166 | 40, 41, 44, 45, 56, 57, 60, 61, |
| 167 | 42, 43, 46, 47, 58, 59, 62, 63, | 167 | 42, 43, 46, 47, 58, 59, 62, 63, |
| 168 | // clang-format on | ||
| 168 | }; | 169 | }; |
| 169 | 170 | ||
| 170 | static void RotateTile0(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { | 171 | static void RotateTile0(const ImageTile& input, ImageTile& output, int height, |
| 172 | const u8 out_map[64]) { | ||
| 171 | for (int i = 0; i < height * 8; ++i) { | 173 | for (int i = 0; i < height * 8; ++i) { |
| 172 | output[out_map[i]] = input[i]; | 174 | output[out_map[i]] = input[i]; |
| 173 | } | 175 | } |
| 174 | } | 176 | } |
| 175 | 177 | ||
| 176 | static void RotateTile90(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { | 178 | static void RotateTile90(const ImageTile& input, ImageTile& output, int height, |
| 179 | const u8 out_map[64]) { | ||
| 177 | int out_i = 0; | 180 | int out_i = 0; |
| 178 | for (int x = 0; x < 8; ++x) { | 181 | for (int x = 0; x < 8; ++x) { |
| 179 | for (int y = height - 1; y >= 0; --y) { | 182 | for (int y = height - 1; y >= 0; --y) { |
| @@ -182,16 +185,18 @@ static void RotateTile90(const ImageTile& input, ImageTile& output, int height, | |||
| 182 | } | 185 | } |
| 183 | } | 186 | } |
| 184 | 187 | ||
| 185 | static void RotateTile180(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { | 188 | static void RotateTile180(const ImageTile& input, ImageTile& output, int height, |
| 189 | const u8 out_map[64]) { | ||
| 186 | int out_i = 0; | 190 | int out_i = 0; |
| 187 | for (int i = height * 8 - 1; i >= 0; --i) { | 191 | for (int i = height * 8 - 1; i >= 0; --i) { |
| 188 | output[out_map[out_i++]] = input[i]; | 192 | output[out_map[out_i++]] = input[i]; |
| 189 | } | 193 | } |
| 190 | } | 194 | } |
| 191 | 195 | ||
| 192 | static void RotateTile270(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { | 196 | static void RotateTile270(const ImageTile& input, ImageTile& output, int height, |
| 197 | const u8 out_map[64]) { | ||
| 193 | int out_i = 0; | 198 | int out_i = 0; |
| 194 | for (int x = 8-1; x >= 0; --x) { | 199 | for (int x = 8 - 1; x >= 0; --x) { |
| 195 | for (int y = 0; y < height; ++y) { | 200 | for (int y = 0; y < height; ++y) { |
| 196 | output[out_map[out_i++]] = input[y * 8 + x]; | 201 | output[out_map[out_i++]] = input[y * 8 + x]; |
| 197 | } | 202 | } |
| @@ -274,9 +279,11 @@ void PerformConversion(ConversionConfiguration& cvt) { | |||
| 274 | const u8* tile_remap = nullptr; | 279 | const u8* tile_remap = nullptr; |
| 275 | switch (cvt.block_alignment) { | 280 | switch (cvt.block_alignment) { |
| 276 | case BlockAlignment::Linear: | 281 | case BlockAlignment::Linear: |
| 277 | tile_remap = linear_lut; break; | 282 | tile_remap = linear_lut; |
| 283 | break; | ||
| 278 | case BlockAlignment::Block8x8: | 284 | case BlockAlignment::Block8x8: |
| 279 | tile_remap = morton_lut; break; | 285 | tile_remap = morton_lut; |
| 286 | break; | ||
| 280 | } | 287 | } |
| 281 | 288 | ||
| 282 | for (unsigned int y = 0; y < cvt.input_lines; y += 8) { | 289 | for (unsigned int y = 0; y < cvt.input_lines; y += 8) { |
| @@ -320,7 +327,7 @@ void PerformConversion(ConversionConfiguration& cvt) { | |||
| 320 | // Note(yuriks): If additional optimization is required, input_format can be moved to a | 327 | // Note(yuriks): If additional optimization is required, input_format can be moved to a |
| 321 | // template parameter, so that its dispatch can be moved to outside the inner loop. | 328 | // template parameter, so that its dispatch can be moved to outside the inner loop. |
| 322 | ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(), | 329 | ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(), |
| 323 | cvt.input_line_width, row_height, cvt.coefficients); | 330 | cvt.input_line_width, row_height, cvt.coefficients); |
| 324 | 331 | ||
| 325 | u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get()); | 332 | u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get()); |
| 326 | 333 | ||
| @@ -367,9 +374,9 @@ void PerformConversion(ConversionConfiguration& cvt) { | |||
| 367 | 374 | ||
| 368 | // Note(yuriks): If additional optimization is required, output_format can be moved to a | 375 | // Note(yuriks): If additional optimization is required, output_format can be moved to a |
| 369 | // template parameter, so that its dispatch can be moved to outside the inner loop. | 376 | // template parameter, so that its dispatch can be moved to outside the inner loop. |
| 370 | SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size, cvt.output_format, (u8)cvt.alpha); | 377 | SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size, |
| 378 | cvt.output_format, (u8)cvt.alpha); | ||
| 371 | } | 379 | } |
| 372 | } | 380 | } |
| 373 | |||
| 374 | } | 381 | } |
| 375 | } | 382 | } |