summaryrefslogtreecommitdiff
path: root/src/core/hw/y2r.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/hw/y2r.cpp')
-rw-r--r--src/core/hw/y2r.cpp65
1 files changed, 36 insertions, 29 deletions
diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp
index 083391e83..6a6c707a2 100644
--- a/src/core/hw/y2r.cpp
+++ b/src/core/hw/y2r.cpp
@@ -6,13 +6,11 @@
6#include <array> 6#include <array>
7#include <cstddef> 7#include <cstddef>
8#include <memory> 8#include <memory>
9
10#include "common/assert.h" 9#include "common/assert.h"
11#include "common/color.h" 10#include "common/color.h"
12#include "common/common_types.h" 11#include "common/common_types.h"
13#include "common/math_util.h" 12#include "common/math_util.h"
14#include "common/vector_math.h" 13#include "common/vector_math.h"
15
16#include "core/hle/service/y2r_u.h" 14#include "core/hle/service/y2r_u.h"
17#include "core/hw/y2r.h" 15#include "core/hw/y2r.h"
18#include "core/memory.h" 16#include "core/memory.h"
@@ -27,9 +25,9 @@ static const size_t TILE_SIZE = 8 * 8;
27using ImageTile = std::array<u32, TILE_SIZE>; 25using ImageTile = std::array<u32, TILE_SIZE>;
28 26
29/// Converts a image strip from the source YUV format into individual 8x8 RGB32 tiles. 27/// Converts a image strip from the source YUV format into individual 8x8 RGB32 tiles.
30static void ConvertYUVToRGB(InputFormat input_format, 28static void ConvertYUVToRGB(InputFormat input_format, const u8* input_Y, const u8* input_U,
31 const u8* input_Y, const u8* input_U, const u8* input_V, ImageTile output[], 29 const u8* input_V, ImageTile output[], unsigned int width,
32 unsigned int width, unsigned int height, const CoefficientSet& coefficients) { 30 unsigned int height, const CoefficientSet& coefficients) {
33 31
34 for (unsigned int y = 0; y < height; ++y) { 32 for (unsigned int y = 0; y < height; ++y) {
35 for (unsigned int x = 0; x < width; ++x) { 33 for (unsigned int x = 0; x < width; ++x) {
@@ -58,11 +56,11 @@ static void ConvertYUVToRGB(InputFormat input_format,
58 56
59 // This conversion process is bit-exact with hardware, as far as could be tested. 57 // This conversion process is bit-exact with hardware, as far as could be tested.
60 auto& c = coefficients; 58 auto& c = coefficients;
61 s32 cY = c[0]*Y; 59 s32 cY = c[0] * Y;
62 60
63 s32 r = cY + c[1]*V; 61 s32 r = cY + c[1] * V;
64 s32 g = cY - c[3]*U - c[2]*V; 62 s32 g = cY - c[2] * V - c[3] * U;
65 s32 b = cY + c[4]*U; 63 s32 b = cY + c[4] * U;
66 64
67 const s32 rounding_offset = 0x18; 65 const s32 rounding_offset = 0x18;
68 r = (r >> 3) + c[5] + rounding_offset; 66 r = (r >> 3) + c[5] + rounding_offset;
@@ -74,14 +72,14 @@ static void ConvertYUVToRGB(InputFormat input_format,
74 u32* out = &output[tile][y * 8 + tile_x]; 72 u32* out = &output[tile][y * 8 + tile_x];
75 73
76 using MathUtil::Clamp; 74 using MathUtil::Clamp;
77 *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) | 75 *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) | ((u32)Clamp(g >> 5, 0, 0xFF) << 16) |
78 ((u32)Clamp(g >> 5, 0, 0xFF) << 16) |
79 ((u32)Clamp(b >> 5, 0, 0xFF) << 8); 76 ((u32)Clamp(b >> 5, 0, 0xFF) << 8);
80 } 77 }
81 } 78 }
82} 79}
83 80
84/// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit formats to 8-bit. 81/// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit
82/// formats to 8-bit.
85template <size_t N> 83template <size_t N>
86static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) { 84static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) {
87 const u8* input = Memory::GetPointer(buf.address); 85 const u8* input = Memory::GetPointer(buf.address);
@@ -103,9 +101,10 @@ static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data
103 } 101 }
104} 102}
105 103
106/// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA transfer. 104/// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA
105/// transfer.
107static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data, 106static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data,
108 OutputFormat output_format, u8 alpha) { 107 OutputFormat output_format, u8 alpha) {
109 108
110 u8* output = Memory::GetPointer(buf.address); 109 u8* output = Memory::GetPointer(buf.address);
111 110
@@ -113,9 +112,7 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data
113 u8* unit_end = output + buf.transfer_unit; 112 u8* unit_end = output + buf.transfer_unit;
114 while (output < unit_end) { 113 while (output < unit_end) {
115 u32 color = *input++; 114 u32 color = *input++;
116 Math::Vec4<u8> col_vec{ 115 Math::Vec4<u8> col_vec{(u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha};
117 (u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha
118 };
119 116
120 switch (output_format) { 117 switch (output_format) {
121 case OutputFormat::RGBA8: 118 case OutputFormat::RGBA8:
@@ -145,7 +142,8 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data
145 } 142 }
146} 143}
147 144
148static const u8 linear_lut[64] = { 145static const u8 linear_lut[TILE_SIZE] = {
146 // clang-format off
149 0, 1, 2, 3, 4, 5, 6, 7, 147 0, 1, 2, 3, 4, 5, 6, 7,
150 8, 9, 10, 11, 12, 13, 14, 15, 148 8, 9, 10, 11, 12, 13, 14, 15,
151 16, 17, 18, 19, 20, 21, 22, 23, 149 16, 17, 18, 19, 20, 21, 22, 23,
@@ -154,9 +152,11 @@ static const u8 linear_lut[64] = {
154 40, 41, 42, 43, 44, 45, 46, 47, 152 40, 41, 42, 43, 44, 45, 46, 47,
155 48, 49, 50, 51, 52, 53, 54, 55, 153 48, 49, 50, 51, 52, 53, 54, 55,
156 56, 57, 58, 59, 60, 61, 62, 63, 154 56, 57, 58, 59, 60, 61, 62, 63,
155 // clang-format on
157}; 156};
158 157
159static const u8 morton_lut[64] = { 158static const u8 morton_lut[TILE_SIZE] = {
159 // clang-format off
160 0, 1, 4, 5, 16, 17, 20, 21, 160 0, 1, 4, 5, 16, 17, 20, 21,
161 2, 3, 6, 7, 18, 19, 22, 23, 161 2, 3, 6, 7, 18, 19, 22, 23,
162 8, 9, 12, 13, 24, 25, 28, 29, 162 8, 9, 12, 13, 24, 25, 28, 29,
@@ -165,15 +165,18 @@ static const u8 morton_lut[64] = {
165 34, 35, 38, 39, 50, 51, 54, 55, 165 34, 35, 38, 39, 50, 51, 54, 55,
166 40, 41, 44, 45, 56, 57, 60, 61, 166 40, 41, 44, 45, 56, 57, 60, 61,
167 42, 43, 46, 47, 58, 59, 62, 63, 167 42, 43, 46, 47, 58, 59, 62, 63,
168 // clang-format on
168}; 169};
169 170
170static void RotateTile0(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { 171static void RotateTile0(const ImageTile& input, ImageTile& output, int height,
172 const u8 out_map[64]) {
171 for (int i = 0; i < height * 8; ++i) { 173 for (int i = 0; i < height * 8; ++i) {
172 output[out_map[i]] = input[i]; 174 output[out_map[i]] = input[i];
173 } 175 }
174} 176}
175 177
176static void RotateTile90(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { 178static void RotateTile90(const ImageTile& input, ImageTile& output, int height,
179 const u8 out_map[64]) {
177 int out_i = 0; 180 int out_i = 0;
178 for (int x = 0; x < 8; ++x) { 181 for (int x = 0; x < 8; ++x) {
179 for (int y = height - 1; y >= 0; --y) { 182 for (int y = height - 1; y >= 0; --y) {
@@ -182,16 +185,18 @@ static void RotateTile90(const ImageTile& input, ImageTile& output, int height,
182 } 185 }
183} 186}
184 187
185static void RotateTile180(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { 188static void RotateTile180(const ImageTile& input, ImageTile& output, int height,
189 const u8 out_map[64]) {
186 int out_i = 0; 190 int out_i = 0;
187 for (int i = height * 8 - 1; i >= 0; --i) { 191 for (int i = height * 8 - 1; i >= 0; --i) {
188 output[out_map[out_i++]] = input[i]; 192 output[out_map[out_i++]] = input[i];
189 } 193 }
190} 194}
191 195
192static void RotateTile270(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { 196static void RotateTile270(const ImageTile& input, ImageTile& output, int height,
197 const u8 out_map[64]) {
193 int out_i = 0; 198 int out_i = 0;
194 for (int x = 8-1; x >= 0; --x) { 199 for (int x = 8 - 1; x >= 0; --x) {
195 for (int y = 0; y < height; ++y) { 200 for (int y = 0; y < height; ++y) {
196 output[out_map[out_i++]] = input[y * 8 + x]; 201 output[out_map[out_i++]] = input[y * 8 + x];
197 } 202 }
@@ -274,9 +279,11 @@ void PerformConversion(ConversionConfiguration& cvt) {
274 const u8* tile_remap = nullptr; 279 const u8* tile_remap = nullptr;
275 switch (cvt.block_alignment) { 280 switch (cvt.block_alignment) {
276 case BlockAlignment::Linear: 281 case BlockAlignment::Linear:
277 tile_remap = linear_lut; break; 282 tile_remap = linear_lut;
283 break;
278 case BlockAlignment::Block8x8: 284 case BlockAlignment::Block8x8:
279 tile_remap = morton_lut; break; 285 tile_remap = morton_lut;
286 break;
280 } 287 }
281 288
282 for (unsigned int y = 0; y < cvt.input_lines; y += 8) { 289 for (unsigned int y = 0; y < cvt.input_lines; y += 8) {
@@ -320,7 +327,7 @@ void PerformConversion(ConversionConfiguration& cvt) {
320 // Note(yuriks): If additional optimization is required, input_format can be moved to a 327 // Note(yuriks): If additional optimization is required, input_format can be moved to a
321 // template parameter, so that its dispatch can be moved to outside the inner loop. 328 // template parameter, so that its dispatch can be moved to outside the inner loop.
322 ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(), 329 ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(),
323 cvt.input_line_width, row_height, cvt.coefficients); 330 cvt.input_line_width, row_height, cvt.coefficients);
324 331
325 u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get()); 332 u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get());
326 333
@@ -367,9 +374,9 @@ void PerformConversion(ConversionConfiguration& cvt) {
367 374
368 // Note(yuriks): If additional optimization is required, output_format can be moved to a 375 // Note(yuriks): If additional optimization is required, output_format can be moved to a
369 // template parameter, so that its dispatch can be moved to outside the inner loop. 376 // template parameter, so that its dispatch can be moved to outside the inner loop.
370 SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size, cvt.output_format, (u8)cvt.alpha); 377 SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size,
378 cvt.output_format, (u8)cvt.alpha);
371 } 379 }
372} 380}
373
374} 381}
375} 382}