summaryrefslogtreecommitdiff
path: root/src/core/hw
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/hw')
-rw-r--r--src/core/hw/gpu.cpp142
-rw-r--r--src/core/hw/gpu.h84
-rw-r--r--src/core/hw/hw.cpp13
-rw-r--r--src/core/hw/hw.h42
-rw-r--r--src/core/hw/lcd.cpp13
-rw-r--r--src/core/hw/lcd.h19
-rw-r--r--src/core/hw/y2r.cpp77
-rw-r--r--src/core/hw/y2r.h3
8 files changed, 209 insertions, 184 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index a4dfb7e43..bf2c066f4 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -12,15 +12,15 @@
12#include "common/microprofile.h" 12#include "common/microprofile.h"
13#include "common/vector_math.h" 13#include "common/vector_math.h"
14 14
15#include "core/settings.h"
16#include "core/memory.h"
17#include "core/core_timing.h" 15#include "core/core_timing.h"
16#include "core/memory.h"
17#include "core/settings.h"
18 18
19#include "core/hle/service/gsp_gpu.h" 19#include "core/hle/service/gsp_gpu.h"
20#include "core/hle/service/hid/hid.h" 20#include "core/hle/service/hid/hid.h"
21 21
22#include "core/hw/hw.h"
23#include "core/hw/gpu.h" 22#include "core/hw/gpu.h"
23#include "core/hw/hw.h"
24 24
25#include "core/tracer/recorder.h" 25#include "core/tracer/recorder.h"
26 26
@@ -32,7 +32,6 @@
32 32
33#include "video_core/debug_utils/debug_utils.h" 33#include "video_core/debug_utils/debug_utils.h"
34 34
35
36namespace GPU { 35namespace GPU {
37 36
38Regs g_regs; 37Regs g_regs;
@@ -49,7 +48,7 @@ static u64 frame_count;
49static bool last_skip_frame; 48static bool last_skip_frame;
50 49
51template <typename T> 50template <typename T>
52inline void Read(T &var, const u32 raw_addr) { 51inline void Read(T& var, const u32 raw_addr) {
53 u32 addr = raw_addr - HW::VADDR_GPU; 52 u32 addr = raw_addr - HW::VADDR_GPU;
54 u32 index = addr / 4; 53 u32 index = addr / 4;
55 54
@@ -105,8 +104,7 @@ inline void Write(u32 addr, const T data) {
105 104
106 // Memory fills are triggered once the fill value is written. 105 // Memory fills are triggered once the fill value is written.
107 case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].trigger, 0x00004 + 0x3): 106 case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].trigger, 0x00004 + 0x3):
108 case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3): 107 case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3): {
109 {
110 const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger)); 108 const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger));
111 auto& config = g_regs.memory_fill_config[is_second_filler]; 109 auto& config = g_regs.memory_fill_config[is_second_filler];
112 110
@@ -125,7 +123,9 @@ inline void Write(u32 addr, const T data) {
125 // regions that were between surfaces or within the touching 123 // regions that were between surfaces or within the touching
126 // ones for cpu to manually fill here. 124 // ones for cpu to manually fill here.
127 if (!VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) { 125 if (!VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) {
128 Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress()); 126 Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(),
127 config.GetEndAddress() -
128 config.GetStartAddress());
129 129
130 if (config.fill_24bit) { 130 if (config.fill_24bit) {
131 // fill with 24-bit values 131 // fill with 24-bit values
@@ -150,7 +150,8 @@ inline void Write(u32 addr, const T data) {
150 } 150 }
151 } 151 }
152 152
153 LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); 153 LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(),
154 config.GetEndAddress());
154 155
155 if (!is_second_filler) { 156 if (!is_second_filler) {
156 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0); 157 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0);
@@ -167,15 +168,15 @@ inline void Write(u32 addr, const T data) {
167 break; 168 break;
168 } 169 }
169 170
170 case GPU_REG_INDEX(display_transfer_config.trigger): 171 case GPU_REG_INDEX(display_transfer_config.trigger): {
171 {
172 MICROPROFILE_SCOPE(GPU_DisplayTransfer); 172 MICROPROFILE_SCOPE(GPU_DisplayTransfer);
173 173
174 const auto& config = g_regs.display_transfer_config; 174 const auto& config = g_regs.display_transfer_config;
175 if (config.trigger & 1) { 175 if (config.trigger & 1) {
176 176
177 if (Pica::g_debug_context) 177 if (Pica::g_debug_context)
178 Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr); 178 Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer,
179 nullptr);
179 180
180 if (!VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) { 181 if (!VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) {
181 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); 182 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
@@ -187,17 +188,23 @@ inline void Write(u32 addr, const T data) {
187 u32 output_width = config.texture_copy.output_width * 16; 188 u32 output_width = config.texture_copy.output_width * 16;
188 u32 output_gap = config.texture_copy.output_gap * 16; 189 u32 output_gap = config.texture_copy.output_gap * 16;
189 190
190 size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap); 191 size_t contiguous_input_size =
191 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), static_cast<u32>(contiguous_input_size)); 192 config.texture_copy.size / input_width * (input_width + input_gap);
193 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(),
194 static_cast<u32>(contiguous_input_size));
192 195
193 size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); 196 size_t contiguous_output_size =
194 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), static_cast<u32>(contiguous_output_size)); 197 config.texture_copy.size / output_width * (output_width + output_gap);
198 Memory::RasterizerFlushAndInvalidateRegion(
199 config.GetPhysicalOutputAddress(),
200 static_cast<u32>(contiguous_output_size));
195 201
196 u32 remaining_size = config.texture_copy.size; 202 u32 remaining_size = config.texture_copy.size;
197 u32 remaining_input = input_width; 203 u32 remaining_input = input_width;
198 u32 remaining_output = output_width; 204 u32 remaining_output = output_width;
199 while (remaining_size > 0) { 205 while (remaining_size > 0) {
200 u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size }); 206 u32 copy_size =
207 std::min({remaining_input, remaining_output, remaining_size});
201 208
202 std::memcpy(dst_pointer, src_pointer, copy_size); 209 std::memcpy(dst_pointer, src_pointer, copy_size);
203 src_pointer += copy_size; 210 src_pointer += copy_size;
@@ -217,10 +224,11 @@ inline void Write(u32 addr, const T data) {
217 } 224 }
218 } 225 }
219 226
220 LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X", 227 LOG_TRACE(
221 config.texture_copy.size, 228 HW_GPU,
222 config.GetPhysicalInputAddress(), input_width, input_gap, 229 "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
223 config.GetPhysicalOutputAddress(), output_width, output_gap, 230 config.texture_copy.size, config.GetPhysicalInputAddress(), input_width,
231 input_gap, config.GetPhysicalOutputAddress(), output_width, output_gap,
224 config.flags); 232 config.flags);
225 233
226 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); 234 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
@@ -228,7 +236,8 @@ inline void Write(u32 addr, const T data) {
228 } 236 }
229 237
230 if (config.scaling > config.ScaleXY) { 238 if (config.scaling > config.ScaleXY) {
231 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); 239 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u",
240 config.scaling.Value());
232 UNIMPLEMENTED(); 241 UNIMPLEMENTED();
233 break; 242 break;
234 } 243 }
@@ -245,11 +254,14 @@ inline void Write(u32 addr, const T data) {
245 u32 output_width = config.output_width >> horizontal_scale; 254 u32 output_width = config.output_width >> horizontal_scale;
246 u32 output_height = config.output_height >> vertical_scale; 255 u32 output_height = config.output_height >> vertical_scale;
247 256
248 u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format); 257 u32 input_size = config.input_width * config.input_height *
249 u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); 258 GPU::Regs::BytesPerPixel(config.input_format);
259 u32 output_size =
260 output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
250 261
251 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size); 262 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
252 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size); 263 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(),
264 output_size);
253 265
254 for (u32 y = 0; y < output_height; ++y) { 266 for (u32 y = 0; y < output_height; ++y) {
255 for (u32 x = 0; x < output_width; ++x) { 267 for (u32 x = 0; x < output_width; ++x) {
@@ -278,11 +290,14 @@ inline void Write(u32 addr, const T data) {
278 u32 coarse_y = y & ~7; 290 u32 coarse_y = y & ~7;
279 u32 stride = output_width * dst_bytes_per_pixel; 291 u32 stride = output_width * dst_bytes_per_pixel;
280 292
281 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; 293 src_offset =
282 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride; 294 (input_x + input_y * config.input_width) * src_bytes_per_pixel;
295 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) +
296 coarse_y * stride;
283 } else { 297 } else {
284 // Both input and output are linear 298 // Both input and output are linear
285 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; 299 src_offset =
300 (input_x + input_y * config.input_width) * src_bytes_per_pixel;
286 dst_offset = (x + y * output_width) * dst_bytes_per_pixel; 301 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
287 } 302 }
288 } else { 303 } else {
@@ -291,7 +306,9 @@ inline void Write(u32 addr, const T data) {
291 u32 coarse_y = input_y & ~7; 306 u32 coarse_y = input_y & ~7;
292 u32 stride = config.input_width * src_bytes_per_pixel; 307 u32 stride = config.input_width * src_bytes_per_pixel;
293 308
294 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride; 309 src_offset = VideoCore::GetMortonOffset(input_x, input_y,
310 src_bytes_per_pixel) +
311 coarse_y * stride;
295 dst_offset = (x + y * output_width) * dst_bytes_per_pixel; 312 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
296 } else { 313 } else {
297 // Both input and output are tiled 314 // Both input and output are tiled
@@ -301,20 +318,27 @@ inline void Write(u32 addr, const T data) {
301 u32 in_coarse_y = input_y & ~7; 318 u32 in_coarse_y = input_y & ~7;
302 u32 in_stride = config.input_width * src_bytes_per_pixel; 319 u32 in_stride = config.input_width * src_bytes_per_pixel;
303 320
304 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride; 321 src_offset = VideoCore::GetMortonOffset(input_x, input_y,
305 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride; 322 src_bytes_per_pixel) +
323 in_coarse_y * in_stride;
324 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) +
325 out_coarse_y * out_stride;
306 } 326 }
307 } 327 }
308 328
309 const u8* src_pixel = src_pointer + src_offset; 329 const u8* src_pixel = src_pointer + src_offset;
310 src_color = DecodePixel(config.input_format, src_pixel); 330 src_color = DecodePixel(config.input_format, src_pixel);
311 if (config.scaling == config.ScaleX) { 331 if (config.scaling == config.ScaleX) {
312 Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); 332 Math::Vec4<u8> pixel =
333 DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
313 src_color = ((src_color + pixel) / 2).Cast<u8>(); 334 src_color = ((src_color + pixel) / 2).Cast<u8>();
314 } else if (config.scaling == config.ScaleXY) { 335 } else if (config.scaling == config.ScaleXY) {
315 Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel); 336 Math::Vec4<u8> pixel1 = DecodePixel(
316 Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel); 337 config.input_format, src_pixel + 1 * src_bytes_per_pixel);
317 Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel); 338 Math::Vec4<u8> pixel2 = DecodePixel(
339 config.input_format, src_pixel + 2 * src_bytes_per_pixel);
340 Math::Vec4<u8> pixel3 = DecodePixel(
341 config.input_format, src_pixel + 3 * src_bytes_per_pixel);
318 src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>(); 342 src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
319 } 343 }
320 344
@@ -341,17 +365,20 @@ inline void Write(u32 addr, const T data) {
341 break; 365 break;
342 366
343 default: 367 default:
344 LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", config.output_format.Value()); 368 LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x",
369 config.output_format.Value());
345 break; 370 break;
346 } 371 }
347 } 372 }
348 } 373 }
349 374
350 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X", 375 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> "
351 config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format), 376 "0x%08x(%ux%u), dst format %x, flags 0x%08X",
352 config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), 377 config.output_height * output_width *
353 config.GetPhysicalOutputAddress(), output_width, output_height, 378 GPU::Regs::BytesPerPixel(config.output_format),
354 config.output_format.Value(), config.flags); 379 config.GetPhysicalInputAddress(), config.input_width.Value(),
380 config.input_height.Value(), config.GetPhysicalOutputAddress(),
381 output_width, output_height, config.output_format.Value(), config.flags);
355 } 382 }
356 383
357 g_regs.display_transfer_config.trigger = 0; 384 g_regs.display_transfer_config.trigger = 0;
@@ -361,17 +388,16 @@ inline void Write(u32 addr, const T data) {
361 } 388 }
362 389
363 // Seems like writing to this register triggers processing 390 // Seems like writing to this register triggers processing
364 case GPU_REG_INDEX(command_processor_config.trigger): 391 case GPU_REG_INDEX(command_processor_config.trigger): {
365 {
366 const auto& config = g_regs.command_processor_config; 392 const auto& config = g_regs.command_processor_config;
367 if (config.trigger & 1) 393 if (config.trigger & 1) {
368 {
369 MICROPROFILE_SCOPE(GPU_CmdlistProcessing); 394 MICROPROFILE_SCOPE(GPU_CmdlistProcessing);
370 395
371 u32* buffer = (u32*)Memory::GetPhysicalPointer(config.GetPhysicalAddress()); 396 u32* buffer = (u32*)Memory::GetPhysicalPointer(config.GetPhysicalAddress());
372 397
373 if (Pica::g_debug_context && Pica::g_debug_context->recorder) { 398 if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
374 Pica::g_debug_context->recorder->MemoryAccessed((u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress()); 399 Pica::g_debug_context->recorder->MemoryAccessed(
400 (u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress());
375 } 401 }
376 402
377 Pica::CommandProcessor::ProcessCommandList(buffer, config.size); 403 Pica::CommandProcessor::ProcessCommandList(buffer, config.size);
@@ -389,16 +415,17 @@ inline void Write(u32 addr, const T data) {
389 // This is happening *after* handling the write to make sure we properly catch all memory reads. 415 // This is happening *after* handling the write to make sure we properly catch all memory reads.
390 if (Pica::g_debug_context && Pica::g_debug_context->recorder) { 416 if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
391 // addr + GPU VBase - IO VBase + IO PBase 417 // addr + GPU VBase - IO VBase + IO PBase
392 Pica::g_debug_context->recorder->RegisterWritten<T>(addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data); 418 Pica::g_debug_context->recorder->RegisterWritten<T>(
419 addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data);
393 } 420 }
394} 421}
395 422
396// Explicitly instantiate template functions because we aren't defining this in the header: 423// Explicitly instantiate template functions because we aren't defining this in the header:
397 424
398template void Read<u64>(u64 &var, const u32 addr); 425template void Read<u64>(u64& var, const u32 addr);
399template void Read<u32>(u32 &var, const u32 addr); 426template void Read<u32>(u32& var, const u32 addr);
400template void Read<u16>(u16 &var, const u32 addr); 427template void Read<u16>(u16& var, const u32 addr);
401template void Read<u8>(u8 &var, const u32 addr); 428template void Read<u8>(u8& var, const u32 addr);
402 429
403template void Write<u64>(u32 addr, const u64 data); 430template void Write<u64>(u32 addr, const u64 data);
404template void Write<u32>(u32 addr, const u32 data); 431template void Write<u32>(u32 addr, const u32 data);
@@ -417,8 +444,9 @@ static void VBlankCallback(u64 userdata, int cycles_late) {
417 // - If frameskip == 0 (disabled), always swap buffers 444 // - If frameskip == 0 (disabled), always swap buffers
418 // - If frameskip == 1, swap buffers every other frame (starting from the first frame) 445 // - If frameskip == 1, swap buffers every other frame (starting from the first frame)
419 // - If frameskip > 1, swap buffers every frameskip^n frames (starting from the second frame) 446 // - If frameskip > 1, swap buffers every frameskip^n frames (starting from the second frame)
420 if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) && last_skip_frame != g_skip_frame) || 447 if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) &&
421 Settings::values.frame_skip == 0) { 448 last_skip_frame != g_skip_frame) ||
449 Settings::values.frame_skip == 0) {
422 VideoCore::g_renderer->SwapBuffers(); 450 VideoCore::g_renderer->SwapBuffers();
423 } 451 }
424 452
@@ -448,12 +476,12 @@ void Init() {
448 // .. or at least these are the ones used by system applets. 476 // .. or at least these are the ones used by system applets.
449 // There's probably a smarter way to come up with addresses 477 // There's probably a smarter way to come up with addresses
450 // like this which does not require hardcoding. 478 // like this which does not require hardcoding.
451 framebuffer_top.address_left1 = 0x181E6000; 479 framebuffer_top.address_left1 = 0x181E6000;
452 framebuffer_top.address_left2 = 0x1822C800; 480 framebuffer_top.address_left2 = 0x1822C800;
453 framebuffer_top.address_right1 = 0x18273000; 481 framebuffer_top.address_right1 = 0x18273000;
454 framebuffer_top.address_right2 = 0x182B9800; 482 framebuffer_top.address_right2 = 0x182B9800;
455 framebuffer_sub.address_left1 = 0x1848F000; 483 framebuffer_sub.address_left1 = 0x1848F000;
456 framebuffer_sub.address_left2 = 0x184C7800; 484 framebuffer_sub.address_left2 = 0x184C7800;
457 485
458 framebuffer_top.width.Assign(240); 486 framebuffer_top.width.Assign(240);
459 framebuffer_top.height.Assign(400); 487 framebuffer_top.height.Assign(400);
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index da4c345b4..077b6255f 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -17,7 +17,8 @@ namespace GPU {
17// Returns index corresponding to the Regs member labeled by field_name 17// Returns index corresponding to the Regs member labeled by field_name
18// TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions 18// TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions
19// when used with array elements (e.g. GPU_REG_INDEX(memory_fill_config[0])). 19// when used with array elements (e.g. GPU_REG_INDEX(memory_fill_config[0])).
20// For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members 20// For details cf.
21// https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members
21// Hopefully, this will be fixed sometime in the future. 22// Hopefully, this will be fixed sometime in the future.
22// For lack of better alternatives, we currently hardcode the offsets when constant 23// For lack of better alternatives, we currently hardcode the offsets when constant
23// expressions are needed via GPU_REG_INDEX_WORKAROUND (on sane compilers, static_asserts 24// expressions are needed via GPU_REG_INDEX_WORKAROUND (on sane compilers, static_asserts
@@ -30,8 +31,9 @@ namespace GPU {
30// really is this annoying. This macro just forwards its first argument to GPU_REG_INDEX 31// really is this annoying. This macro just forwards its first argument to GPU_REG_INDEX
31// and then performs a (no-op) cast to size_t iff the second argument matches the expected 32// and then performs a (no-op) cast to size_t iff the second argument matches the expected
32// field offset. Otherwise, the compiler will fail to compile this code. 33// field offset. Otherwise, the compiler will fail to compile this code.
33#define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \ 34#define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \
34 ((typename std::enable_if<backup_workaround_index == GPU_REG_INDEX(field_name), size_t>::type)GPU_REG_INDEX(field_name)) 35 ((typename std::enable_if<backup_workaround_index == GPU_REG_INDEX(field_name), size_t>::type) \
36 GPU_REG_INDEX(field_name))
35#endif 37#endif
36 38
37// MMIO region 0x1EFxxxxx 39// MMIO region 0x1EFxxxxx
@@ -44,18 +46,18 @@ struct Regs {
44// support for that. 46// support for that.
45#define ASSERT_MEMBER_SIZE(name, size_in_bytes) 47#define ASSERT_MEMBER_SIZE(name, size_in_bytes)
46#else 48#else
47#define ASSERT_MEMBER_SIZE(name, size_in_bytes) \ 49#define ASSERT_MEMBER_SIZE(name, size_in_bytes) \
48 static_assert(sizeof(name) == size_in_bytes, \ 50 static_assert(sizeof(name) == size_in_bytes, \
49 "Structure size and register block length don't match") 51 "Structure size and register block length don't match")
50#endif 52#endif
51 53
52 // Components are laid out in reverse byte order, most significant bits first. 54 // Components are laid out in reverse byte order, most significant bits first.
53 enum class PixelFormat : u32 { 55 enum class PixelFormat : u32 {
54 RGBA8 = 0, 56 RGBA8 = 0,
55 RGB8 = 1, 57 RGB8 = 1,
56 RGB565 = 2, 58 RGB565 = 2,
57 RGB5A1 = 3, 59 RGB5A1 = 3,
58 RGBA4 = 4, 60 RGBA4 = 4,
59 }; 61 };
60 62
61 /** 63 /**
@@ -88,8 +90,8 @@ struct Regs {
88 BitField<0, 16, u32> value_16bit; 90 BitField<0, 16, u32> value_16bit;
89 91
90 // TODO: Verify component order 92 // TODO: Verify component order
91 BitField< 0, 8, u32> value_24bit_r; 93 BitField<0, 8, u32> value_24bit_r;
92 BitField< 8, 8, u32> value_24bit_g; 94 BitField<8, 8, u32> value_24bit_g;
93 BitField<16, 8, u32> value_24bit_b; 95 BitField<16, 8, u32> value_24bit_b;
94 }; 96 };
95 97
@@ -126,7 +128,7 @@ struct Regs {
126 union { 128 union {
127 u32 size; 129 u32 size;
128 130
129 BitField< 0, 16, u32> width; 131 BitField<0, 16, u32> width;
130 BitField<16, 16, u32> height; 132 BitField<16, 16, u32> height;
131 }; 133 };
132 134
@@ -138,7 +140,7 @@ struct Regs {
138 union { 140 union {
139 u32 format; 141 u32 format;
140 142
141 BitField< 0, 3, PixelFormat> color_format; 143 BitField<0, 3, PixelFormat> color_format;
142 }; 144 };
143 145
144 INSERT_PADDING_WORDS(0x1); 146 INSERT_PADDING_WORDS(0x1);
@@ -180,35 +182,37 @@ struct Regs {
180 union { 182 union {
181 u32 output_size; 183 u32 output_size;
182 184
183 BitField< 0, 16, u32> output_width; 185 BitField<0, 16, u32> output_width;
184 BitField<16, 16, u32> output_height; 186 BitField<16, 16, u32> output_height;
185 }; 187 };
186 188
187 union { 189 union {
188 u32 input_size; 190 u32 input_size;
189 191
190 BitField< 0, 16, u32> input_width; 192 BitField<0, 16, u32> input_width;
191 BitField<16, 16, u32> input_height; 193 BitField<16, 16, u32> input_height;
192 }; 194 };
193 195
194 enum ScalingMode : u32 { 196 enum ScalingMode : u32 {
195 NoScale = 0, // Doesn't scale the image 197 NoScale = 0, // Doesn't scale the image
196 ScaleX = 1, // Downscales the image in half in the X axis and applies a box filter 198 ScaleX = 1, // Downscales the image in half in the X axis and applies a box filter
197 ScaleXY = 2, // Downscales the image in half in both the X and Y axes and applies a box filter 199 ScaleXY =
200 2, // Downscales the image in half in both the X and Y axes and applies a box filter
198 }; 201 };
199 202
200 union { 203 union {
201 u32 flags; 204 u32 flags;
202 205
203 BitField< 0, 1, u32> flip_vertically; // flips input data vertically 206 BitField<0, 1, u32> flip_vertically; // flips input data vertically
204 BitField< 1, 1, u32> input_linear; // Converts from linear to tiled format 207 BitField<1, 1, u32> input_linear; // Converts from linear to tiled format
205 BitField< 2, 1, u32> crop_input_lines; 208 BitField<2, 1, u32> crop_input_lines;
206 BitField< 3, 1, u32> is_texture_copy; // Copies the data without performing any processing and respecting texture copy fields 209 BitField<3, 1, u32> is_texture_copy; // Copies the data without performing any
207 BitField< 5, 1, u32> dont_swizzle; 210 // processing and respecting texture copy fields
208 BitField< 8, 3, PixelFormat> input_format; 211 BitField<5, 1, u32> dont_swizzle;
212 BitField<8, 3, PixelFormat> input_format;
209 BitField<12, 3, PixelFormat> output_format; 213 BitField<12, 3, PixelFormat> output_format;
210 /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one. 214 /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one.
211 BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented 215 BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented
212 BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer 216 BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer
213 }; 217 };
214 218
@@ -225,14 +229,14 @@ struct Regs {
225 union { 229 union {
226 u32 input_size; 230 u32 input_size;
227 231
228 BitField< 0, 16, u32> input_width; 232 BitField<0, 16, u32> input_width;
229 BitField<16, 16, u32> input_gap; 233 BitField<16, 16, u32> input_gap;
230 }; 234 };
231 235
232 union { 236 union {
233 u32 output_size; 237 u32 output_size;
234 238
235 BitField< 0, 16, u32> output_width; 239 BitField<0, 16, u32> output_width;
236 BitField<16, 16, u32> output_gap; 240 BitField<16, 16, u32> output_gap;
237 }; 241 };
238 } texture_copy; 242 } texture_copy;
@@ -267,12 +271,12 @@ struct Regs {
267 return sizeof(Regs) / sizeof(u32); 271 return sizeof(Regs) / sizeof(u32);
268 } 272 }
269 273
270 const u32& operator [] (int index) const { 274 const u32& operator[](int index) const {
271 const u32* content = reinterpret_cast<const u32*>(this); 275 const u32* content = reinterpret_cast<const u32*>(this);
272 return content[index]; 276 return content[index];
273 } 277 }
274 278
275 u32& operator [] (int index) { 279 u32& operator[](int index) {
276 u32* content = reinterpret_cast<u32*>(this); 280 u32* content = reinterpret_cast<u32*>(this);
277 return content[index]; 281 return content[index];
278 } 282 }
@@ -294,28 +298,29 @@ static_assert(std::is_standard_layout<Regs>::value, "Structure does not use stan
294// is technically allowed since C++11. This macro should be enabled once MSVC adds 298// is technically allowed since C++11. This macro should be enabled once MSVC adds
295// support for that. 299// support for that.
296#ifndef _MSC_VER 300#ifndef _MSC_VER
297#define ASSERT_REG_POSITION(field_name, position) \ 301#define ASSERT_REG_POSITION(field_name, position) \
298 static_assert(offsetof(Regs, field_name) == position * 4, \ 302 static_assert(offsetof(Regs, field_name) == position * 4, \
299 "Field "#field_name" has invalid position") 303 "Field " #field_name " has invalid position")
300 304
301ASSERT_REG_POSITION(memory_fill_config[0], 0x00004); 305ASSERT_REG_POSITION(memory_fill_config[0], 0x00004);
302ASSERT_REG_POSITION(memory_fill_config[1], 0x00008); 306ASSERT_REG_POSITION(memory_fill_config[1], 0x00008);
303ASSERT_REG_POSITION(framebuffer_config[0], 0x00117); 307ASSERT_REG_POSITION(framebuffer_config[0], 0x00117);
304ASSERT_REG_POSITION(framebuffer_config[1], 0x00157); 308ASSERT_REG_POSITION(framebuffer_config[1], 0x00157);
305ASSERT_REG_POSITION(display_transfer_config, 0x00300); 309ASSERT_REG_POSITION(display_transfer_config, 0x00300);
306ASSERT_REG_POSITION(command_processor_config, 0x00638); 310ASSERT_REG_POSITION(command_processor_config, 0x00638);
307 311
308#undef ASSERT_REG_POSITION 312#undef ASSERT_REG_POSITION
309#endif // !defined(_MSC_VER) 313#endif // !defined(_MSC_VER)
310 314
311// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway. 315// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value
316// anyway.
312static_assert(sizeof(Regs) == 0x1000 * sizeof(u32), "Invalid total size of register set"); 317static_assert(sizeof(Regs) == 0x1000 * sizeof(u32), "Invalid total size of register set");
313 318
314extern Regs g_regs; 319extern Regs g_regs;
315extern bool g_skip_frame; 320extern bool g_skip_frame;
316 321
317template <typename T> 322template <typename T>
318void Read(T &var, const u32 addr); 323void Read(T& var, const u32 addr);
319 324
320template <typename T> 325template <typename T>
321void Write(u32 addr, const T data); 326void Write(u32 addr, const T data);
@@ -326,5 +331,4 @@ void Init();
326/// Shutdown hardware 331/// Shutdown hardware
327void Shutdown(); 332void Shutdown();
328 333
329
330} // namespace 334} // namespace
diff --git a/src/core/hw/hw.cpp b/src/core/hw/hw.cpp
index b5fdbf9c1..db224c9aa 100644
--- a/src/core/hw/hw.cpp
+++ b/src/core/hw/hw.cpp
@@ -5,14 +5,14 @@
5#include "common/common_types.h" 5#include "common/common_types.h"
6#include "common/logging/log.h" 6#include "common/logging/log.h"
7 7
8#include "core/hw/hw.h"
9#include "core/hw/gpu.h" 8#include "core/hw/gpu.h"
9#include "core/hw/hw.h"
10#include "core/hw/lcd.h" 10#include "core/hw/lcd.h"
11 11
12namespace HW { 12namespace HW {
13 13
14template <typename T> 14template <typename T>
15inline void Read(T &var, const u32 addr) { 15inline void Read(T& var, const u32 addr) {
16 switch (addr & 0xFFFFF000) { 16 switch (addr & 0xFFFFF000) {
17 case VADDR_GPU: 17 case VADDR_GPU:
18 case VADDR_GPU + 0x1000: 18 case VADDR_GPU + 0x1000:
@@ -71,10 +71,10 @@ inline void Write(u32 addr, const T data) {
71 71
72// Explicitly instantiate template functions because we aren't defining this in the header: 72// Explicitly instantiate template functions because we aren't defining this in the header:
73 73
74template void Read<u64>(u64 &var, const u32 addr); 74template void Read<u64>(u64& var, const u32 addr);
75template void Read<u32>(u32 &var, const u32 addr); 75template void Read<u32>(u32& var, const u32 addr);
76template void Read<u16>(u16 &var, const u32 addr); 76template void Read<u16>(u16& var, const u32 addr);
77template void Read<u8>(u8 &var, const u32 addr); 77template void Read<u8>(u8& var, const u32 addr);
78 78
79template void Write<u64>(u32 addr, const u64 data); 79template void Write<u64>(u32 addr, const u64 data);
80template void Write<u32>(u32 addr, const u32 data); 80template void Write<u32>(u32 addr, const u32 data);
@@ -98,5 +98,4 @@ void Shutdown() {
98 LCD::Shutdown(); 98 LCD::Shutdown();
99 LOG_DEBUG(HW, "shutdown OK"); 99 LOG_DEBUG(HW, "shutdown OK");
100} 100}
101
102} 101}
diff --git a/src/core/hw/hw.h b/src/core/hw/hw.h
index d65608910..a3c5d2ea3 100644
--- a/src/core/hw/hw.h
+++ b/src/core/hw/hw.h
@@ -10,30 +10,30 @@ namespace HW {
10 10
11/// Beginnings of IO register regions, in the user VA space. 11/// Beginnings of IO register regions, in the user VA space.
12enum : u32 { 12enum : u32 {
13 VADDR_HASH = 0x1EC01000, 13 VADDR_HASH = 0x1EC01000,
14 VADDR_CSND = 0x1EC03000, 14 VADDR_CSND = 0x1EC03000,
15 VADDR_DSP = 0x1EC40000, 15 VADDR_DSP = 0x1EC40000,
16 VADDR_PDN = 0x1EC41000, 16 VADDR_PDN = 0x1EC41000,
17 VADDR_CODEC = 0x1EC41000, 17 VADDR_CODEC = 0x1EC41000,
18 VADDR_SPI = 0x1EC42000, 18 VADDR_SPI = 0x1EC42000,
19 VADDR_SPI_2 = 0x1EC43000, // Only used under TWL_FIRM? 19 VADDR_SPI_2 = 0x1EC43000, // Only used under TWL_FIRM?
20 VADDR_I2C = 0x1EC44000, 20 VADDR_I2C = 0x1EC44000,
21 VADDR_CODEC_2 = 0x1EC45000, 21 VADDR_CODEC_2 = 0x1EC45000,
22 VADDR_HID = 0x1EC46000, 22 VADDR_HID = 0x1EC46000,
23 VADDR_GPIO = 0x1EC47000, 23 VADDR_GPIO = 0x1EC47000,
24 VADDR_I2C_2 = 0x1EC48000, 24 VADDR_I2C_2 = 0x1EC48000,
25 VADDR_SPI_3 = 0x1EC60000, 25 VADDR_SPI_3 = 0x1EC60000,
26 VADDR_I2C_3 = 0x1EC61000, 26 VADDR_I2C_3 = 0x1EC61000,
27 VADDR_MIC = 0x1EC62000, 27 VADDR_MIC = 0x1EC62000,
28 VADDR_PXI = 0x1EC63000, 28 VADDR_PXI = 0x1EC63000,
29 VADDR_LCD = 0x1ED02000, 29 VADDR_LCD = 0x1ED02000,
30 VADDR_DSP_2 = 0x1ED03000, 30 VADDR_DSP_2 = 0x1ED03000,
31 VADDR_HASH_2 = 0x1EE01000, 31 VADDR_HASH_2 = 0x1EE01000,
32 VADDR_GPU = 0x1EF00000, 32 VADDR_GPU = 0x1EF00000,
33}; 33};
34 34
35template <typename T> 35template <typename T>
36void Read(T &var, const u32 addr); 36void Read(T& var, const u32 addr);
37 37
38template <typename T> 38template <typename T>
39void Write(u32 addr, const T data); 39void Write(u32 addr, const T data);
diff --git a/src/core/hw/lcd.cpp b/src/core/hw/lcd.cpp
index 6f93709e3..0e3aa7cfd 100644
--- a/src/core/hw/lcd.cpp
+++ b/src/core/hw/lcd.cpp
@@ -18,7 +18,7 @@ namespace LCD {
18Regs g_regs; 18Regs g_regs;
19 19
20template <typename T> 20template <typename T>
21inline void Read(T &var, const u32 raw_addr) { 21inline void Read(T& var, const u32 raw_addr) {
22 u32 addr = raw_addr - HW::VADDR_LCD; 22 u32 addr = raw_addr - HW::VADDR_LCD;
23 u32 index = addr / 4; 23 u32 index = addr / 4;
24 24
@@ -48,16 +48,17 @@ inline void Write(u32 addr, const T data) {
48 // This is happening *after* handling the write to make sure we properly catch all memory reads. 48 // This is happening *after* handling the write to make sure we properly catch all memory reads.
49 if (Pica::g_debug_context && Pica::g_debug_context->recorder) { 49 if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
50 // addr + GPU VBase - IO VBase + IO PBase 50 // addr + GPU VBase - IO VBase + IO PBase
51 Pica::g_debug_context->recorder->RegisterWritten<T>(addr + HW::VADDR_LCD - 0x1EC00000 + 0x10100000, data); 51 Pica::g_debug_context->recorder->RegisterWritten<T>(
52 addr + HW::VADDR_LCD - 0x1EC00000 + 0x10100000, data);
52 } 53 }
53} 54}
54 55
55// Explicitly instantiate template functions because we aren't defining this in the header: 56// Explicitly instantiate template functions because we aren't defining this in the header:
56 57
57template void Read<u64>(u64 &var, const u32 addr); 58template void Read<u64>(u64& var, const u32 addr);
58template void Read<u32>(u32 &var, const u32 addr); 59template void Read<u32>(u32& var, const u32 addr);
59template void Read<u16>(u16 &var, const u32 addr); 60template void Read<u16>(u16& var, const u32 addr);
60template void Read<u8>(u8 &var, const u32 addr); 61template void Read<u8>(u8& var, const u32 addr);
61 62
62template void Write<u64>(u32 addr, const u64 data); 63template void Write<u64>(u32 addr, const u64 data);
63template void Write<u32>(u32 addr, const u32 data); 64template void Write<u32>(u32 addr, const u32 data);
diff --git a/src/core/hw/lcd.h b/src/core/hw/lcd.h
index 57029c5e8..404833165 100644
--- a/src/core/hw/lcd.h
+++ b/src/core/hw/lcd.h
@@ -42,16 +42,15 @@ struct Regs {
42 return sizeof(Regs) / sizeof(u32); 42 return sizeof(Regs) / sizeof(u32);
43 } 43 }
44 44
45 const u32& operator [] (int index) const { 45 const u32& operator[](int index) const {
46 const u32* content = reinterpret_cast<const u32*>(this); 46 const u32* content = reinterpret_cast<const u32*>(this);
47 return content[index]; 47 return content[index];
48 } 48 }
49 49
50 u32& operator [] (int index) { 50 u32& operator[](int index) {
51 u32* content = reinterpret_cast<u32*>(this); 51 u32* content = reinterpret_cast<u32*>(this);
52 return content[index]; 52 return content[index];
53 } 53 }
54
55}; 54};
56static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout"); 55static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout");
57 56
@@ -59,14 +58,14 @@ static_assert(std::is_standard_layout<Regs>::value, "Structure does not use stan
59// is technically allowed since C++11. This macro should be enabled once MSVC adds 58// is technically allowed since C++11. This macro should be enabled once MSVC adds
60// support for that. 59// support for that.
61#ifndef _MSC_VER 60#ifndef _MSC_VER
62#define ASSERT_REG_POSITION(field_name, position) \ 61#define ASSERT_REG_POSITION(field_name, position) \
63 static_assert(offsetof(Regs, field_name) == position * 4, \ 62 static_assert(offsetof(Regs, field_name) == position * 4, \
64 "Field "#field_name" has invalid position") 63 "Field " #field_name " has invalid position")
65 64
66ASSERT_REG_POSITION(color_fill_top, 0x81); 65ASSERT_REG_POSITION(color_fill_top, 0x81);
67ASSERT_REG_POSITION(backlight_top, 0x90); 66ASSERT_REG_POSITION(backlight_top, 0x90);
68ASSERT_REG_POSITION(color_fill_bottom, 0x281); 67ASSERT_REG_POSITION(color_fill_bottom, 0x281);
69ASSERT_REG_POSITION(backlight_bottom, 0x290); 68ASSERT_REG_POSITION(backlight_bottom, 0x290);
70 69
71#undef ASSERT_REG_POSITION 70#undef ASSERT_REG_POSITION
72#endif // !defined(_MSC_VER) 71#endif // !defined(_MSC_VER)
@@ -74,7 +73,7 @@ ASSERT_REG_POSITION(backlight_bottom, 0x290);
74extern Regs g_regs; 73extern Regs g_regs;
75 74
76template <typename T> 75template <typename T>
77void Read(T &var, const u32 addr); 76void Read(T& var, const u32 addr);
78 77
79template <typename T> 78template <typename T>
80void Write(u32 addr, const T data); 79void Write(u32 addr, const T data);
diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp
index 083391e83..5a68d7e65 100644
--- a/src/core/hw/y2r.cpp
+++ b/src/core/hw/y2r.cpp
@@ -27,9 +27,9 @@ static const size_t TILE_SIZE = 8 * 8;
27using ImageTile = std::array<u32, TILE_SIZE>; 27using ImageTile = std::array<u32, TILE_SIZE>;
28 28
29/// Converts a image strip from the source YUV format into individual 8x8 RGB32 tiles. 29/// Converts a image strip from the source YUV format into individual 8x8 RGB32 tiles.
30static void ConvertYUVToRGB(InputFormat input_format, 30static void ConvertYUVToRGB(InputFormat input_format, const u8* input_Y, const u8* input_U,
31 const u8* input_Y, const u8* input_U, const u8* input_V, ImageTile output[], 31 const u8* input_V, ImageTile output[], unsigned int width,
32 unsigned int width, unsigned int height, const CoefficientSet& coefficients) { 32 unsigned int height, const CoefficientSet& coefficients) {
33 33
34 for (unsigned int y = 0; y < height; ++y) { 34 for (unsigned int y = 0; y < height; ++y) {
35 for (unsigned int x = 0; x < width; ++x) { 35 for (unsigned int x = 0; x < width; ++x) {
@@ -58,11 +58,11 @@ static void ConvertYUVToRGB(InputFormat input_format,
58 58
59 // This conversion process is bit-exact with hardware, as far as could be tested. 59 // This conversion process is bit-exact with hardware, as far as could be tested.
60 auto& c = coefficients; 60 auto& c = coefficients;
61 s32 cY = c[0]*Y; 61 s32 cY = c[0] * Y;
62 62
63 s32 r = cY + c[1]*V; 63 s32 r = cY + c[1] * V;
64 s32 g = cY - c[3]*U - c[2]*V; 64 s32 g = cY - c[3] * U - c[2] * V;
65 s32 b = cY + c[4]*U; 65 s32 b = cY + c[4] * U;
66 66
67 const s32 rounding_offset = 0x18; 67 const s32 rounding_offset = 0x18;
68 r = (r >> 3) + c[5] + rounding_offset; 68 r = (r >> 3) + c[5] + rounding_offset;
@@ -74,14 +74,14 @@ static void ConvertYUVToRGB(InputFormat input_format,
74 u32* out = &output[tile][y * 8 + tile_x]; 74 u32* out = &output[tile][y * 8 + tile_x];
75 75
76 using MathUtil::Clamp; 76 using MathUtil::Clamp;
77 *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) | 77 *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) | ((u32)Clamp(g >> 5, 0, 0xFF) << 16) |
78 ((u32)Clamp(g >> 5, 0, 0xFF) << 16) |
79 ((u32)Clamp(b >> 5, 0, 0xFF) << 8); 78 ((u32)Clamp(b >> 5, 0, 0xFF) << 8);
80 } 79 }
81 } 80 }
82} 81}
83 82
84/// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit formats to 8-bit. 83/// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit
84/// formats to 8-bit.
85template <size_t N> 85template <size_t N>
86static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) { 86static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) {
87 const u8* input = Memory::GetPointer(buf.address); 87 const u8* input = Memory::GetPointer(buf.address);
@@ -103,9 +103,10 @@ static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data
103 } 103 }
104} 104}
105 105
106/// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA transfer. 106/// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA
107/// transfer.
107static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data, 108static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data,
108 OutputFormat output_format, u8 alpha) { 109 OutputFormat output_format, u8 alpha) {
109 110
110 u8* output = Memory::GetPointer(buf.address); 111 u8* output = Memory::GetPointer(buf.address);
111 112
@@ -113,9 +114,7 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data
113 u8* unit_end = output + buf.transfer_unit; 114 u8* unit_end = output + buf.transfer_unit;
114 while (output < unit_end) { 115 while (output < unit_end) {
115 u32 color = *input++; 116 u32 color = *input++;
116 Math::Vec4<u8> col_vec{ 117 Math::Vec4<u8> col_vec{(u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha};
117 (u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha
118 };
119 118
120 switch (output_format) { 119 switch (output_format) {
121 case OutputFormat::RGBA8: 120 case OutputFormat::RGBA8:
@@ -146,34 +145,26 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data
146} 145}
147 146
148static const u8 linear_lut[64] = { 147static const u8 linear_lut[64] = {
149 0, 1, 2, 3, 4, 5, 6, 7, 148 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
150 8, 9, 10, 11, 12, 13, 14, 15, 149 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
151 16, 17, 18, 19, 20, 21, 22, 23, 150 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
152 24, 25, 26, 27, 28, 29, 30, 31,
153 32, 33, 34, 35, 36, 37, 38, 39,
154 40, 41, 42, 43, 44, 45, 46, 47,
155 48, 49, 50, 51, 52, 53, 54, 55,
156 56, 57, 58, 59, 60, 61, 62, 63,
157}; 151};
158 152
159static const u8 morton_lut[64] = { 153static const u8 morton_lut[64] = {
160 0, 1, 4, 5, 16, 17, 20, 21, 154 0, 1, 4, 5, 16, 17, 20, 21, 2, 3, 6, 7, 18, 19, 22, 23, 8, 9, 12, 13, 24, 25,
161 2, 3, 6, 7, 18, 19, 22, 23, 155 28, 29, 10, 11, 14, 15, 26, 27, 30, 31, 32, 33, 36, 37, 48, 49, 52, 53, 34, 35, 38, 39,
162 8, 9, 12, 13, 24, 25, 28, 29, 156 50, 51, 54, 55, 40, 41, 44, 45, 56, 57, 60, 61, 42, 43, 46, 47, 58, 59, 62, 63,
163 10, 11, 14, 15, 26, 27, 30, 31,
164 32, 33, 36, 37, 48, 49, 52, 53,
165 34, 35, 38, 39, 50, 51, 54, 55,
166 40, 41, 44, 45, 56, 57, 60, 61,
167 42, 43, 46, 47, 58, 59, 62, 63,
168}; 157};
169 158
170static void RotateTile0(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { 159static void RotateTile0(const ImageTile& input, ImageTile& output, int height,
160 const u8 out_map[64]) {
171 for (int i = 0; i < height * 8; ++i) { 161 for (int i = 0; i < height * 8; ++i) {
172 output[out_map[i]] = input[i]; 162 output[out_map[i]] = input[i];
173 } 163 }
174} 164}
175 165
176static void RotateTile90(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { 166static void RotateTile90(const ImageTile& input, ImageTile& output, int height,
167 const u8 out_map[64]) {
177 int out_i = 0; 168 int out_i = 0;
178 for (int x = 0; x < 8; ++x) { 169 for (int x = 0; x < 8; ++x) {
179 for (int y = height - 1; y >= 0; --y) { 170 for (int y = height - 1; y >= 0; --y) {
@@ -182,16 +173,18 @@ static void RotateTile90(const ImageTile& input, ImageTile& output, int height,
182 } 173 }
183} 174}
184 175
185static void RotateTile180(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { 176static void RotateTile180(const ImageTile& input, ImageTile& output, int height,
177 const u8 out_map[64]) {
186 int out_i = 0; 178 int out_i = 0;
187 for (int i = height * 8 - 1; i >= 0; --i) { 179 for (int i = height * 8 - 1; i >= 0; --i) {
188 output[out_map[out_i++]] = input[i]; 180 output[out_map[out_i++]] = input[i];
189 } 181 }
190} 182}
191 183
192static void RotateTile270(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { 184static void RotateTile270(const ImageTile& input, ImageTile& output, int height,
185 const u8 out_map[64]) {
193 int out_i = 0; 186 int out_i = 0;
194 for (int x = 8-1; x >= 0; --x) { 187 for (int x = 8 - 1; x >= 0; --x) {
195 for (int y = 0; y < height; ++y) { 188 for (int y = 0; y < height; ++y) {
196 output[out_map[out_i++]] = input[y * 8 + x]; 189 output[out_map[out_i++]] = input[y * 8 + x];
197 } 190 }
@@ -274,9 +267,11 @@ void PerformConversion(ConversionConfiguration& cvt) {
274 const u8* tile_remap = nullptr; 267 const u8* tile_remap = nullptr;
275 switch (cvt.block_alignment) { 268 switch (cvt.block_alignment) {
276 case BlockAlignment::Linear: 269 case BlockAlignment::Linear:
277 tile_remap = linear_lut; break; 270 tile_remap = linear_lut;
271 break;
278 case BlockAlignment::Block8x8: 272 case BlockAlignment::Block8x8:
279 tile_remap = morton_lut; break; 273 tile_remap = morton_lut;
274 break;
280 } 275 }
281 276
282 for (unsigned int y = 0; y < cvt.input_lines; y += 8) { 277 for (unsigned int y = 0; y < cvt.input_lines; y += 8) {
@@ -320,7 +315,7 @@ void PerformConversion(ConversionConfiguration& cvt) {
320 // Note(yuriks): If additional optimization is required, input_format can be moved to a 315 // Note(yuriks): If additional optimization is required, input_format can be moved to a
321 // template parameter, so that its dispatch can be moved to outside the inner loop. 316 // template parameter, so that its dispatch can be moved to outside the inner loop.
322 ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(), 317 ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(),
323 cvt.input_line_width, row_height, cvt.coefficients); 318 cvt.input_line_width, row_height, cvt.coefficients);
324 319
325 u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get()); 320 u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get());
326 321
@@ -367,9 +362,9 @@ void PerformConversion(ConversionConfiguration& cvt) {
367 362
368 // Note(yuriks): If additional optimization is required, output_format can be moved to a 363 // Note(yuriks): If additional optimization is required, output_format can be moved to a
369 // template parameter, so that its dispatch can be moved to outside the inner loop. 364 // template parameter, so that its dispatch can be moved to outside the inner loop.
370 SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size, cvt.output_format, (u8)cvt.alpha); 365 SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size,
366 cvt.output_format, (u8)cvt.alpha);
371 } 367 }
372} 368}
373
374} 369}
375} 370}
diff --git a/src/core/hw/y2r.h b/src/core/hw/y2r.h
index 729e1eee3..6b6e71bec 100644
--- a/src/core/hw/y2r.h
+++ b/src/core/hw/y2r.h
@@ -3,13 +3,12 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5namespace Y2R_U { 5namespace Y2R_U {
6 struct ConversionConfiguration; 6struct ConversionConfiguration;
7} 7}
8 8
9namespace HW { 9namespace HW {
10namespace Y2R { 10namespace Y2R {
11 11
12void PerformConversion(Y2R_U::ConversionConfiguration& cvt); 12void PerformConversion(Y2R_U::ConversionConfiguration& cvt);
13
14} 13}
15} 14}