summary | refs | log | tree | commit | diff
path: root/src/core/hw
diff options
context:
space:
mode:
author: Yuri Kunde Schlesner, 2016-09-21 11:29:48 -0700
committer: GitHub, 2016-09-21 11:29:48 -0700
commit: d5d2ca8058a0f1c00ab7ca9fe2c058ba47546c0a (patch)
tree: 8a22ca73ff838f3f0090b29a548ae81087fc90ed /src/core/hw
parent: README: Specify master branch for Travis CI badge (diff)
parent: Fix Travis clang-format check (diff)
download: yuzu-d5d2ca8058a0f1c00ab7ca9fe2c058ba47546c0a.tar.gz
yuzu-d5d2ca8058a0f1c00ab7ca9fe2c058ba47546c0a.tar.xz
yuzu-d5d2ca8058a0f1c00ab7ca9fe2c058ba47546c0a.zip
Merge pull request #2086 from linkmauve/clang-format
Add clang-format as part of our {commit,travis}-time checks
Diffstat (limited to 'src/core/hw')
-rw-r--r-- src/core/hw/gpu.cpp 151
-rw-r--r-- src/core/hw/gpu.h 85
-rw-r--r-- src/core/hw/hw.cpp 17
-rw-r--r-- src/core/hw/hw.h 42
-rw-r--r-- src/core/hw/lcd.cpp 16
-rw-r--r-- src/core/hw/lcd.h 20
-rw-r--r-- src/core/hw/y2r.cpp 65
-rw-r--r-- src/core/hw/y2r.h 3
8 files changed, 211 insertions, 188 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index a4dfb7e43..0e6b91e3a 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -5,34 +5,26 @@
5#include <cstring> 5#include <cstring>
6#include <numeric> 6#include <numeric>
7#include <type_traits> 7#include <type_traits>
8
9#include "common/color.h" 8#include "common/color.h"
10#include "common/common_types.h" 9#include "common/common_types.h"
11#include "common/logging/log.h" 10#include "common/logging/log.h"
12#include "common/microprofile.h" 11#include "common/microprofile.h"
13#include "common/vector_math.h" 12#include "common/vector_math.h"
14
15#include "core/settings.h"
16#include "core/memory.h"
17#include "core/core_timing.h" 13#include "core/core_timing.h"
18
19#include "core/hle/service/gsp_gpu.h" 14#include "core/hle/service/gsp_gpu.h"
20#include "core/hle/service/hid/hid.h" 15#include "core/hle/service/hid/hid.h"
21
22#include "core/hw/hw.h"
23#include "core/hw/gpu.h" 16#include "core/hw/gpu.h"
24 17#include "core/hw/hw.h"
18#include "core/memory.h"
19#include "core/settings.h"
25#include "core/tracer/recorder.h" 20#include "core/tracer/recorder.h"
26
27#include "video_core/command_processor.h" 21#include "video_core/command_processor.h"
22#include "video_core/debug_utils/debug_utils.h"
28#include "video_core/rasterizer_interface.h" 23#include "video_core/rasterizer_interface.h"
29#include "video_core/renderer_base.h" 24#include "video_core/renderer_base.h"
30#include "video_core/utils.h" 25#include "video_core/utils.h"
31#include "video_core/video_core.h" 26#include "video_core/video_core.h"
32 27
33#include "video_core/debug_utils/debug_utils.h"
34
35
36namespace GPU { 28namespace GPU {
37 29
38Regs g_regs; 30Regs g_regs;
@@ -49,7 +41,7 @@ static u64 frame_count;
49static bool last_skip_frame; 41static bool last_skip_frame;
50 42
51template <typename T> 43template <typename T>
52inline void Read(T &var, const u32 raw_addr) { 44inline void Read(T& var, const u32 raw_addr) {
53 u32 addr = raw_addr - HW::VADDR_GPU; 45 u32 addr = raw_addr - HW::VADDR_GPU;
54 u32 index = addr / 4; 46 u32 index = addr / 4;
55 47
@@ -105,8 +97,7 @@ inline void Write(u32 addr, const T data) {
105 97
106 // Memory fills are triggered once the fill value is written. 98 // Memory fills are triggered once the fill value is written.
107 case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].trigger, 0x00004 + 0x3): 99 case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].trigger, 0x00004 + 0x3):
108 case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3): 100 case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3): {
109 {
110 const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger)); 101 const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger));
111 auto& config = g_regs.memory_fill_config[is_second_filler]; 102 auto& config = g_regs.memory_fill_config[is_second_filler];
112 103
@@ -125,7 +116,9 @@ inline void Write(u32 addr, const T data) {
125 // regions that were between surfaces or within the touching 116 // regions that were between surfaces or within the touching
126 // ones for cpu to manually fill here. 117 // ones for cpu to manually fill here.
127 if (!VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) { 118 if (!VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) {
128 Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress()); 119 Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(),
120 config.GetEndAddress() -
121 config.GetStartAddress());
129 122
130 if (config.fill_24bit) { 123 if (config.fill_24bit) {
131 // fill with 24-bit values 124 // fill with 24-bit values
@@ -150,7 +143,8 @@ inline void Write(u32 addr, const T data) {
150 } 143 }
151 } 144 }
152 145
153 LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); 146 LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(),
147 config.GetEndAddress());
154 148
155 if (!is_second_filler) { 149 if (!is_second_filler) {
156 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0); 150 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0);
@@ -167,15 +161,15 @@ inline void Write(u32 addr, const T data) {
167 break; 161 break;
168 } 162 }
169 163
170 case GPU_REG_INDEX(display_transfer_config.trigger): 164 case GPU_REG_INDEX(display_transfer_config.trigger): {
171 {
172 MICROPROFILE_SCOPE(GPU_DisplayTransfer); 165 MICROPROFILE_SCOPE(GPU_DisplayTransfer);
173 166
174 const auto& config = g_regs.display_transfer_config; 167 const auto& config = g_regs.display_transfer_config;
175 if (config.trigger & 1) { 168 if (config.trigger & 1) {
176 169
177 if (Pica::g_debug_context) 170 if (Pica::g_debug_context)
178 Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr); 171 Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer,
172 nullptr);
179 173
180 if (!VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) { 174 if (!VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) {
181 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); 175 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
@@ -187,17 +181,23 @@ inline void Write(u32 addr, const T data) {
187 u32 output_width = config.texture_copy.output_width * 16; 181 u32 output_width = config.texture_copy.output_width * 16;
188 u32 output_gap = config.texture_copy.output_gap * 16; 182 u32 output_gap = config.texture_copy.output_gap * 16;
189 183
190 size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap); 184 size_t contiguous_input_size =
191 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), static_cast<u32>(contiguous_input_size)); 185 config.texture_copy.size / input_width * (input_width + input_gap);
186 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(),
187 static_cast<u32>(contiguous_input_size));
192 188
193 size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); 189 size_t contiguous_output_size =
194 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), static_cast<u32>(contiguous_output_size)); 190 config.texture_copy.size / output_width * (output_width + output_gap);
191 Memory::RasterizerFlushAndInvalidateRegion(
192 config.GetPhysicalOutputAddress(),
193 static_cast<u32>(contiguous_output_size));
195 194
196 u32 remaining_size = config.texture_copy.size; 195 u32 remaining_size = config.texture_copy.size;
197 u32 remaining_input = input_width; 196 u32 remaining_input = input_width;
198 u32 remaining_output = output_width; 197 u32 remaining_output = output_width;
199 while (remaining_size > 0) { 198 while (remaining_size > 0) {
200 u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size }); 199 u32 copy_size =
200 std::min({remaining_input, remaining_output, remaining_size});
201 201
202 std::memcpy(dst_pointer, src_pointer, copy_size); 202 std::memcpy(dst_pointer, src_pointer, copy_size);
203 src_pointer += copy_size; 203 src_pointer += copy_size;
@@ -217,10 +217,11 @@ inline void Write(u32 addr, const T data) {
217 } 217 }
218 } 218 }
219 219
220 LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X", 220 LOG_TRACE(
221 config.texture_copy.size, 221 HW_GPU,
222 config.GetPhysicalInputAddress(), input_width, input_gap, 222 "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
223 config.GetPhysicalOutputAddress(), output_width, output_gap, 223 config.texture_copy.size, config.GetPhysicalInputAddress(), input_width,
224 input_gap, config.GetPhysicalOutputAddress(), output_width, output_gap,
224 config.flags); 225 config.flags);
225 226
226 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); 227 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
@@ -228,7 +229,8 @@ inline void Write(u32 addr, const T data) {
228 } 229 }
229 230
230 if (config.scaling > config.ScaleXY) { 231 if (config.scaling > config.ScaleXY) {
231 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); 232 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u",
233 config.scaling.Value());
232 UNIMPLEMENTED(); 234 UNIMPLEMENTED();
233 break; 235 break;
234 } 236 }
@@ -245,11 +247,14 @@ inline void Write(u32 addr, const T data) {
245 u32 output_width = config.output_width >> horizontal_scale; 247 u32 output_width = config.output_width >> horizontal_scale;
246 u32 output_height = config.output_height >> vertical_scale; 248 u32 output_height = config.output_height >> vertical_scale;
247 249
248 u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format); 250 u32 input_size = config.input_width * config.input_height *
249 u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); 251 GPU::Regs::BytesPerPixel(config.input_format);
252 u32 output_size =
253 output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
250 254
251 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size); 255 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
252 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size); 256 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(),
257 output_size);
253 258
254 for (u32 y = 0; y < output_height; ++y) { 259 for (u32 y = 0; y < output_height; ++y) {
255 for (u32 x = 0; x < output_width; ++x) { 260 for (u32 x = 0; x < output_width; ++x) {
@@ -278,11 +283,14 @@ inline void Write(u32 addr, const T data) {
278 u32 coarse_y = y & ~7; 283 u32 coarse_y = y & ~7;
279 u32 stride = output_width * dst_bytes_per_pixel; 284 u32 stride = output_width * dst_bytes_per_pixel;
280 285
281 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; 286 src_offset =
282 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride; 287 (input_x + input_y * config.input_width) * src_bytes_per_pixel;
288 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) +
289 coarse_y * stride;
283 } else { 290 } else {
284 // Both input and output are linear 291 // Both input and output are linear
285 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; 292 src_offset =
293 (input_x + input_y * config.input_width) * src_bytes_per_pixel;
286 dst_offset = (x + y * output_width) * dst_bytes_per_pixel; 294 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
287 } 295 }
288 } else { 296 } else {
@@ -291,7 +299,9 @@ inline void Write(u32 addr, const T data) {
291 u32 coarse_y = input_y & ~7; 299 u32 coarse_y = input_y & ~7;
292 u32 stride = config.input_width * src_bytes_per_pixel; 300 u32 stride = config.input_width * src_bytes_per_pixel;
293 301
294 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride; 302 src_offset = VideoCore::GetMortonOffset(input_x, input_y,
303 src_bytes_per_pixel) +
304 coarse_y * stride;
295 dst_offset = (x + y * output_width) * dst_bytes_per_pixel; 305 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
296 } else { 306 } else {
297 // Both input and output are tiled 307 // Both input and output are tiled
@@ -301,20 +311,27 @@ inline void Write(u32 addr, const T data) {
301 u32 in_coarse_y = input_y & ~7; 311 u32 in_coarse_y = input_y & ~7;
302 u32 in_stride = config.input_width * src_bytes_per_pixel; 312 u32 in_stride = config.input_width * src_bytes_per_pixel;
303 313
304 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride; 314 src_offset = VideoCore::GetMortonOffset(input_x, input_y,
305 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride; 315 src_bytes_per_pixel) +
316 in_coarse_y * in_stride;
317 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) +
318 out_coarse_y * out_stride;
306 } 319 }
307 } 320 }
308 321
309 const u8* src_pixel = src_pointer + src_offset; 322 const u8* src_pixel = src_pointer + src_offset;
310 src_color = DecodePixel(config.input_format, src_pixel); 323 src_color = DecodePixel(config.input_format, src_pixel);
311 if (config.scaling == config.ScaleX) { 324 if (config.scaling == config.ScaleX) {
312 Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); 325 Math::Vec4<u8> pixel =
326 DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
313 src_color = ((src_color + pixel) / 2).Cast<u8>(); 327 src_color = ((src_color + pixel) / 2).Cast<u8>();
314 } else if (config.scaling == config.ScaleXY) { 328 } else if (config.scaling == config.ScaleXY) {
315 Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel); 329 Math::Vec4<u8> pixel1 = DecodePixel(
316 Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel); 330 config.input_format, src_pixel + 1 * src_bytes_per_pixel);
317 Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel); 331 Math::Vec4<u8> pixel2 = DecodePixel(
332 config.input_format, src_pixel + 2 * src_bytes_per_pixel);
333 Math::Vec4<u8> pixel3 = DecodePixel(
334 config.input_format, src_pixel + 3 * src_bytes_per_pixel);
318 src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>(); 335 src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
319 } 336 }
320 337
@@ -341,17 +358,20 @@ inline void Write(u32 addr, const T data) {
341 break; 358 break;
342 359
343 default: 360 default:
344 LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", config.output_format.Value()); 361 LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x",
362 config.output_format.Value());
345 break; 363 break;
346 } 364 }
347 } 365 }
348 } 366 }
349 367
350 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X", 368 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> "
351 config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format), 369 "0x%08x(%ux%u), dst format %x, flags 0x%08X",
352 config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), 370 config.output_height * output_width *
353 config.GetPhysicalOutputAddress(), output_width, output_height, 371 GPU::Regs::BytesPerPixel(config.output_format),
354 config.output_format.Value(), config.flags); 372 config.GetPhysicalInputAddress(), config.input_width.Value(),
373 config.input_height.Value(), config.GetPhysicalOutputAddress(),
374 output_width, output_height, config.output_format.Value(), config.flags);
355 } 375 }
356 376
357 g_regs.display_transfer_config.trigger = 0; 377 g_regs.display_transfer_config.trigger = 0;
@@ -361,17 +381,16 @@ inline void Write(u32 addr, const T data) {
361 } 381 }
362 382
363 // Seems like writing to this register triggers processing 383 // Seems like writing to this register triggers processing
364 case GPU_REG_INDEX(command_processor_config.trigger): 384 case GPU_REG_INDEX(command_processor_config.trigger): {
365 {
366 const auto& config = g_regs.command_processor_config; 385 const auto& config = g_regs.command_processor_config;
367 if (config.trigger & 1) 386 if (config.trigger & 1) {
368 {
369 MICROPROFILE_SCOPE(GPU_CmdlistProcessing); 387 MICROPROFILE_SCOPE(GPU_CmdlistProcessing);
370 388
371 u32* buffer = (u32*)Memory::GetPhysicalPointer(config.GetPhysicalAddress()); 389 u32* buffer = (u32*)Memory::GetPhysicalPointer(config.GetPhysicalAddress());
372 390
373 if (Pica::g_debug_context && Pica::g_debug_context->recorder) { 391 if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
374 Pica::g_debug_context->recorder->MemoryAccessed((u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress()); 392 Pica::g_debug_context->recorder->MemoryAccessed(
393 (u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress());
375 } 394 }
376 395
377 Pica::CommandProcessor::ProcessCommandList(buffer, config.size); 396 Pica::CommandProcessor::ProcessCommandList(buffer, config.size);
@@ -389,16 +408,17 @@ inline void Write(u32 addr, const T data) {
389 // This is happening *after* handling the write to make sure we properly catch all memory reads. 408 // This is happening *after* handling the write to make sure we properly catch all memory reads.
390 if (Pica::g_debug_context && Pica::g_debug_context->recorder) { 409 if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
391 // addr + GPU VBase - IO VBase + IO PBase 410 // addr + GPU VBase - IO VBase + IO PBase
392 Pica::g_debug_context->recorder->RegisterWritten<T>(addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data); 411 Pica::g_debug_context->recorder->RegisterWritten<T>(
412 addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data);
393 } 413 }
394} 414}
395 415
396// Explicitly instantiate template functions because we aren't defining this in the header: 416// Explicitly instantiate template functions because we aren't defining this in the header:
397 417
398template void Read<u64>(u64 &var, const u32 addr); 418template void Read<u64>(u64& var, const u32 addr);
399template void Read<u32>(u32 &var, const u32 addr); 419template void Read<u32>(u32& var, const u32 addr);
400template void Read<u16>(u16 &var, const u32 addr); 420template void Read<u16>(u16& var, const u32 addr);
401template void Read<u8>(u8 &var, const u32 addr); 421template void Read<u8>(u8& var, const u32 addr);
402 422
403template void Write<u64>(u32 addr, const u64 data); 423template void Write<u64>(u32 addr, const u64 data);
404template void Write<u32>(u32 addr, const u32 data); 424template void Write<u32>(u32 addr, const u32 data);
@@ -417,8 +437,9 @@ static void VBlankCallback(u64 userdata, int cycles_late) {
417 // - If frameskip == 0 (disabled), always swap buffers 437 // - If frameskip == 0 (disabled), always swap buffers
418 // - If frameskip == 1, swap buffers every other frame (starting from the first frame) 438 // - If frameskip == 1, swap buffers every other frame (starting from the first frame)
419 // - If frameskip > 1, swap buffers every frameskip^n frames (starting from the second frame) 439 // - If frameskip > 1, swap buffers every frameskip^n frames (starting from the second frame)
420 if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) && last_skip_frame != g_skip_frame) || 440 if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) &&
421 Settings::values.frame_skip == 0) { 441 last_skip_frame != g_skip_frame) ||
442 Settings::values.frame_skip == 0) {
422 VideoCore::g_renderer->SwapBuffers(); 443 VideoCore::g_renderer->SwapBuffers();
423 } 444 }
424 445
@@ -448,12 +469,12 @@ void Init() {
448 // .. or at least these are the ones used by system applets. 469 // .. or at least these are the ones used by system applets.
449 // There's probably a smarter way to come up with addresses 470 // There's probably a smarter way to come up with addresses
450 // like this which does not require hardcoding. 471 // like this which does not require hardcoding.
451 framebuffer_top.address_left1 = 0x181E6000; 472 framebuffer_top.address_left1 = 0x181E6000;
452 framebuffer_top.address_left2 = 0x1822C800; 473 framebuffer_top.address_left2 = 0x1822C800;
453 framebuffer_top.address_right1 = 0x18273000; 474 framebuffer_top.address_right1 = 0x18273000;
454 framebuffer_top.address_right2 = 0x182B9800; 475 framebuffer_top.address_right2 = 0x182B9800;
455 framebuffer_sub.address_left1 = 0x1848F000; 476 framebuffer_sub.address_left1 = 0x1848F000;
456 framebuffer_sub.address_left2 = 0x184C7800; 477 framebuffer_sub.address_left2 = 0x184C7800;
457 478
458 framebuffer_top.width.Assign(240); 479 framebuffer_top.width.Assign(240);
459 framebuffer_top.height.Assign(400); 480 framebuffer_top.height.Assign(400);
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index da4c345b4..32ddc5697 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -6,7 +6,6 @@
6 6
7#include <cstddef> 7#include <cstddef>
8#include <type_traits> 8#include <type_traits>
9
10#include "common/assert.h" 9#include "common/assert.h"
11#include "common/bit_field.h" 10#include "common/bit_field.h"
12#include "common/common_funcs.h" 11#include "common/common_funcs.h"
@@ -17,7 +16,8 @@ namespace GPU {
17// Returns index corresponding to the Regs member labeled by field_name 16// Returns index corresponding to the Regs member labeled by field_name
18// TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions 17// TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions
19// when used with array elements (e.g. GPU_REG_INDEX(memory_fill_config[0])). 18// when used with array elements (e.g. GPU_REG_INDEX(memory_fill_config[0])).
20// For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members 19// For details cf.
20// https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members
21// Hopefully, this will be fixed sometime in the future. 21// Hopefully, this will be fixed sometime in the future.
22// For lack of better alternatives, we currently hardcode the offsets when constant 22// For lack of better alternatives, we currently hardcode the offsets when constant
23// expressions are needed via GPU_REG_INDEX_WORKAROUND (on sane compilers, static_asserts 23// expressions are needed via GPU_REG_INDEX_WORKAROUND (on sane compilers, static_asserts
@@ -30,8 +30,9 @@ namespace GPU {
30// really is this annoying. This macro just forwards its first argument to GPU_REG_INDEX 30// really is this annoying. This macro just forwards its first argument to GPU_REG_INDEX
31// and then performs a (no-op) cast to size_t iff the second argument matches the expected 31// and then performs a (no-op) cast to size_t iff the second argument matches the expected
32// field offset. Otherwise, the compiler will fail to compile this code. 32// field offset. Otherwise, the compiler will fail to compile this code.
33#define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \ 33#define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \
34 ((typename std::enable_if<backup_workaround_index == GPU_REG_INDEX(field_name), size_t>::type)GPU_REG_INDEX(field_name)) 34 ((typename std::enable_if<backup_workaround_index == GPU_REG_INDEX(field_name), size_t>::type) \
35 GPU_REG_INDEX(field_name))
35#endif 36#endif
36 37
37// MMIO region 0x1EFxxxxx 38// MMIO region 0x1EFxxxxx
@@ -44,18 +45,18 @@ struct Regs {
44// support for that. 45// support for that.
45#define ASSERT_MEMBER_SIZE(name, size_in_bytes) 46#define ASSERT_MEMBER_SIZE(name, size_in_bytes)
46#else 47#else
47#define ASSERT_MEMBER_SIZE(name, size_in_bytes) \ 48#define ASSERT_MEMBER_SIZE(name, size_in_bytes) \
48 static_assert(sizeof(name) == size_in_bytes, \ 49 static_assert(sizeof(name) == size_in_bytes, \
49 "Structure size and register block length don't match") 50 "Structure size and register block length don't match")
50#endif 51#endif
51 52
52 // Components are laid out in reverse byte order, most significant bits first. 53 // Components are laid out in reverse byte order, most significant bits first.
53 enum class PixelFormat : u32 { 54 enum class PixelFormat : u32 {
54 RGBA8 = 0, 55 RGBA8 = 0,
55 RGB8 = 1, 56 RGB8 = 1,
56 RGB565 = 2, 57 RGB565 = 2,
57 RGB5A1 = 3, 58 RGB5A1 = 3,
58 RGBA4 = 4, 59 RGBA4 = 4,
59 }; 60 };
60 61
61 /** 62 /**
@@ -88,8 +89,8 @@ struct Regs {
88 BitField<0, 16, u32> value_16bit; 89 BitField<0, 16, u32> value_16bit;
89 90
90 // TODO: Verify component order 91 // TODO: Verify component order
91 BitField< 0, 8, u32> value_24bit_r; 92 BitField<0, 8, u32> value_24bit_r;
92 BitField< 8, 8, u32> value_24bit_g; 93 BitField<8, 8, u32> value_24bit_g;
93 BitField<16, 8, u32> value_24bit_b; 94 BitField<16, 8, u32> value_24bit_b;
94 }; 95 };
95 96
@@ -126,7 +127,7 @@ struct Regs {
126 union { 127 union {
127 u32 size; 128 u32 size;
128 129
129 BitField< 0, 16, u32> width; 130 BitField<0, 16, u32> width;
130 BitField<16, 16, u32> height; 131 BitField<16, 16, u32> height;
131 }; 132 };
132 133
@@ -138,7 +139,7 @@ struct Regs {
138 union { 139 union {
139 u32 format; 140 u32 format;
140 141
141 BitField< 0, 3, PixelFormat> color_format; 142 BitField<0, 3, PixelFormat> color_format;
142 }; 143 };
143 144
144 INSERT_PADDING_WORDS(0x1); 145 INSERT_PADDING_WORDS(0x1);
@@ -180,35 +181,37 @@ struct Regs {
180 union { 181 union {
181 u32 output_size; 182 u32 output_size;
182 183
183 BitField< 0, 16, u32> output_width; 184 BitField<0, 16, u32> output_width;
184 BitField<16, 16, u32> output_height; 185 BitField<16, 16, u32> output_height;
185 }; 186 };
186 187
187 union { 188 union {
188 u32 input_size; 189 u32 input_size;
189 190
190 BitField< 0, 16, u32> input_width; 191 BitField<0, 16, u32> input_width;
191 BitField<16, 16, u32> input_height; 192 BitField<16, 16, u32> input_height;
192 }; 193 };
193 194
194 enum ScalingMode : u32 { 195 enum ScalingMode : u32 {
195 NoScale = 0, // Doesn't scale the image 196 NoScale = 0, // Doesn't scale the image
196 ScaleX = 1, // Downscales the image in half in the X axis and applies a box filter 197 ScaleX = 1, // Downscales the image in half in the X axis and applies a box filter
197 ScaleXY = 2, // Downscales the image in half in both the X and Y axes and applies a box filter 198 ScaleXY =
199 2, // Downscales the image in half in both the X and Y axes and applies a box filter
198 }; 200 };
199 201
200 union { 202 union {
201 u32 flags; 203 u32 flags;
202 204
203 BitField< 0, 1, u32> flip_vertically; // flips input data vertically 205 BitField<0, 1, u32> flip_vertically; // flips input data vertically
204 BitField< 1, 1, u32> input_linear; // Converts from linear to tiled format 206 BitField<1, 1, u32> input_linear; // Converts from linear to tiled format
205 BitField< 2, 1, u32> crop_input_lines; 207 BitField<2, 1, u32> crop_input_lines;
206 BitField< 3, 1, u32> is_texture_copy; // Copies the data without performing any processing and respecting texture copy fields 208 BitField<3, 1, u32> is_texture_copy; // Copies the data without performing any
207 BitField< 5, 1, u32> dont_swizzle; 209 // processing and respecting texture copy fields
208 BitField< 8, 3, PixelFormat> input_format; 210 BitField<5, 1, u32> dont_swizzle;
211 BitField<8, 3, PixelFormat> input_format;
209 BitField<12, 3, PixelFormat> output_format; 212 BitField<12, 3, PixelFormat> output_format;
210 /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one. 213 /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one.
211 BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented 214 BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented
212 BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer 215 BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer
213 }; 216 };
214 217
@@ -225,14 +228,14 @@ struct Regs {
225 union { 228 union {
226 u32 input_size; 229 u32 input_size;
227 230
228 BitField< 0, 16, u32> input_width; 231 BitField<0, 16, u32> input_width;
229 BitField<16, 16, u32> input_gap; 232 BitField<16, 16, u32> input_gap;
230 }; 233 };
231 234
232 union { 235 union {
233 u32 output_size; 236 u32 output_size;
234 237
235 BitField< 0, 16, u32> output_width; 238 BitField<0, 16, u32> output_width;
236 BitField<16, 16, u32> output_gap; 239 BitField<16, 16, u32> output_gap;
237 }; 240 };
238 } texture_copy; 241 } texture_copy;
@@ -267,12 +270,12 @@ struct Regs {
267 return sizeof(Regs) / sizeof(u32); 270 return sizeof(Regs) / sizeof(u32);
268 } 271 }
269 272
270 const u32& operator [] (int index) const { 273 const u32& operator[](int index) const {
271 const u32* content = reinterpret_cast<const u32*>(this); 274 const u32* content = reinterpret_cast<const u32*>(this);
272 return content[index]; 275 return content[index];
273 } 276 }
274 277
275 u32& operator [] (int index) { 278 u32& operator[](int index) {
276 u32* content = reinterpret_cast<u32*>(this); 279 u32* content = reinterpret_cast<u32*>(this);
277 return content[index]; 280 return content[index];
278 } 281 }
@@ -294,28 +297,29 @@ static_assert(std::is_standard_layout<Regs>::value, "Structure does not use stan
294// is technically allowed since C++11. This macro should be enabled once MSVC adds 297// is technically allowed since C++11. This macro should be enabled once MSVC adds
295// support for that. 298// support for that.
296#ifndef _MSC_VER 299#ifndef _MSC_VER
297#define ASSERT_REG_POSITION(field_name, position) \ 300#define ASSERT_REG_POSITION(field_name, position) \
298 static_assert(offsetof(Regs, field_name) == position * 4, \ 301 static_assert(offsetof(Regs, field_name) == position * 4, \
299 "Field "#field_name" has invalid position") 302 "Field " #field_name " has invalid position")
300 303
301ASSERT_REG_POSITION(memory_fill_config[0], 0x00004); 304ASSERT_REG_POSITION(memory_fill_config[0], 0x00004);
302ASSERT_REG_POSITION(memory_fill_config[1], 0x00008); 305ASSERT_REG_POSITION(memory_fill_config[1], 0x00008);
303ASSERT_REG_POSITION(framebuffer_config[0], 0x00117); 306ASSERT_REG_POSITION(framebuffer_config[0], 0x00117);
304ASSERT_REG_POSITION(framebuffer_config[1], 0x00157); 307ASSERT_REG_POSITION(framebuffer_config[1], 0x00157);
305ASSERT_REG_POSITION(display_transfer_config, 0x00300); 308ASSERT_REG_POSITION(display_transfer_config, 0x00300);
306ASSERT_REG_POSITION(command_processor_config, 0x00638); 309ASSERT_REG_POSITION(command_processor_config, 0x00638);
307 310
308#undef ASSERT_REG_POSITION 311#undef ASSERT_REG_POSITION
309#endif // !defined(_MSC_VER) 312#endif // !defined(_MSC_VER)
310 313
311// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway. 314// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value
315// anyway.
312static_assert(sizeof(Regs) == 0x1000 * sizeof(u32), "Invalid total size of register set"); 316static_assert(sizeof(Regs) == 0x1000 * sizeof(u32), "Invalid total size of register set");
313 317
314extern Regs g_regs; 318extern Regs g_regs;
315extern bool g_skip_frame; 319extern bool g_skip_frame;
316 320
317template <typename T> 321template <typename T>
318void Read(T &var, const u32 addr); 322void Read(T& var, const u32 addr);
319 323
320template <typename T> 324template <typename T>
321void Write(u32 addr, const T data); 325void Write(u32 addr, const T data);
@@ -326,5 +330,4 @@ void Init();
326/// Shutdown hardware 330/// Shutdown hardware
327void Shutdown(); 331void Shutdown();
328 332
329
330} // namespace 333} // namespace
diff --git a/src/core/hw/hw.cpp b/src/core/hw/hw.cpp
index b5fdbf9c1..9ff8825b2 100644
--- a/src/core/hw/hw.cpp
+++ b/src/core/hw/hw.cpp
@@ -4,15 +4,14 @@
4 4
5#include "common/common_types.h" 5#include "common/common_types.h"
6#include "common/logging/log.h" 6#include "common/logging/log.h"
7
8#include "core/hw/hw.h"
9#include "core/hw/gpu.h" 7#include "core/hw/gpu.h"
8#include "core/hw/hw.h"
10#include "core/hw/lcd.h" 9#include "core/hw/lcd.h"
11 10
12namespace HW { 11namespace HW {
13 12
14template <typename T> 13template <typename T>
15inline void Read(T &var, const u32 addr) { 14inline void Read(T& var, const u32 addr) {
16 switch (addr & 0xFFFFF000) { 15 switch (addr & 0xFFFFF000) {
17 case VADDR_GPU: 16 case VADDR_GPU:
18 case VADDR_GPU + 0x1000: 17 case VADDR_GPU + 0x1000:
@@ -71,10 +70,10 @@ inline void Write(u32 addr, const T data) {
71 70
72// Explicitly instantiate template functions because we aren't defining this in the header: 71// Explicitly instantiate template functions because we aren't defining this in the header:
73 72
74template void Read<u64>(u64 &var, const u32 addr); 73template void Read<u64>(u64& var, const u32 addr);
75template void Read<u32>(u32 &var, const u32 addr); 74template void Read<u32>(u32& var, const u32 addr);
76template void Read<u16>(u16 &var, const u32 addr); 75template void Read<u16>(u16& var, const u32 addr);
77template void Read<u8>(u8 &var, const u32 addr); 76template void Read<u8>(u8& var, const u32 addr);
78 77
79template void Write<u64>(u32 addr, const u64 data); 78template void Write<u64>(u32 addr, const u64 data);
80template void Write<u32>(u32 addr, const u32 data); 79template void Write<u32>(u32 addr, const u32 data);
@@ -82,8 +81,7 @@ template void Write<u16>(u32 addr, const u16 data);
82template void Write<u8>(u32 addr, const u8 data); 81template void Write<u8>(u32 addr, const u8 data);
83 82
84/// Update hardware 83/// Update hardware
85void Update() { 84void Update() {}
86}
87 85
88/// Initialize hardware 86/// Initialize hardware
89void Init() { 87void Init() {
@@ -98,5 +96,4 @@ void Shutdown() {
98 LCD::Shutdown(); 96 LCD::Shutdown();
99 LOG_DEBUG(HW, "shutdown OK"); 97 LOG_DEBUG(HW, "shutdown OK");
100} 98}
101
102} 99}
diff --git a/src/core/hw/hw.h b/src/core/hw/hw.h
index d65608910..a3c5d2ea3 100644
--- a/src/core/hw/hw.h
+++ b/src/core/hw/hw.h
@@ -10,30 +10,30 @@ namespace HW {
10 10
11/// Beginnings of IO register regions, in the user VA space. 11/// Beginnings of IO register regions, in the user VA space.
12enum : u32 { 12enum : u32 {
13 VADDR_HASH = 0x1EC01000, 13 VADDR_HASH = 0x1EC01000,
14 VADDR_CSND = 0x1EC03000, 14 VADDR_CSND = 0x1EC03000,
15 VADDR_DSP = 0x1EC40000, 15 VADDR_DSP = 0x1EC40000,
16 VADDR_PDN = 0x1EC41000, 16 VADDR_PDN = 0x1EC41000,
17 VADDR_CODEC = 0x1EC41000, 17 VADDR_CODEC = 0x1EC41000,
18 VADDR_SPI = 0x1EC42000, 18 VADDR_SPI = 0x1EC42000,
19 VADDR_SPI_2 = 0x1EC43000, // Only used under TWL_FIRM? 19 VADDR_SPI_2 = 0x1EC43000, // Only used under TWL_FIRM?
20 VADDR_I2C = 0x1EC44000, 20 VADDR_I2C = 0x1EC44000,
21 VADDR_CODEC_2 = 0x1EC45000, 21 VADDR_CODEC_2 = 0x1EC45000,
22 VADDR_HID = 0x1EC46000, 22 VADDR_HID = 0x1EC46000,
23 VADDR_GPIO = 0x1EC47000, 23 VADDR_GPIO = 0x1EC47000,
24 VADDR_I2C_2 = 0x1EC48000, 24 VADDR_I2C_2 = 0x1EC48000,
25 VADDR_SPI_3 = 0x1EC60000, 25 VADDR_SPI_3 = 0x1EC60000,
26 VADDR_I2C_3 = 0x1EC61000, 26 VADDR_I2C_3 = 0x1EC61000,
27 VADDR_MIC = 0x1EC62000, 27 VADDR_MIC = 0x1EC62000,
28 VADDR_PXI = 0x1EC63000, 28 VADDR_PXI = 0x1EC63000,
29 VADDR_LCD = 0x1ED02000, 29 VADDR_LCD = 0x1ED02000,
30 VADDR_DSP_2 = 0x1ED03000, 30 VADDR_DSP_2 = 0x1ED03000,
31 VADDR_HASH_2 = 0x1EE01000, 31 VADDR_HASH_2 = 0x1EE01000,
32 VADDR_GPU = 0x1EF00000, 32 VADDR_GPU = 0x1EF00000,
33}; 33};
34 34
35template <typename T> 35template <typename T>
36void Read(T &var, const u32 addr); 36void Read(T& var, const u32 addr);
37 37
38template <typename T> 38template <typename T>
39void Write(u32 addr, const T data); 39void Write(u32 addr, const T data);
diff --git a/src/core/hw/lcd.cpp b/src/core/hw/lcd.cpp
index 6f93709e3..2aa89de18 100644
--- a/src/core/hw/lcd.cpp
+++ b/src/core/hw/lcd.cpp
@@ -3,13 +3,10 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring> 5#include <cstring>
6
7#include "common/common_types.h" 6#include "common/common_types.h"
8#include "common/logging/log.h" 7#include "common/logging/log.h"
9
10#include "core/hw/hw.h" 8#include "core/hw/hw.h"
11#include "core/hw/lcd.h" 9#include "core/hw/lcd.h"
12
13#include "core/tracer/recorder.h" 10#include "core/tracer/recorder.h"
14#include "video_core/debug_utils/debug_utils.h" 11#include "video_core/debug_utils/debug_utils.h"
15 12
@@ -18,7 +15,7 @@ namespace LCD {
18Regs g_regs; 15Regs g_regs;
19 16
20template <typename T> 17template <typename T>
21inline void Read(T &var, const u32 raw_addr) { 18inline void Read(T& var, const u32 raw_addr) {
22 u32 addr = raw_addr - HW::VADDR_LCD; 19 u32 addr = raw_addr - HW::VADDR_LCD;
23 u32 index = addr / 4; 20 u32 index = addr / 4;
24 21
@@ -48,16 +45,17 @@ inline void Write(u32 addr, const T data) {
48 // This is happening *after* handling the write to make sure we properly catch all memory reads. 45 // This is happening *after* handling the write to make sure we properly catch all memory reads.
49 if (Pica::g_debug_context && Pica::g_debug_context->recorder) { 46 if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
50 // addr + GPU VBase - IO VBase + IO PBase 47 // addr + GPU VBase - IO VBase + IO PBase
51 Pica::g_debug_context->recorder->RegisterWritten<T>(addr + HW::VADDR_LCD - 0x1EC00000 + 0x10100000, data); 48 Pica::g_debug_context->recorder->RegisterWritten<T>(
49 addr + HW::VADDR_LCD - 0x1EC00000 + 0x10100000, data);
52 } 50 }
53} 51}
54 52
55// Explicitly instantiate template functions because we aren't defining this in the header: 53// Explicitly instantiate template functions because we aren't defining this in the header:
56 54
57template void Read<u64>(u64 &var, const u32 addr); 55template void Read<u64>(u64& var, const u32 addr);
58template void Read<u32>(u32 &var, const u32 addr); 56template void Read<u32>(u32& var, const u32 addr);
59template void Read<u16>(u16 &var, const u32 addr); 57template void Read<u16>(u16& var, const u32 addr);
60template void Read<u8>(u8 &var, const u32 addr); 58template void Read<u8>(u8& var, const u32 addr);
61 59
62template void Write<u64>(u32 addr, const u64 data); 60template void Write<u64>(u32 addr, const u64 data);
63template void Write<u32>(u32 addr, const u32 data); 61template void Write<u32>(u32 addr, const u32 data);
diff --git a/src/core/hw/lcd.h b/src/core/hw/lcd.h
index 57029c5e8..191fd44af 100644
--- a/src/core/hw/lcd.h
+++ b/src/core/hw/lcd.h
@@ -6,7 +6,6 @@
6 6
7#include <cstddef> 7#include <cstddef>
8#include <type_traits> 8#include <type_traits>
9
10#include "common/bit_field.h" 9#include "common/bit_field.h"
11#include "common/common_funcs.h" 10#include "common/common_funcs.h"
12#include "common/common_types.h" 11#include "common/common_types.h"
@@ -42,16 +41,15 @@ struct Regs {
42 return sizeof(Regs) / sizeof(u32); 41 return sizeof(Regs) / sizeof(u32);
43 } 42 }
44 43
45 const u32& operator [] (int index) const { 44 const u32& operator[](int index) const {
46 const u32* content = reinterpret_cast<const u32*>(this); 45 const u32* content = reinterpret_cast<const u32*>(this);
47 return content[index]; 46 return content[index];
48 } 47 }
49 48
50 u32& operator [] (int index) { 49 u32& operator[](int index) {
51 u32* content = reinterpret_cast<u32*>(this); 50 u32* content = reinterpret_cast<u32*>(this);
52 return content[index]; 51 return content[index];
53 } 52 }
54
55}; 53};
56static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout"); 54static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout");
57 55
@@ -59,14 +57,14 @@ static_assert(std::is_standard_layout<Regs>::value, "Structure does not use stan
59// is technically allowed since C++11. This macro should be enabled once MSVC adds 57// is technically allowed since C++11. This macro should be enabled once MSVC adds
60// support for that. 58// support for that.
61#ifndef _MSC_VER 59#ifndef _MSC_VER
62#define ASSERT_REG_POSITION(field_name, position) \ 60#define ASSERT_REG_POSITION(field_name, position) \
63 static_assert(offsetof(Regs, field_name) == position * 4, \ 61 static_assert(offsetof(Regs, field_name) == position * 4, \
64 "Field "#field_name" has invalid position") 62 "Field " #field_name " has invalid position")
65 63
66ASSERT_REG_POSITION(color_fill_top, 0x81); 64ASSERT_REG_POSITION(color_fill_top, 0x81);
67ASSERT_REG_POSITION(backlight_top, 0x90); 65ASSERT_REG_POSITION(backlight_top, 0x90);
68ASSERT_REG_POSITION(color_fill_bottom, 0x281); 66ASSERT_REG_POSITION(color_fill_bottom, 0x281);
69ASSERT_REG_POSITION(backlight_bottom, 0x290); 67ASSERT_REG_POSITION(backlight_bottom, 0x290);
70 68
71#undef ASSERT_REG_POSITION 69#undef ASSERT_REG_POSITION
72#endif // !defined(_MSC_VER) 70#endif // !defined(_MSC_VER)
@@ -74,7 +72,7 @@ ASSERT_REG_POSITION(backlight_bottom, 0x290);
74extern Regs g_regs; 72extern Regs g_regs;
75 73
76template <typename T> 74template <typename T>
77void Read(T &var, const u32 addr); 75void Read(T& var, const u32 addr);
78 76
79template <typename T> 77template <typename T>
80void Write(u32 addr, const T data); 78void Write(u32 addr, const T data);
diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp
index 083391e83..6a6c707a2 100644
--- a/src/core/hw/y2r.cpp
+++ b/src/core/hw/y2r.cpp
@@ -6,13 +6,11 @@
6#include <array> 6#include <array>
7#include <cstddef> 7#include <cstddef>
8#include <memory> 8#include <memory>
9
10#include "common/assert.h" 9#include "common/assert.h"
11#include "common/color.h" 10#include "common/color.h"
12#include "common/common_types.h" 11#include "common/common_types.h"
13#include "common/math_util.h" 12#include "common/math_util.h"
14#include "common/vector_math.h" 13#include "common/vector_math.h"
15
16#include "core/hle/service/y2r_u.h" 14#include "core/hle/service/y2r_u.h"
17#include "core/hw/y2r.h" 15#include "core/hw/y2r.h"
18#include "core/memory.h" 16#include "core/memory.h"
@@ -27,9 +25,9 @@ static const size_t TILE_SIZE = 8 * 8;
27using ImageTile = std::array<u32, TILE_SIZE>; 25using ImageTile = std::array<u32, TILE_SIZE>;
28 26
29/// Converts a image strip from the source YUV format into individual 8x8 RGB32 tiles. 27/// Converts a image strip from the source YUV format into individual 8x8 RGB32 tiles.
30static void ConvertYUVToRGB(InputFormat input_format, 28static void ConvertYUVToRGB(InputFormat input_format, const u8* input_Y, const u8* input_U,
31 const u8* input_Y, const u8* input_U, const u8* input_V, ImageTile output[], 29 const u8* input_V, ImageTile output[], unsigned int width,
32 unsigned int width, unsigned int height, const CoefficientSet& coefficients) { 30 unsigned int height, const CoefficientSet& coefficients) {
33 31
34 for (unsigned int y = 0; y < height; ++y) { 32 for (unsigned int y = 0; y < height; ++y) {
35 for (unsigned int x = 0; x < width; ++x) { 33 for (unsigned int x = 0; x < width; ++x) {
@@ -58,11 +56,11 @@ static void ConvertYUVToRGB(InputFormat input_format,
58 56
59 // This conversion process is bit-exact with hardware, as far as could be tested. 57 // This conversion process is bit-exact with hardware, as far as could be tested.
60 auto& c = coefficients; 58 auto& c = coefficients;
61 s32 cY = c[0]*Y; 59 s32 cY = c[0] * Y;
62 60
63 s32 r = cY + c[1]*V; 61 s32 r = cY + c[1] * V;
64 s32 g = cY - c[3]*U - c[2]*V; 62 s32 g = cY - c[2] * V - c[3] * U;
65 s32 b = cY + c[4]*U; 63 s32 b = cY + c[4] * U;
66 64
67 const s32 rounding_offset = 0x18; 65 const s32 rounding_offset = 0x18;
68 r = (r >> 3) + c[5] + rounding_offset; 66 r = (r >> 3) + c[5] + rounding_offset;
@@ -74,14 +72,14 @@ static void ConvertYUVToRGB(InputFormat input_format,
74 u32* out = &output[tile][y * 8 + tile_x]; 72 u32* out = &output[tile][y * 8 + tile_x];
75 73
76 using MathUtil::Clamp; 74 using MathUtil::Clamp;
77 *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) | 75 *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) | ((u32)Clamp(g >> 5, 0, 0xFF) << 16) |
78 ((u32)Clamp(g >> 5, 0, 0xFF) << 16) |
79 ((u32)Clamp(b >> 5, 0, 0xFF) << 8); 76 ((u32)Clamp(b >> 5, 0, 0xFF) << 8);
80 } 77 }
81 } 78 }
82} 79}
83 80
84/// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit formats to 8-bit. 81/// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit
82/// formats to 8-bit.
85template <size_t N> 83template <size_t N>
86static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) { 84static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) {
87 const u8* input = Memory::GetPointer(buf.address); 85 const u8* input = Memory::GetPointer(buf.address);
@@ -103,9 +101,10 @@ static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data
103 } 101 }
104} 102}
105 103
106/// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA transfer. 104/// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA
105/// transfer.
107static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data, 106static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data,
108 OutputFormat output_format, u8 alpha) { 107 OutputFormat output_format, u8 alpha) {
109 108
110 u8* output = Memory::GetPointer(buf.address); 109 u8* output = Memory::GetPointer(buf.address);
111 110
@@ -113,9 +112,7 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data
113 u8* unit_end = output + buf.transfer_unit; 112 u8* unit_end = output + buf.transfer_unit;
114 while (output < unit_end) { 113 while (output < unit_end) {
115 u32 color = *input++; 114 u32 color = *input++;
116 Math::Vec4<u8> col_vec{ 115 Math::Vec4<u8> col_vec{(u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha};
117 (u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha
118 };
119 116
120 switch (output_format) { 117 switch (output_format) {
121 case OutputFormat::RGBA8: 118 case OutputFormat::RGBA8:
@@ -145,7 +142,8 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data
145 } 142 }
146} 143}
147 144
148static const u8 linear_lut[64] = { 145static const u8 linear_lut[TILE_SIZE] = {
146 // clang-format off
149 0, 1, 2, 3, 4, 5, 6, 7, 147 0, 1, 2, 3, 4, 5, 6, 7,
150 8, 9, 10, 11, 12, 13, 14, 15, 148 8, 9, 10, 11, 12, 13, 14, 15,
151 16, 17, 18, 19, 20, 21, 22, 23, 149 16, 17, 18, 19, 20, 21, 22, 23,
@@ -154,9 +152,11 @@ static const u8 linear_lut[64] = {
154 40, 41, 42, 43, 44, 45, 46, 47, 152 40, 41, 42, 43, 44, 45, 46, 47,
155 48, 49, 50, 51, 52, 53, 54, 55, 153 48, 49, 50, 51, 52, 53, 54, 55,
156 56, 57, 58, 59, 60, 61, 62, 63, 154 56, 57, 58, 59, 60, 61, 62, 63,
155 // clang-format on
157}; 156};
158 157
159static const u8 morton_lut[64] = { 158static const u8 morton_lut[TILE_SIZE] = {
159 // clang-format off
160 0, 1, 4, 5, 16, 17, 20, 21, 160 0, 1, 4, 5, 16, 17, 20, 21,
161 2, 3, 6, 7, 18, 19, 22, 23, 161 2, 3, 6, 7, 18, 19, 22, 23,
162 8, 9, 12, 13, 24, 25, 28, 29, 162 8, 9, 12, 13, 24, 25, 28, 29,
@@ -165,15 +165,18 @@ static const u8 morton_lut[64] = {
165 34, 35, 38, 39, 50, 51, 54, 55, 165 34, 35, 38, 39, 50, 51, 54, 55,
166 40, 41, 44, 45, 56, 57, 60, 61, 166 40, 41, 44, 45, 56, 57, 60, 61,
167 42, 43, 46, 47, 58, 59, 62, 63, 167 42, 43, 46, 47, 58, 59, 62, 63,
168 // clang-format on
168}; 169};
169 170
170static void RotateTile0(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { 171static void RotateTile0(const ImageTile& input, ImageTile& output, int height,
172 const u8 out_map[64]) {
171 for (int i = 0; i < height * 8; ++i) { 173 for (int i = 0; i < height * 8; ++i) {
172 output[out_map[i]] = input[i]; 174 output[out_map[i]] = input[i];
173 } 175 }
174} 176}
175 177
176static void RotateTile90(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { 178static void RotateTile90(const ImageTile& input, ImageTile& output, int height,
179 const u8 out_map[64]) {
177 int out_i = 0; 180 int out_i = 0;
178 for (int x = 0; x < 8; ++x) { 181 for (int x = 0; x < 8; ++x) {
179 for (int y = height - 1; y >= 0; --y) { 182 for (int y = height - 1; y >= 0; --y) {
@@ -182,16 +185,18 @@ static void RotateTile90(const ImageTile& input, ImageTile& output, int height,
182 } 185 }
183} 186}
184 187
185static void RotateTile180(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { 188static void RotateTile180(const ImageTile& input, ImageTile& output, int height,
189 const u8 out_map[64]) {
186 int out_i = 0; 190 int out_i = 0;
187 for (int i = height * 8 - 1; i >= 0; --i) { 191 for (int i = height * 8 - 1; i >= 0; --i) {
188 output[out_map[out_i++]] = input[i]; 192 output[out_map[out_i++]] = input[i];
189 } 193 }
190} 194}
191 195
192static void RotateTile270(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { 196static void RotateTile270(const ImageTile& input, ImageTile& output, int height,
197 const u8 out_map[64]) {
193 int out_i = 0; 198 int out_i = 0;
194 for (int x = 8-1; x >= 0; --x) { 199 for (int x = 8 - 1; x >= 0; --x) {
195 for (int y = 0; y < height; ++y) { 200 for (int y = 0; y < height; ++y) {
196 output[out_map[out_i++]] = input[y * 8 + x]; 201 output[out_map[out_i++]] = input[y * 8 + x];
197 } 202 }
@@ -274,9 +279,11 @@ void PerformConversion(ConversionConfiguration& cvt) {
274 const u8* tile_remap = nullptr; 279 const u8* tile_remap = nullptr;
275 switch (cvt.block_alignment) { 280 switch (cvt.block_alignment) {
276 case BlockAlignment::Linear: 281 case BlockAlignment::Linear:
277 tile_remap = linear_lut; break; 282 tile_remap = linear_lut;
283 break;
278 case BlockAlignment::Block8x8: 284 case BlockAlignment::Block8x8:
279 tile_remap = morton_lut; break; 285 tile_remap = morton_lut;
286 break;
280 } 287 }
281 288
282 for (unsigned int y = 0; y < cvt.input_lines; y += 8) { 289 for (unsigned int y = 0; y < cvt.input_lines; y += 8) {
@@ -320,7 +327,7 @@ void PerformConversion(ConversionConfiguration& cvt) {
320 // Note(yuriks): If additional optimization is required, input_format can be moved to a 327 // Note(yuriks): If additional optimization is required, input_format can be moved to a
321 // template parameter, so that its dispatch can be moved to outside the inner loop. 328 // template parameter, so that its dispatch can be moved to outside the inner loop.
322 ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(), 329 ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(),
323 cvt.input_line_width, row_height, cvt.coefficients); 330 cvt.input_line_width, row_height, cvt.coefficients);
324 331
325 u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get()); 332 u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get());
326 333
@@ -367,9 +374,9 @@ void PerformConversion(ConversionConfiguration& cvt) {
367 374
368 // Note(yuriks): If additional optimization is required, output_format can be moved to a 375 // Note(yuriks): If additional optimization is required, output_format can be moved to a
369 // template parameter, so that its dispatch can be moved to outside the inner loop. 376 // template parameter, so that its dispatch can be moved to outside the inner loop.
370 SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size, cvt.output_format, (u8)cvt.alpha); 377 SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size,
378 cvt.output_format, (u8)cvt.alpha);
371 } 379 }
372} 380}
373
374} 381}
375} 382}
diff --git a/src/core/hw/y2r.h b/src/core/hw/y2r.h
index 729e1eee3..6b6e71bec 100644
--- a/src/core/hw/y2r.h
+++ b/src/core/hw/y2r.h
@@ -3,13 +3,12 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5namespace Y2R_U { 5namespace Y2R_U {
6 struct ConversionConfiguration; 6struct ConversionConfiguration;
7} 7}
8 8
9namespace HW { 9namespace HW {
10namespace Y2R { 10namespace Y2R {
11 11
12void PerformConversion(Y2R_U::ConversionConfiguration& cvt); 12void PerformConversion(Y2R_U::ConversionConfiguration& cvt);
13
14} 13}
15} 14}