8 files changed, 209 insertions, 184 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index a4dfb7e43..bf2c066f4 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -12,15 +12,15 @@
 #include "common/microprofile.h"
 #include "common/vector_math.h"
-#include "core/settings.h"
-#include "core/memory.h"
 #include "core/core_timing.h"
+#include "core/memory.h"
+#include "core/settings.h"
 #include "core/hle/service/gsp_gpu.h"
 #include "core/hle/service/hid/hid.h"
-#include "core/hw/hw.h"
 #include "core/hw/gpu.h"
+#include "core/hw/hw.h"
 #include "core/tracer/recorder.h"
@@ -32,7 +32,6 @@
 #include "video_core/debug_utils/debug_utils.h"
 namespace GPU {
 Regs g_regs;
@@ -49,7 +48,7 @@ static u64 frame_count;
 static bool last_skip_frame;
 template <typename T>
-inline void Read(T &var, const u32 raw_addr) {
+inline void Read(T& var, const u32 raw_addr) {
    u32 addr = raw_addr - HW::VADDR_GPU;
    u32 index = addr / 4;
@@ -105,8 +104,7 @@ inline void Write(u32 addr, const T data) {
    // Memory fills are triggered once the fill value is written.
    case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].trigger, 0x00004 + 0x3):
-    case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3):
+    case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3): {
-    {
        const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger));
        auto& config = g_regs.memory_fill_config[is_second_filler];
@@ -125,7 +123,9 @@ inline void Write(u32 addr, const T data) {
                //       regions that were between surfaces or within the touching
                //       ones for cpu to manually fill here.
                if (!VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) {
-                    Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
+                    Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(),
+                                                               config.GetEndAddress() -
+                                                                   config.GetStartAddress());
                    if (config.fill_24bit) {
                        // fill with 24-bit values
@@ -150,7 +150,8 @@ inline void Write(u32 addr, const T data) {
                    }
                }
-                LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress());
+                LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(),
+                          config.GetEndAddress());
                if (!is_second_filler) {
                    GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0);
@@ -167,15 +168,15 @@ inline void Write(u32 addr, const T data) {
        break;
    }
-    case GPU_REG_INDEX(display_transfer_config.trigger):
+    case GPU_REG_INDEX(display_transfer_config.trigger): {
-    {
        MICROPROFILE_SCOPE(GPU_DisplayTransfer);
        const auto& config = g_regs.display_transfer_config;
        if (config.trigger & 1) {
            if (Pica::g_debug_context)
-                Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr);
+                Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer,
+                                               nullptr);
            if (!VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) {
                u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
@@ -187,17 +188,23 @@ inline void Write(u32 addr, const T data) {
                    u32 output_width = config.texture_copy.output_width * 16;
                    u32 output_gap = config.texture_copy.output_gap * 16;
-                    size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
+                    size_t contiguous_input_size =
-                    Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), static_cast<u32>(contiguous_input_size));
+                        config.texture_copy.size / input_width * (input_width + input_gap);
+                    Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(),
+                                                  static_cast<u32>(contiguous_input_size));
-                    size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap);
+                    size_t contiguous_output_size =
-                    Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), static_cast<u32>(contiguous_output_size));
+                        config.texture_copy.size / output_width * (output_width + output_gap);
+                    Memory::RasterizerFlushAndInvalidateRegion(
+                        config.GetPhysicalOutputAddress(),
+                        static_cast<u32>(contiguous_output_size));
                    u32 remaining_size = config.texture_copy.size;
                    u32 remaining_input = input_width;
                    u32 remaining_output = output_width;
                    while (remaining_size > 0) {
-                        u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
+                        u32 copy_size =
+                            std::min({remaining_input, remaining_output, remaining_size});
                        std::memcpy(dst_pointer, src_pointer, copy_size);
                        src_pointer += copy_size;
@@ -217,10 +224,11 @@ inline void Write(u32 addr, const T data) {
                        }
                    }
-                    LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
+                    LOG_TRACE(
-                        config.texture_copy.size,
+                        HW_GPU,
-                        config.GetPhysicalInputAddress(), input_width, input_gap,
+                        "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
-                        config.GetPhysicalOutputAddress(), output_width, output_gap,
+                        config.texture_copy.size, config.GetPhysicalInputAddress(), input_width,
+                        input_gap, config.GetPhysicalOutputAddress(), output_width, output_gap,
                        config.flags);
                    GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
@@ -228,7 +236,8 @@ inline void Write(u32 addr, const T data) {
                }
                if (config.scaling > config.ScaleXY) {
-                    LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value());
+                    LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u",
+                                 config.scaling.Value());
                    UNIMPLEMENTED();
                    break;
                }
@@ -245,11 +254,14 @@ inline void Write(u32 addr, const T data) {
                u32 output_width = config.output_width >> horizontal_scale;
                u32 output_height = config.output_height >> vertical_scale;
-                u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format);
+                u32 input_size = config.input_width * config.input_height *
-                u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
+                                 GPU::Regs::BytesPerPixel(config.input_format);
+                u32 output_size =
+                    output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
                Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
-                Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
+                Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(),
+                                                           output_size);
                for (u32 y = 0; y < output_height; ++y) {
                    for (u32 x = 0; x < output_width; ++x) {
@@ -278,11 +290,14 @@ inline void Write(u32 addr, const T data) {
                                u32 coarse_y = y & ~7;
                                u32 stride = output_width * dst_bytes_per_pixel;
-                                src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
+                                src_offset =
-                                dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride;
+                                    (input_x + input_y * config.input_width) * src_bytes_per_pixel;
+                                dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) +
+                                             coarse_y * stride;
                            } else {
                                // Both input and output are linear
-                                src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
+                                src_offset =
+                                    (input_x + input_y * config.input_width) * src_bytes_per_pixel;
                                dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
                            }
                        } else {
@@ -291,7 +306,9 @@ inline void Write(u32 addr, const T data) {
                                u32 coarse_y = input_y & ~7;
                                u32 stride = config.input_width * src_bytes_per_pixel;
-                                src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride;
+                                src_offset = VideoCore::GetMortonOffset(input_x, input_y,
+                                                                        src_bytes_per_pixel) +
+                                             coarse_y * stride;
                                dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
                            } else {
                                // Both input and output are tiled
@@ -301,20 +318,27 @@ inline void Write(u32 addr, const T data) {
                                u32 in_coarse_y = input_y & ~7;
                                u32 in_stride = config.input_width * src_bytes_per_pixel;
-                                src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride;
+                                src_offset = VideoCore::GetMortonOffset(input_x, input_y,
-                                dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride;
+                                                                        src_bytes_per_pixel) +
+                                             in_coarse_y * in_stride;
+                                dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) +
+                                             out_coarse_y * out_stride;
                            }
                        }
                        const u8* src_pixel = src_pointer + src_offset;
                        src_color = DecodePixel(config.input_format, src_pixel);
                        if (config.scaling == config.ScaleX) {
-                            Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
+                            Math::Vec4<u8> pixel =
+                                DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
                            src_color = ((src_color + pixel) / 2).Cast<u8>();
                        } else if (config.scaling == config.ScaleXY) {
-                            Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel);
+                            Math::Vec4<u8> pixel1 = DecodePixel(
-                            Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel);
+                                config.input_format, src_pixel + 1 * src_bytes_per_pixel);
-                            Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel);
+                            Math::Vec4<u8> pixel2 = DecodePixel(
+                                config.input_format, src_pixel + 2 * src_bytes_per_pixel);
+                            Math::Vec4<u8> pixel3 = DecodePixel(
+                                config.input_format, src_pixel + 3 * src_bytes_per_pixel);
                            src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
                        }
@@ -341,17 +365,20 @@ inline void Write(u32 addr, const T data) {
                            break;
                        default:
-                            LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", config.output_format.Value());
+                            LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x",
+                                      config.output_format.Value());
                            break;
                        }
                    }
                }
-                LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X",
+                LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> "
-                      config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format),
+                                  "0x%08x(%ux%u), dst format %x, flags 0x%08X",
-                      config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(),
+                          config.output_height * output_width *
-                      config.GetPhysicalOutputAddress(), output_width, output_height,
+                              GPU::Regs::BytesPerPixel(config.output_format),
-                      config.output_format.Value(), config.flags);
+                          config.GetPhysicalInputAddress(), config.input_width.Value(),
+                          config.input_height.Value(), config.GetPhysicalOutputAddress(),
+                          output_width, output_height, config.output_format.Value(), config.flags);
            }
            g_regs.display_transfer_config.trigger = 0;
@@ -361,17 +388,16 @@ inline void Write(u32 addr, const T data) {
    }
    // Seems like writing to this register triggers processing
-    case GPU_REG_INDEX(command_processor_config.trigger):
+    case GPU_REG_INDEX(command_processor_config.trigger): {
-    {
        const auto& config = g_regs.command_processor_config;
-        if (config.trigger & 1)
+        if (config.trigger & 1) {
-        {
            MICROPROFILE_SCOPE(GPU_CmdlistProcessing);
            u32* buffer = (u32*)Memory::GetPhysicalPointer(config.GetPhysicalAddress());
            if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
-                Pica::g_debug_context->recorder->MemoryAccessed((u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress());
+                Pica::g_debug_context->recorder->MemoryAccessed(
+                    (u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress());
            }
            Pica::CommandProcessor::ProcessCommandList(buffer, config.size);
@@ -389,16 +415,17 @@ inline void Write(u32 addr, const T data) {
    // This is happening *after* handling the write to make sure we properly catch all memory reads.
    if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
        // addr + GPU VBase - IO VBase + IO PBase
-        Pica::g_debug_context->recorder->RegisterWritten<T>(addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data);
+        Pica::g_debug_context->recorder->RegisterWritten<T>(
+            addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data);
    }
 }
 // Explicitly instantiate template functions because we aren't defining this in the header:
-template void Read<u64>(u64 &var, const u32 addr);
+template void Read<u64>(u64& var, const u32 addr);
-template void Read<u32>(u32 &var, const u32 addr);
+template void Read<u32>(u32& var, const u32 addr);
-template void Read<u16>(u16 &var, const u32 addr);
+template void Read<u16>(u16& var, const u32 addr);
-template void Read<u8>(u8 &var, const u32 addr);
+template void Read<u8>(u8& var, const u32 addr);
 template void Write<u64>(u32 addr, const u64 data);
 template void Write<u32>(u32 addr, const u32 data);
@@ -417,8 +444,9 @@ static void VBlankCallback(u64 userdata, int cycles_late) {
    //  - If frameskip == 0 (disabled), always swap buffers
    //  - If frameskip == 1, swap buffers every other frame (starting from the first frame)
    //  - If frameskip > 1, swap buffers every frameskip^n frames (starting from the second frame)
-    if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) && last_skip_frame != g_skip_frame) ||
+    if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) &&
-            Settings::values.frame_skip == 0) {
+         last_skip_frame != g_skip_frame) ||
+        Settings::values.frame_skip == 0) {
        VideoCore::g_renderer->SwapBuffers();
    }
@@ -448,12 +476,12 @@ void Init() {
    // .. or at least these are the ones used by system applets.
    // There's probably a smarter way to come up with addresses
    // like this which does not require hardcoding.
-    framebuffer_top.address_left1  = 0x181E6000;
+    framebuffer_top.address_left1 = 0x181E6000;
-    framebuffer_top.address_left2  = 0x1822C800;
+    framebuffer_top.address_left2 = 0x1822C800;
    framebuffer_top.address_right1 = 0x18273000;
    framebuffer_top.address_right2 = 0x182B9800;
-    framebuffer_sub.address_left1  = 0x1848F000;
+    framebuffer_sub.address_left1 = 0x1848F000;
-    framebuffer_sub.address_left2  = 0x184C7800;
+    framebuffer_sub.address_left2 = 0x184C7800;
    framebuffer_top.width.Assign(240);
    framebuffer_top.height.Assign(400);
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index da4c345b4..077b6255f 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -17,7 +17,8 @@ namespace GPU {
 // Returns index corresponding to the Regs member labeled by field_name
 // TODO: Due to Visual Studio bug 209229, offsetof does not return constant expressions
 //       when used with array elements (e.g. GPU_REG_INDEX(memory_fill_config[0])).
-//       For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members
+//       For details cf.
+//       https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members
 //       Hopefully, this will be fixed sometime in the future.
 //       For lack of better alternatives, we currently hardcode the offsets when constant
 //       expressions are needed via GPU_REG_INDEX_WORKAROUND (on sane compilers, static_asserts
@@ -30,8 +31,9 @@ namespace GPU {
 //       really is this annoying. This macro just forwards its first argument to GPU_REG_INDEX
 //       and then performs a (no-op) cast to size_t iff the second argument matches the expected
 //       field offset. Otherwise, the compiler will fail to compile this code.
-#define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \
+#define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index)                              \
-    ((typename std::enable_if<backup_workaround_index == GPU_REG_INDEX(field_name), size_t>::type)GPU_REG_INDEX(field_name))
+    ((typename std::enable_if<backup_workaround_index == GPU_REG_INDEX(field_name), size_t>::type) \
+         GPU_REG_INDEX(field_name))
 #endif
 // MMIO region 0x1EFxxxxx
@@ -44,18 +46,18 @@ struct Regs {
 //       support for that.
 #define ASSERT_MEMBER_SIZE(name, size_in_bytes)
 #else
-#define ASSERT_MEMBER_SIZE(name, size_in_bytes)  \
+#define ASSERT_MEMBER_SIZE(name, size_in_bytes)                                                    \
-    static_assert(sizeof(name) == size_in_bytes, \
+    static_assert(sizeof(name) == size_in_bytes,                                                   \
                  "Structure size and register block length don't match")
 #endif
    // Components are laid out in reverse byte order, most significant bits first.
    enum class PixelFormat : u32 {
-        RGBA8  = 0,
+        RGBA8 = 0,
-        RGB8   = 1,
+        RGB8 = 1,
        RGB565 = 2,
        RGB5A1 = 3,
-        RGBA4  = 4,
+        RGBA4 = 4,
    };
    /**
@@ -88,8 +90,8 @@ struct Regs {
            BitField<0, 16, u32> value_16bit;
            // TODO: Verify component order
-            BitField< 0, 8, u32> value_24bit_r;
+            BitField<0, 8, u32> value_24bit_r;
-            BitField< 8, 8, u32> value_24bit_g;
+            BitField<8, 8, u32> value_24bit_g;
            BitField<16, 8, u32> value_24bit_b;
        };
@@ -126,7 +128,7 @@ struct Regs {
        union {
            u32 size;
-            BitField< 0, 16, u32> width;
+            BitField<0, 16, u32> width;
            BitField<16, 16, u32> height;
        };
@@ -138,7 +140,7 @@ struct Regs {
        union {
            u32 format;
-            BitField< 0, 3, PixelFormat> color_format;
+            BitField<0, 3, PixelFormat> color_format;
        };
        INSERT_PADDING_WORDS(0x1);
@@ -180,35 +182,37 @@ struct Regs {
        union {
            u32 output_size;
-            BitField< 0, 16, u32> output_width;
+            BitField<0, 16, u32> output_width;
            BitField<16, 16, u32> output_height;
        };
        union {
            u32 input_size;
-            BitField< 0, 16, u32> input_width;
+            BitField<0, 16, u32> input_width;
            BitField<16, 16, u32> input_height;
        };
        enum ScalingMode : u32 {
-            NoScale  = 0,  // Doesn't scale the image
+            NoScale = 0, // Doesn't scale the image
-            ScaleX   = 1,  // Downscales the image in half in the X axis and applies a box filter
+            ScaleX = 1,  // Downscales the image in half in the X axis and applies a box filter
-            ScaleXY  = 2,  // Downscales the image in half in both the X and Y axes and applies a box filter
+            ScaleXY =
+                2, // Downscales the image in half in both the X and Y axes and applies a box filter
        };
        union {
            u32 flags;
-            BitField< 0, 1, u32> flip_vertically;  // flips input data vertically
+            BitField<0, 1, u32> flip_vertically; // flips input data vertically
-            BitField< 1, 1, u32> input_linear;     // Converts from linear to tiled format
+            BitField<1, 1, u32> input_linear;    // Converts from linear to tiled format
-            BitField< 2, 1, u32> crop_input_lines;
+            BitField<2, 1, u32> crop_input_lines;
-            BitField< 3, 1, u32> is_texture_copy;  // Copies the data without performing any processing and respecting texture copy fields
+            BitField<3, 1, u32> is_texture_copy; // Copies the data without performing any
-            BitField< 5, 1, u32> dont_swizzle;
+                                                 // processing and respecting texture copy fields
-            BitField< 8, 3, PixelFormat> input_format;
+            BitField<5, 1, u32> dont_swizzle;
+            BitField<8, 3, PixelFormat> input_format;
            BitField<12, 3, PixelFormat> output_format;
            /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one.
-            BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented
+            BitField<16, 1, u32> block_32;        // TODO(yuriks): unimplemented
            BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer
        };
@@ -225,14 +229,14 @@ struct Regs {
            union {
                u32 input_size;
-                BitField< 0, 16, u32> input_width;
+                BitField<0, 16, u32> input_width;
                BitField<16, 16, u32> input_gap;
            };
            union {
                u32 output_size;
-                BitField< 0, 16, u32> output_width;
+                BitField<0, 16, u32> output_width;
                BitField<16, 16, u32> output_gap;
            };
        } texture_copy;
@@ -267,12 +271,12 @@ struct Regs {
        return sizeof(Regs) / sizeof(u32);
    }
-    const u32& operator [] (int index) const {
+    const u32& operator[](int index) const {
        const u32* content = reinterpret_cast<const u32*>(this);
        return content[index];
    }
-    u32& operator [] (int index) {
+    u32& operator[](int index) {
        u32* content = reinterpret_cast<u32*>(this);
        return content[index];
    }
@@ -294,28 +298,29 @@ static_assert(std::is_standard_layout<Regs>::value, "Structure does not use stan
 //       is technically allowed since C++11. This macro should be enabled once MSVC adds
 //       support for that.
 #ifndef _MSC_VER
-#define ASSERT_REG_POSITION(field_name, position)             \
+#define ASSERT_REG_POSITION(field_name, position)                                                  \
-    static_assert(offsetof(Regs, field_name) == position * 4, \
+    static_assert(offsetof(Regs, field_name) == position * 4,                                      \
-                  "Field "#field_name" has invalid position")
+                  "Field " #field_name " has invalid position")
-ASSERT_REG_POSITION(memory_fill_config[0],    0x00004);
+ASSERT_REG_POSITION(memory_fill_config[0], 0x00004);
-ASSERT_REG_POSITION(memory_fill_config[1],    0x00008);
+ASSERT_REG_POSITION(memory_fill_config[1], 0x00008);
-ASSERT_REG_POSITION(framebuffer_config[0],    0x00117);
+ASSERT_REG_POSITION(framebuffer_config[0], 0x00117);
-ASSERT_REG_POSITION(framebuffer_config[1],    0x00157);
+ASSERT_REG_POSITION(framebuffer_config[1], 0x00157);
-ASSERT_REG_POSITION(display_transfer_config,  0x00300);
+ASSERT_REG_POSITION(display_transfer_config, 0x00300);
 ASSERT_REG_POSITION(command_processor_config, 0x00638);
 #undef ASSERT_REG_POSITION
 #endif // !defined(_MSC_VER)
-// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway.
+// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value
+// anyway.
 static_assert(sizeof(Regs) == 0x1000 * sizeof(u32), "Invalid total size of register set");
 extern Regs g_regs;
 extern bool g_skip_frame;
 template <typename T>
-void Read(T &var, const u32 addr);
+void Read(T& var, const u32 addr);
 template <typename T>
 void Write(u32 addr, const T data);
@@ -326,5 +331,4 @@ void Init();
 /// Shutdown hardware
 void Shutdown();
 } // namespace
diff --git a/src/core/hw/hw.cpp b/src/core/hw/hw.cpp
index b5fdbf9c1..db224c9aa 100644
--- a/src/core/hw/hw.cpp
+++ b/src/core/hw/hw.cpp
@@ -5,14 +5,14 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
-#include "core/hw/hw.h"
 #include "core/hw/gpu.h"
+#include "core/hw/hw.h"
 #include "core/hw/lcd.h"
 namespace HW {
 template <typename T>
-inline void Read(T &var, const u32 addr) {
+inline void Read(T& var, const u32 addr) {
    switch (addr & 0xFFFFF000) {
    case VADDR_GPU:
    case VADDR_GPU + 0x1000:
@@ -71,10 +71,10 @@ inline void Write(u32 addr, const T data) {
 // Explicitly instantiate template functions because we aren't defining this in the header:
-template void Read<u64>(u64 &var, const u32 addr);
+template void Read<u64>(u64& var, const u32 addr);
-template void Read<u32>(u32 &var, const u32 addr);
+template void Read<u32>(u32& var, const u32 addr);
-template void Read<u16>(u16 &var, const u32 addr);
+template void Read<u16>(u16& var, const u32 addr);
-template void Read<u8>(u8 &var, const u32 addr);
+template void Read<u8>(u8& var, const u32 addr);
 template void Write<u64>(u32 addr, const u64 data);
 template void Write<u32>(u32 addr, const u32 data);
@@ -98,5 +98,4 @@ void Shutdown() {
    LCD::Shutdown();
    LOG_DEBUG(HW, "shutdown OK");
 }
 }
diff --git a/src/core/hw/hw.h b/src/core/hw/hw.h
index d65608910..a3c5d2ea3 100644
--- a/src/core/hw/hw.h
+++ b/src/core/hw/hw.h
@@ -10,30 +10,30 @@ namespace HW {
 /// Beginnings of IO register regions, in the user VA space.
 enum : u32 {
-    VADDR_HASH      = 0x1EC01000,
+    VADDR_HASH = 0x1EC01000,
-    VADDR_CSND      = 0x1EC03000,
+    VADDR_CSND = 0x1EC03000,
-    VADDR_DSP       = 0x1EC40000,
+    VADDR_DSP = 0x1EC40000,
-    VADDR_PDN       = 0x1EC41000,
+    VADDR_PDN = 0x1EC41000,
-    VADDR_CODEC     = 0x1EC41000,
+    VADDR_CODEC = 0x1EC41000,
-    VADDR_SPI       = 0x1EC42000,
+    VADDR_SPI = 0x1EC42000,
-    VADDR_SPI_2     = 0x1EC43000,   // Only used under TWL_FIRM?
+    VADDR_SPI_2 = 0x1EC43000, // Only used under TWL_FIRM?
-    VADDR_I2C       = 0x1EC44000,
+    VADDR_I2C = 0x1EC44000,
-    VADDR_CODEC_2   = 0x1EC45000,
+    VADDR_CODEC_2 = 0x1EC45000,
-    VADDR_HID       = 0x1EC46000,
+    VADDR_HID = 0x1EC46000,
-    VADDR_GPIO      = 0x1EC47000,
+    VADDR_GPIO = 0x1EC47000,
-    VADDR_I2C_2     = 0x1EC48000,
+    VADDR_I2C_2 = 0x1EC48000,
-    VADDR_SPI_3     = 0x1EC60000,
+    VADDR_SPI_3 = 0x1EC60000,
-    VADDR_I2C_3     = 0x1EC61000,
+    VADDR_I2C_3 = 0x1EC61000,
-    VADDR_MIC       = 0x1EC62000,
+    VADDR_MIC = 0x1EC62000,
-    VADDR_PXI       = 0x1EC63000,
+    VADDR_PXI = 0x1EC63000,
-    VADDR_LCD       = 0x1ED02000,
+    VADDR_LCD = 0x1ED02000,
-    VADDR_DSP_2     = 0x1ED03000,
+    VADDR_DSP_2 = 0x1ED03000,
-    VADDR_HASH_2    = 0x1EE01000,
+    VADDR_HASH_2 = 0x1EE01000,
-    VADDR_GPU       = 0x1EF00000,
+    VADDR_GPU = 0x1EF00000,
 };
 template <typename T>
-void Read(T &var, const u32 addr);
+void Read(T& var, const u32 addr);
 template <typename T>
 void Write(u32 addr, const T data);
diff --git a/src/core/hw/lcd.cpp b/src/core/hw/lcd.cpp
index 6f93709e3..0e3aa7cfd 100644
--- a/src/core/hw/lcd.cpp
+++ b/src/core/hw/lcd.cpp
@@ -18,7 +18,7 @@ namespace LCD {
 Regs g_regs;
 template <typename T>
-inline void Read(T &var, const u32 raw_addr) {
+inline void Read(T& var, const u32 raw_addr) {
    u32 addr = raw_addr - HW::VADDR_LCD;
    u32 index = addr / 4;
@@ -48,16 +48,17 @@ inline void Write(u32 addr, const T data) {
    // This is happening *after* handling the write to make sure we properly catch all memory reads.
    if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
        // addr + LCD VBase - IO VBase + IO PBase
-        Pica::g_debug_context->recorder->RegisterWritten<T>(addr + HW::VADDR_LCD - 0x1EC00000 + 0x10100000, data);
+        Pica::g_debug_context->recorder->RegisterWritten<T>(
+            addr + HW::VADDR_LCD - 0x1EC00000 + 0x10100000, data);
    }
 }
 // Explicitly instantiate template functions because we aren't defining this in the header:
-template void Read<u64>(u64 &var, const u32 addr);
+template void Read<u64>(u64& var, const u32 addr);
-template void Read<u32>(u32 &var, const u32 addr);
+template void Read<u32>(u32& var, const u32 addr);
-template void Read<u16>(u16 &var, const u32 addr);
+template void Read<u16>(u16& var, const u32 addr);
-template void Read<u8>(u8 &var, const u32 addr);
+template void Read<u8>(u8& var, const u32 addr);
 template void Write<u64>(u32 addr, const u64 data);
 template void Write<u32>(u32 addr, const u32 data);
diff --git a/src/core/hw/lcd.h b/src/core/hw/lcd.h
index 57029c5e8..404833165 100644
--- a/src/core/hw/lcd.h
+++ b/src/core/hw/lcd.h
@@ -42,16 +42,15 @@ struct Regs {
        return sizeof(Regs) / sizeof(u32);
    }
-    const u32& operator [] (int index) const {
+    const u32& operator[](int index) const {
        const u32* content = reinterpret_cast<const u32*>(this);
        return content[index];
    }
-    u32& operator [] (int index) {
+    u32& operator[](int index) {
        u32* content = reinterpret_cast<u32*>(this);
        return content[index];
    }
 };
 static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout");
@@ -59,14 +58,14 @@ static_assert(std::is_standard_layout<Regs>::value, "Structure does not use stan
 //       is technically allowed since C++11. This macro should be enabled once MSVC adds
 //       support for that.
 #ifndef _MSC_VER
-#define ASSERT_REG_POSITION(field_name, position) \
+#define ASSERT_REG_POSITION(field_name, position)                                                  \
-    static_assert(offsetof(Regs, field_name) == position * 4, \
+    static_assert(offsetof(Regs, field_name) == position * 4,                                      \
-              "Field "#field_name" has invalid position")
+                  "Field " #field_name " has invalid position")
-ASSERT_REG_POSITION(color_fill_top,    0x81);
+ASSERT_REG_POSITION(color_fill_top, 0x81);
-ASSERT_REG_POSITION(backlight_top,     0x90);
+ASSERT_REG_POSITION(backlight_top, 0x90);
 ASSERT_REG_POSITION(color_fill_bottom, 0x281);
-ASSERT_REG_POSITION(backlight_bottom,  0x290);
+ASSERT_REG_POSITION(backlight_bottom, 0x290);
 #undef ASSERT_REG_POSITION
 #endif // !defined(_MSC_VER)
@@ -74,7 +73,7 @@ ASSERT_REG_POSITION(backlight_bottom,  0x290);
 extern Regs g_regs;
 template <typename T>
-void Read(T &var, const u32 addr);
+void Read(T& var, const u32 addr);
 template <typename T>
 void Write(u32 addr, const T data);
diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp
index 083391e83..5a68d7e65 100644
--- a/src/core/hw/y2r.cpp
+++ b/src/core/hw/y2r.cpp
@@ -27,9 +27,9 @@ static const size_t TILE_SIZE = 8 * 8;
 using ImageTile = std::array<u32, TILE_SIZE>;
 /// Converts a image strip from the source YUV format into individual 8x8 RGB32 tiles.
-static void ConvertYUVToRGB(InputFormat input_format,
+static void ConvertYUVToRGB(InputFormat input_format, const u8* input_Y, const u8* input_U,
-        const u8* input_Y, const u8* input_U, const u8* input_V, ImageTile output[],
+                            const u8* input_V, ImageTile output[], unsigned int width,
-        unsigned int width, unsigned int height, const CoefficientSet& coefficients) {
+                            unsigned int height, const CoefficientSet& coefficients) {
    for (unsigned int y = 0; y < height; ++y) {
        for (unsigned int x = 0; x < width; ++x) {
@@ -58,11 +58,11 @@ static void ConvertYUVToRGB(InputFormat input_format,
            // This conversion process is bit-exact with hardware, as far as could be tested.
            auto& c = coefficients;
-            s32 cY = c[0]*Y;
+            s32 cY = c[0] * Y;
-            s32 r = cY          + c[1]*V;
+            s32 r = cY + c[1] * V;
-            s32 g = cY - c[3]*U - c[2]*V;
+            s32 g = cY - c[3] * U - c[2] * V;
-            s32 b = cY + c[4]*U;
+            s32 b = cY + c[4] * U;
            const s32 rounding_offset = 0x18;
            r = (r >> 3) + c[5] + rounding_offset;
@@ -74,14 +74,14 @@ static void ConvertYUVToRGB(InputFormat input_format,
            u32* out = &output[tile][y * 8 + tile_x];
            using MathUtil::Clamp;
-            *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) |
+            *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) | ((u32)Clamp(g >> 5, 0, 0xFF) << 16) |
-                   ((u32)Clamp(g >> 5, 0, 0xFF) << 16) |
                   ((u32)Clamp(b >> 5, 0, 0xFF) << 8);
        }
    }
 }
-/// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit formats to 8-bit.
+/// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit
+/// formats to 8-bit.
 template <size_t N>
 static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) {
    const u8* input = Memory::GetPointer(buf.address);
@@ -103,9 +103,10 @@ static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data
    }
 }
-/// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA transfer.
+/// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA
+/// transfer.
 static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data,
-        OutputFormat output_format, u8 alpha) {
+                     OutputFormat output_format, u8 alpha) {
    u8* output = Memory::GetPointer(buf.address);
@@ -113,9 +114,7 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data
        u8* unit_end = output + buf.transfer_unit;
        while (output < unit_end) {
            u32 color = *input++;
-            Math::Vec4<u8> col_vec{
+            Math::Vec4<u8> col_vec{(u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha};
-                (u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha
-            };
            switch (output_format) {
            case OutputFormat::RGBA8:
@@ -146,34 +145,26 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data
 }
 static const u8 linear_lut[64] = {
-     0,  1,  2,  3,  4,  5,  6,  7,
+    0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
-     8,  9, 10, 11, 12, 13, 14, 15,
+    22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
-    16, 17, 18, 19, 20, 21, 22, 23,
+    44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-    24, 25, 26, 27, 28, 29, 30, 31,
-    32, 33, 34, 35, 36, 37, 38, 39,
-    40, 41, 42, 43, 44, 45, 46, 47,
-    48, 49, 50, 51, 52, 53, 54, 55,
-    56, 57, 58, 59, 60, 61, 62, 63,
 };
 static const u8 morton_lut[64] = {
-     0,  1,  4,  5, 16, 17, 20, 21,
+    0,  1,  4,  5,  16, 17, 20, 21, 2,  3,  6,  7,  18, 19, 22, 23, 8,  9,  12, 13, 24, 25,
-     2,  3,  6,  7, 18, 19, 22, 23,
+    28, 29, 10, 11, 14, 15, 26, 27, 30, 31, 32, 33, 36, 37, 48, 49, 52, 53, 34, 35, 38, 39,
-     8,  9, 12, 13, 24, 25, 28, 29,
+    50, 51, 54, 55, 40, 41, 44, 45, 56, 57, 60, 61, 42, 43, 46, 47, 58, 59, 62, 63,
-    10, 11, 14, 15, 26, 27, 30, 31,
-    32, 33, 36, 37, 48, 49, 52, 53,
-    34, 35, 38, 39, 50, 51, 54, 55,
-    40, 41, 44, 45, 56, 57, 60, 61,
-    42, 43, 46, 47, 58, 59, 62, 63,
 };
-static void RotateTile0(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) {
+static void RotateTile0(const ImageTile& input, ImageTile& output, int height,
+                        const u8 out_map[64]) {
    for (int i = 0; i < height * 8; ++i) {
        output[out_map[i]] = input[i];
    }
 }
-static void RotateTile90(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) {
+static void RotateTile90(const ImageTile& input, ImageTile& output, int height,
+                         const u8 out_map[64]) {
    int out_i = 0;
    for (int x = 0; x < 8; ++x) {
        for (int y = height - 1; y >= 0; --y) {
@@ -182,16 +173,18 @@ static void RotateTile90(const ImageTile& input, ImageTile& output, int height,
    }
 }
-static void RotateTile180(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) {
+static void RotateTile180(const ImageTile& input, ImageTile& output, int height,
+                          const u8 out_map[64]) {
    int out_i = 0;
    for (int i = height * 8 - 1; i >= 0; --i) {
        output[out_map[out_i++]] = input[i];
    }
 }
-static void RotateTile270(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) {
+static void RotateTile270(const ImageTile& input, ImageTile& output, int height,
+                          const u8 out_map[64]) {
    int out_i = 0;
-    for (int x = 8-1; x >= 0; --x) {
+    for (int x = 8 - 1; x >= 0; --x) {
        for (int y = 0; y < height; ++y) {
            output[out_map[out_i++]] = input[y * 8 + x];
        }
@@ -274,9 +267,11 @@ void PerformConversion(ConversionConfiguration& cvt) {
    const u8* tile_remap = nullptr;
    switch (cvt.block_alignment) {
    case BlockAlignment::Linear:
-        tile_remap = linear_lut; break;
+        tile_remap = linear_lut;
+        break;
    case BlockAlignment::Block8x8:
-        tile_remap = morton_lut; break;
+        tile_remap = morton_lut;
+        break;
    }
    for (unsigned int y = 0; y < cvt.input_lines; y += 8) {
@@ -320,7 +315,7 @@ void PerformConversion(ConversionConfiguration& cvt) {
        // Note(yuriks): If additional optimization is required, input_format can be moved to a
        // template parameter, so that its dispatch can be moved to outside the inner loop.
        ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(),
-                cvt.input_line_width, row_height, cvt.coefficients);
+                        cvt.input_line_width, row_height, cvt.coefficients);
        u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get());
@@ -367,9 +362,9 @@ void PerformConversion(ConversionConfiguration& cvt) {
        // Note(yuriks): If additional optimization is required, output_format can be moved to a
        // template parameter, so that its dispatch can be moved to outside the inner loop.
-        SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size, cvt.output_format, (u8)cvt.alpha);
+        SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size,
+                 cvt.output_format, (u8)cvt.alpha);
    }
 }
 }
 }
diff --git a/src/core/hw/y2r.h b/src/core/hw/y2r.h
index 729e1eee3..6b6e71bec 100644
--- a/src/core/hw/y2r.h
+++ b/src/core/hw/y2r.h
@@ -3,13 +3,12 @@
 // Refer to the license.txt file included.
 namespace Y2R_U {
-    struct ConversionConfiguration;
+struct ConversionConfiguration;
 }
 namespace HW {
 namespace Y2R {
 void PerformConversion(Y2R_U::ConversionConfiguration& cvt);
 }
 }