1 files changed, 46 insertions, 33 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 7471def57..dd3b31650 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -53,6 +53,29 @@ inline void Read(T &var, const u32 raw_addr) {
    var = g_regs[addr / 4];
 }
+/**
+ * Decodes one encoded source pixel into a four-component u8 color vector.
+ *
+ * @param input_format Pixel format that selects which Color::Decode* routine is applied.
+ * @param src_pixel Pointer to the first byte of the encoded pixel.
+ * @return The decoded color, or {0, 0, 0, 0} (after logging an error) when the format is
+ *         not one of the cases handled below.
+ */
+static Math::Vec4<u8> DecodePixel(Regs::PixelFormat input_format, const u8* src_pixel) {
+    switch (input_format) {
+    case Regs::PixelFormat::RGBA8:
+        return Color::DecodeRGBA8(src_pixel);
+    case Regs::PixelFormat::RGB8:
+        return Color::DecodeRGB8(src_pixel);
+    case Regs::PixelFormat::RGB565:
+        return Color::DecodeRGB565(src_pixel);
+    case Regs::PixelFormat::RGB5A1:
+        return Color::DecodeRGB5A1(src_pixel);
+    case Regs::PixelFormat::RGBA4:
+        return Color::DecodeRGBA4(src_pixel);
+    default:
+        // Cast explicitly: an enumeration value handed straight to a printf-style variadic
+        // argument is not guaranteed to have the type the %x conversion expects.
+        LOG_ERROR(HW_GPU, "Unknown source framebuffer format %x", static_cast<u32>(input_format));
+        return {0, 0, 0, 0};
+    }
+}
 template <typename T>
 inline void Write(u32 addr, const T data) {
    addr -= HW::VADDR_GPU;
@@ -125,11 +148,18 @@ inline void Write(u32 addr, const T data) {
                break;
            }
-            unsigned horizontal_scale = (config.scaling != config.NoScale) ? 2 : 1;
+            if (config.output_tiled &&
-            unsigned vertical_scale = (config.scaling == config.ScaleXY) ? 2 : 1;
+                    (config.scaling == config.ScaleXY || config.scaling == config.ScaleX)) {
+                LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
+                UNIMPLEMENTED();
+                break;
+            }
-            u32 output_width = config.output_width / horizontal_scale;
+            bool horizontal_scale = config.scaling != config.NoScale;
-            u32 output_height = config.output_height / vertical_scale;
+            bool vertical_scale = config.scaling == config.ScaleXY;
+            u32 output_width = config.output_width >> horizontal_scale;
+            u32 output_height = config.output_height >> vertical_scale;
            u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format);
            u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
@@ -153,16 +183,14 @@ inline void Write(u32 addr, const T data) {
                break;
            }
-            // TODO(Subv): Implement the box filter when scaling is enabled
-            // right now we're just skipping the extra pixels.
            for (u32 y = 0; y < output_height; ++y) {
                for (u32 x = 0; x < output_width; ++x) {
-                    Math::Vec4<u8> src_color = { 0, 0, 0, 0 };
+                    Math::Vec4<u8> src_color;
                    // Calculate the [x,y] position of the input image
                    // based on the current output position and the scale
-                    u32 input_x = x * horizontal_scale;
+                    u32 input_x = x << horizontal_scale;
-                    u32 input_y = y * vertical_scale;
+                    u32 input_y = y << vertical_scale;
                    if (config.flip_vertically) {
                        // Flip the y value of the output data,
@@ -193,30 +221,15 @@ inline void Write(u32 addr, const T data) {
                    }
                    const u8* src_pixel = src_pointer + src_offset;
-                    switch (config.input_format) {
+                    src_color = DecodePixel(config.input_format, src_pixel);
-                    case Regs::PixelFormat::RGBA8:
+                    if (config.scaling == config.ScaleX) {
-                        src_color = Color::DecodeRGBA8(src_pixel);
+                        Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
-                        break;
+                        src_color = ((src_color + pixel) / 2).Cast<u8>();
+                    } else if (config.scaling == config.ScaleXY) {
-                    case Regs::PixelFormat::RGB8:
+                        Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel);
-                        src_color = Color::DecodeRGB8(src_pixel);
+                        Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel);
-                        break;
+                        Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel);
+                        src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
-                    case Regs::PixelFormat::RGB565:
-                        src_color = Color::DecodeRGB565(src_pixel);
-                        break;
-                    case Regs::PixelFormat::RGB5A1:
-                        src_color = Color::DecodeRGB5A1(src_pixel);
-                        break;
-                    case Regs::PixelFormat::RGBA4:
-                        src_color = Color::DecodeRGBA4(src_pixel);
-                        break;
-                    default:
-                        LOG_ERROR(HW_GPU, "Unknown source framebuffer format %x", config.input_format.Value());
-                        break;
                    }
                    u8* dst_pixel = dst_pointer + dst_offset;

diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 7471def57..dd3b31650 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp
@@ -53,6 +53,29 @@ inline void Read(T &var, const u32 raw_addr) {
53	var = g_regs[addr / 4];	53	var = g_regs[addr / 4];
54	}	54	}
55		55
		56	static Math::Vec4<u8> DecodePixel(Regs::PixelFormat input_format, const u8* src_pixel) { // Decodes one encoded source pixel into a four-component u8 color; unhandled formats log an error and yield {0, 0, 0, 0}
		57	switch (input_format) {
		58	case Regs::PixelFormat::RGBA8:
		59	return Color::DecodeRGBA8(src_pixel);
		60
		61	case Regs::PixelFormat::RGB8:
		62	return Color::DecodeRGB8(src_pixel);
		63
		64	case Regs::PixelFormat::RGB565:
		65	return Color::DecodeRGB565(src_pixel);
		66
		67	case Regs::PixelFormat::RGB5A1:
		68	return Color::DecodeRGB5A1(src_pixel);
		69
		70	case Regs::PixelFormat::RGBA4:
		71	return Color::DecodeRGBA4(src_pixel);
		72
		73	default:
		74	LOG_ERROR(HW_GPU, "Unknown source framebuffer format %x", static_cast<u32>(input_format)); // explicit cast so the variadic argument has the type %x expects
		75	return {0, 0, 0, 0};
		76	}
		77	}
		78
56	template <typename T>	79	template <typename T>
57	inline void Write(u32 addr, const T data) {	80	inline void Write(u32 addr, const T data) {
58	addr -= HW::VADDR_GPU;	81	addr -= HW::VADDR_GPU;
@@ -125,11 +148,18 @@ inline void Write(u32 addr, const T data) {
125	break;	148	break;
126	}	149	}
127		150
128	unsigned horizontal_scale = (config.scaling != config.NoScale) ? 2 : 1;	151	if (config.output_tiled &&
129	unsigned vertical_scale = (config.scaling == config.ScaleXY) ? 2 : 1;	152	(config.scaling == config.ScaleXY || config.scaling == config.ScaleX)) {
		153	LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
		154	UNIMPLEMENTED();
		155	break;
		156	}
130		157
131	u32 output_width = config.output_width / horizontal_scale;	158	bool horizontal_scale = config.scaling != config.NoScale;
132	u32 output_height = config.output_height / vertical_scale;	159	bool vertical_scale = config.scaling == config.ScaleXY;
		160
		161	u32 output_width = config.output_width >> horizontal_scale;
		162	u32 output_height = config.output_height >> vertical_scale;
133		163
134	u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format);	164	u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format);
135	u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);	165	u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
@@ -153,16 +183,14 @@ inline void Write(u32 addr, const T data) {
153	break;	183	break;
154	}	184	}
155		185
156	// TODO(Subv): Implement the box filter when scaling is enabled
157	// right now we're just skipping the extra pixels.
158	for (u32 y = 0; y < output_height; ++y) {	186	for (u32 y = 0; y < output_height; ++y) {
159	for (u32 x = 0; x < output_width; ++x) {	187	for (u32 x = 0; x < output_width; ++x) {
160	Math::Vec4<u8> src_color = { 0, 0, 0, 0 };	188	Math::Vec4<u8> src_color;
161		189
162	// Calculate the [x,y] position of the input image	190	// Calculate the [x,y] position of the input image
163	// based on the current output position and the scale	191	// based on the current output position and the scale
164	u32 input_x = x * horizontal_scale;	192	u32 input_x = x << horizontal_scale;
165	u32 input_y = y * vertical_scale;	193	u32 input_y = y << vertical_scale;
166		194
167	if (config.flip_vertically) {	195	if (config.flip_vertically) {
168	// Flip the y value of the output data,	196	// Flip the y value of the output data,
@@ -193,30 +221,15 @@ inline void Write(u32 addr, const T data) {
193	}	221	}
194		222
195	const u8* src_pixel = src_pointer + src_offset;	223	const u8* src_pixel = src_pointer + src_offset;
196	switch (config.input_format) {	224	src_color = DecodePixel(config.input_format, src_pixel);
197	case Regs::PixelFormat::RGBA8:	225	if (config.scaling == config.ScaleX) {
198	src_color = Color::DecodeRGBA8(src_pixel);	226	Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
199	break;	227	src_color = ((src_color + pixel) / 2).Cast<u8>();
200		228	} else if (config.scaling == config.ScaleXY) {
201	case Regs::PixelFormat::RGB8:	229	Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel);
202	src_color = Color::DecodeRGB8(src_pixel);	230	Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel);
203	break;	231	Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel);
204		232	src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
205	case Regs::PixelFormat::RGB565:
206	src_color = Color::DecodeRGB565(src_pixel);
207	break;
208
209	case Regs::PixelFormat::RGB5A1:
210	src_color = Color::DecodeRGB5A1(src_pixel);
211	break;
212
213	case Regs::PixelFormat::RGBA4:
214	src_color = Color::DecodeRGBA4(src_pixel);
215	break;
216
217	default:
218	LOG_ERROR(HW_GPU, "Unknown source framebuffer format %x", config.input_format.Value());
219	break;
220	}	233	}
221		234
222	u8* dst_pixel = dst_pointer + dst_offset;	235	u8* dst_pixel = dst_pointer + dst_offset;