1 files changed, 195 insertions, 46 deletions
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index a80148872..025d4e484 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -18,51 +18,82 @@ namespace Pica {
 namespace Rasterizer {
 static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
-    u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress())));
+    const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress();
+    u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr)));
    u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b();
    // Assuming RGBA8 format until actual framebuffer format handling is implemented
    *(color_buffer + x + y * registers.framebuffer.GetWidth()) = value;
 }
+/**
+ * Read one pixel back from the color buffer.
+ *
+ * The 32-bit word stored for (x, y) is unpacked with alpha in bits 31-24,
+ * red in bits 23-16, green in bits 15-8 and blue in bits 7-0, which is the
+ * same packing DrawPixel uses when writing.
+ *
+ * @param x Pixel column.
+ * @param y Pixel row; rows are registers.framebuffer.GetWidth() pixels apart.
+ * @return The unpacked color.
+ * @note x and y are not range-checked here.
+ */
+static Math::Vec4<u8> GetPixel(int x, int y) {
+    const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress();
+    // The buffer is only read, so keep the pointee const.
+    const u32* color_buffer_u32 = reinterpret_cast<const u32*>(Memory::GetPointer(PAddrToVAddr(addr)));
+    const u32 value = *(color_buffer_u32 + x + y * registers.framebuffer.GetWidth());
+    Math::Vec4<u8> ret;
+    // Explicit casts document the intended truncation to one byte per channel.
+    ret.a() = static_cast<u8>(value >> 24);
+    ret.r() = static_cast<u8>((value >> 16) & 0xFF);
+    ret.g() = static_cast<u8>((value >> 8) & 0xFF);
+    ret.b() = static_cast<u8>(value & 0xFF);
+    return ret;
+}
 static u32 GetDepth(int x, int y) {
-    u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
+    const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
+    u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
    // Assuming 16-bit depth buffer format until actual format handling is implemented
    return *(depth_buffer + x + y * registers.framebuffer.GetWidth());
 }
 static void SetDepth(int x, int y, u16 value) {
-    u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
+    const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
+    u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
    // Assuming 16-bit depth buffer format until actual format handling is implemented
    *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;
 }
-void ProcessTriangle(const VertexShader::OutputVertex& v0,
+// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
-                     const VertexShader::OutputVertex& v1,
+struct Fix12P4 {
-                     const VertexShader::OutputVertex& v2)
+    Fix12P4() {}
-{
+    Fix12P4(u16 val) : val(val) {}
-    // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
-    struct Fix12P4 {
-        Fix12P4() {}
-        Fix12P4(u16 val) : val(val) {}
-        static u16 FracMask() { return 0xF; }
+    static u16 FracMask() { return 0xF; }
-        static u16 IntMask() { return (u16)~0xF; }
+    static u16 IntMask() { return (u16)~0xF; }
-        operator u16() const {
+    operator u16() const {
-            return val;
+        return val;
-        }
+    }
-        bool operator < (const Fix12P4& oth) const {
+    bool operator < (const Fix12P4& oth) const {
-            return (u16)*this < (u16)oth;
+        return (u16)*this < (u16)oth;
-        }
+    }
-    private:
+private:
-        u16 val;
+    u16 val;
-    };
+};
+/**
+ * Calculate the signed area of the triangle spanned by the three argument vertices.
+ *
+ * The result is the z component of Math::Cross applied to the edge vectors
+ * (vtx2 - vtx1) and (vtx3 - vtx1), each extended with z = 0. Assuming
+ * Math::Cross is the usual 3D cross product, that is twice the geometric
+ * area of the triangle. The sign denotes the orientation of the vertices.
+ *
+ * @param vtx1 First vertex; both edge vectors start here.
+ * @param vtx2 Second vertex.
+ * @param vtx3 Third vertex.
+ * @return Signed area value; the culling code in ProcessTriangle discards
+ *         triangles for which this is <= 0.
+ *
+ * @todo define orientation concretely.
+ */
+static int SignedArea(const Math::Vec2<Fix12P4>& vtx1,
+                      const Math::Vec2<Fix12P4>& vtx2,
+                      const Math::Vec2<Fix12P4>& vtx3) {
+    const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
+    const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
+    // TODO: There is a very small chance this will overflow for sizeof(int) == 4
+    return Math::Cross(vec1, vec2).z;
+}
+void ProcessTriangle(const VertexShader::OutputVertex& v0,
+                     const VertexShader::OutputVertex& v1,
+                     const VertexShader::OutputVertex& v2)
+{
    // vertex positions in rasterizer coordinates
    auto FloatToFix = [](float24 flt) {
                          return Fix12P4(static_cast<unsigned short>(flt.ToFloat32() * 16.0f));
@@ -70,10 +101,23 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
    auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) {
                                             return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)};
                                         };
    Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos),
                                   ScreenToRasterizerCoordinates(v1.screenpos),
                                   ScreenToRasterizerCoordinates(v2.screenpos) };
+    if (registers.cull_mode == Regs::CullMode::KeepClockWise) {
+        // Reverse vertex order and use the CCW code path.
+        std::swap(vtxpos[1], vtxpos[2]);
+    }
+    if (registers.cull_mode != Regs::CullMode::KeepAll) {
+        // Cull away triangles which are wound clockwise.
+        // TODO: A check for degenerate triangles ("== 0") should be considered for CullMode::KeepAll
+        if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0)
+            return;
+    }
    // TODO: Proper scissor rect test!
    u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
    u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
@@ -116,18 +160,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
        for (u16 x = min_x; x < max_x; x += 0x10) {
            // Calculate the barycentric coordinates w0, w1 and w2
-            auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1,
+            int w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
-                               const Math::Vec2<Fix12P4>& vtx2,
+            int w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
-                               const Math::Vec2<Fix12P4>& vtx3) {
+            int w2 = bias2 + SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y});
-                const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
-                const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
-                // TODO: There is a very small chance this will overflow for sizeof(int) == 4
-                return Math::Cross(vec1, vec2).z;
-            };
-            int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
-            int w1 = bias1 + orient2d(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
-            int w2 = bias2 + orient2d(vtxpos[0].xy(), vtxpos[1].xy(), {x, y});
            int wsum = w0 + w1 + w2;
            // If current pixel is not covered by the current primitive
@@ -201,8 +236,8 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                            return 0;
                    }
                };
-                s = GetWrappedTexCoord(registers.texture0.wrap_s, s, registers.texture0.width);
+                s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width);
-                t = GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height);
+                t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height);
                u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress()));
                auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);
@@ -279,12 +314,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                    }
                };
-                auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
+                static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
                    switch (factor)
                    {
                    case ColorModifier::SourceColor:
                        return values.rgb();
+                    case ColorModifier::OneMinusSourceColor:
+                        return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>();
                    case ColorModifier::SourceAlpha:
                        return { values.a(), values.a(), values.a() };
@@ -295,7 +333,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                    }
                };
-                auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 {
+                static auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 {
                    switch (factor) {
                    case AlphaModifier::SourceAlpha:
                        return value;
@@ -310,7 +348,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                    }
                };
-                auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
+                static auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
                    switch (op) {
                    case Operation::Replace:
                        return input[0];
@@ -330,6 +368,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                    case Operation::Lerp:
                        return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>();
+                    case Operation::Subtract:
+                    {
+                        auto result = input[0].Cast<int>() - input[1].Cast<int>();
+                        result.r() = std::max(0, result.r());
+                        result.g() = std::max(0, result.g());
+                        result.b() = std::max(0, result.b());
+                        return result.Cast<u8>();
+                    }
                    default:
                        LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
                        _dbg_assert_(HW_GPU, 0);
@@ -337,7 +384,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                    }
                };
-                auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 {
+                static auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 {
                    switch (op) {
                    case Operation::Replace:
                        return input[0];
@@ -351,6 +398,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                    case Operation::Lerp:
                        return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
+                    case Operation::Subtract:
+                        return std::max(0, (int)input[0] - (int)input[1]);
                    default:
                        LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op);
                        _dbg_assert_(HW_GPU, 0);
@@ -381,12 +431,111 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                combiner_output = Math::MakeVec(color_output, alpha_output);
            }
-            // TODO: Not sure if the multiplication by 65535 has already been taken care
+            // TODO: Does depth indeed only get written if depth testing is enabled?
-            // of when transforming to screen coordinates or not.
+            if (registers.output_merger.depth_test_enable) {
-            u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 +
+                u16 z = (u16)(-(v0.screenpos[2].ToFloat32() * w0 +
-                           (float)v1.screenpos[2].ToFloat32() * w1 +
+                            v1.screenpos[2].ToFloat32() * w1 +
-                           (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
+                            v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
-            SetDepth(x >> 4, y >> 4, z);
+                u16 ref_z = GetDepth(x >> 4, y >> 4);
+                bool pass = false;
+                switch (registers.output_merger.depth_test_func) {
+                case registers.output_merger.Always:
+                    pass = true;
+                    break;
+                case registers.output_merger.LessThan:
+                    pass = z < ref_z;
+                    break;
+                case registers.output_merger.GreaterThan:
+                    pass = z > ref_z;
+                    break;
+                default:
+                    LOG_ERROR(HW_GPU, "Unknown depth test function %x", registers.output_merger.depth_test_func.Value());
+                    break;
+                }
+                if (!pass)
+                    continue;
+                if (registers.output_merger.depth_write_enable)
+                    SetDepth(x >> 4, y >> 4, z);
+            }
+            auto dest = GetPixel(x >> 4, y >> 4);
+            if (registers.output_merger.alphablend_enable) {
+                auto params = registers.output_merger.alpha_blending;
+                auto LookupFactorRGB = [&](decltype(params)::BlendFactor factor) -> Math::Vec3<u8> {
+                    switch(factor) {
+                    case params.Zero:
+                        return Math::Vec3<u8>(0, 0, 0);
+                    case params.One:
+                        return Math::Vec3<u8>(255, 255, 255);
+                    case params.SourceAlpha:
+                        return Math::MakeVec(combiner_output.a(), combiner_output.a(), combiner_output.a());
+                    case params.OneMinusSourceAlpha:
+                        return Math::Vec3<u8>(255-combiner_output.a(), 255-combiner_output.a(), 255-combiner_output.a());
+                    default:
+                        LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor);
+                        exit(0);
+                        break;
+                    }
+                };
+                auto LookupFactorA = [&](decltype(params)::BlendFactor factor) -> u8 {
+                    switch(factor) {
+                    case params.Zero:
+                        return 0;
+                    case params.One:
+                        return 255;
+                    case params.SourceAlpha:
+                        return combiner_output.a();
+                    case params.OneMinusSourceAlpha:
+                        return 255 - combiner_output.a();
+                    default:
+                        LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor);
+                        exit(0);
+                        break;
+                    }
+                };
+                auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb),
+                                               LookupFactorA(params.factor_source_a));
+                auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb),
+                                               LookupFactorA(params.factor_dest_a));
+                switch (params.blend_equation_rgb) {
+                case params.Add:
+                {
+                    auto result = (combiner_output * srcfactor + dest * dstfactor) / 255;
+                    // Saturate every channel before narrowing to u8. Alpha needs this as
+                    // well: the sum of the two weighted terms can exceed 255 and would
+                    // otherwise wrap around in the cast below.
+                    result.r() = std::min(255, result.r());
+                    result.g() = std::min(255, result.g());
+                    result.b() = std::min(255, result.b());
+                    result.a() = std::min(255, result.a());
+                    combiner_output = result.Cast<u8>();
+                    break;
+                }
+                default:
+                    LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", params.blend_equation_rgb.Value());
+                    exit(0);
+                }
+            } else {
+                LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op);
+                exit(0);
+            }
            DrawPixel(x >> 4, y >> 4, combiner_output);
        }

diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index a80148872..025d4e484 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp
@@ -18,51 +18,82 @@ namespace Pica {
18	namespace Rasterizer {	18	namespace Rasterizer {
19		19
20	static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {	20	static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
21	u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress())));	21	const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress();
		22	u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr)));
22	u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b();	23	u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b();
23		24
24	// Assuming RGBA8 format until actual framebuffer format handling is implemented	25	// Assuming RGBA8 format until actual framebuffer format handling is implemented
25	*(color_buffer + x + y * registers.framebuffer.GetWidth()) = value;	26	*(color_buffer + x + y * registers.framebuffer.GetWidth()) = value;
26	}	27	}
27		28
		29	static const Math::Vec4<u8> GetPixel(int x, int y) {
		30	const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress();
		31	u32* color_buffer_u32 = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr)));
		32
		33	u32 value = *(color_buffer_u32 + x + y * registers.framebuffer.GetWidth());
		34	Math::Vec4<u8> ret;
		35	ret.a() = value >> 24;
		36	ret.r() = (value >> 16) & 0xFF;
		37	ret.g() = (value >> 8) & 0xFF;
		38	ret.b() = value & 0xFF;
		39	return ret;
		40	}
		41
28	static u32 GetDepth(int x, int y) {	42	static u32 GetDepth(int x, int y) {
29	u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));	43	const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
		44	u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
30		45
31	// Assuming 16-bit depth buffer format until actual format handling is implemented	46	// Assuming 16-bit depth buffer format until actual format handling is implemented
32	return *(depth_buffer + x + y * registers.framebuffer.GetWidth());	47	return *(depth_buffer + x + y * registers.framebuffer.GetWidth());
33	}	48	}
34		49
35	static void SetDepth(int x, int y, u16 value) {	50	static void SetDepth(int x, int y, u16 value) {
36	u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));	51	const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
		52	u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
37		53
38	// Assuming 16-bit depth buffer format until actual format handling is implemented	54	// Assuming 16-bit depth buffer format until actual format handling is implemented
39	*(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;	55	*(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;
40	}	56	}
41		57
42	void ProcessTriangle(const VertexShader::OutputVertex& v0,	58	// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
43	const VertexShader::OutputVertex& v1,	59	struct Fix12P4 {
44	const VertexShader::OutputVertex& v2)	60	Fix12P4() {}
45	{	61	Fix12P4(u16 val) : val(val) {}
46	// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
47	struct Fix12P4 {
48	Fix12P4() {}
49	Fix12P4(u16 val) : val(val) {}
50		62
51	static u16 FracMask() { return 0xF; }	63	static u16 FracMask() { return 0xF; }
52	static u16 IntMask() { return (u16)~0xF; }	64	static u16 IntMask() { return (u16)~0xF; }
53		65
54	operator u16() const {	66	operator u16() const {
55	return val;	67	return val;
56	}	68	}
57		69
58	bool operator < (const Fix12P4& oth) const {	70	bool operator < (const Fix12P4& oth) const {
59	return (u16)*this < (u16)oth;	71	return (u16)*this < (u16)oth;
60	}	72	}
61		73
62	private:	74	private:
63	u16 val;	75	u16 val;
64	};	76	};
		77
		78	/**
		79	* Calculate signed area of the triangle spanned by the three argument vertices.
		80	* The sign denotes an orientation.
		81	*
		82	* @todo define orientation concretely.
		83	*/
		84	static int SignedArea (const Math::Vec2<Fix12P4>& vtx1,
		85	const Math::Vec2<Fix12P4>& vtx2,
		86	const Math::Vec2<Fix12P4>& vtx3) {
		87	const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
		88	const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
		89	// TODO: There is a very small chance this will overflow for sizeof(int) == 4
		90	return Math::Cross(vec1, vec2).z;
		91	};
65		92
		93	void ProcessTriangle(const VertexShader::OutputVertex& v0,
		94	const VertexShader::OutputVertex& v1,
		95	const VertexShader::OutputVertex& v2)
		96	{
66	// vertex positions in rasterizer coordinates	97	// vertex positions in rasterizer coordinates
67	auto FloatToFix = [](float24 flt) {	98	auto FloatToFix = [](float24 flt) {
68	return Fix12P4(static_cast<unsigned short>(flt.ToFloat32() * 16.0f));	99	return Fix12P4(static_cast<unsigned short>(flt.ToFloat32() * 16.0f));
@@ -70,10 +101,23 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
70	auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) {	101	auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) {
71	return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)};	102	return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)};
72	};	103	};
		104
73	Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos),	105	Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos),
74	ScreenToRasterizerCoordinates(v1.screenpos),	106	ScreenToRasterizerCoordinates(v1.screenpos),
75	ScreenToRasterizerCoordinates(v2.screenpos) };	107	ScreenToRasterizerCoordinates(v2.screenpos) };
76		108
		109	if (registers.cull_mode == Regs::CullMode::KeepClockWise) {
		110	// Reverse vertex order and use the CCW code path.
		111	std::swap(vtxpos[1], vtxpos[2]);
		112	}
		113
		114	if (registers.cull_mode != Regs::CullMode::KeepAll) {
		115	// Cull away triangles which are wound clockwise.
		116	// TODO: A check for degenerate triangles ("== 0") should be considered for CullMode::KeepAll
		117	if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0)
		118	return;
		119	}
		120
77	// TODO: Proper scissor rect test!	121	// TODO: Proper scissor rect test!
78	u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});	122	u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
79	u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});	123	u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
@@ -116,18 +160,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
116	for (u16 x = min_x; x < max_x; x += 0x10) {	160	for (u16 x = min_x; x < max_x; x += 0x10) {
117		161
118	// Calculate the barycentric coordinates w0, w1 and w2	162	// Calculate the barycentric coordinates w0, w1 and w2
119	auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1,	163	int w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
120	const Math::Vec2<Fix12P4>& vtx2,	164	int w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
121	const Math::Vec2<Fix12P4>& vtx3) {	165	int w2 = bias2 + SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y});
122	const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
123	const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
124	// TODO: There is a very small chance this will overflow for sizeof(int) == 4
125	return Math::Cross(vec1, vec2).z;
126	};
127
128	int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
129	int w1 = bias1 + orient2d(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
130	int w2 = bias2 + orient2d(vtxpos[0].xy(), vtxpos[1].xy(), {x, y});
131	int wsum = w0 + w1 + w2;	166	int wsum = w0 + w1 + w2;
132		167
133	// If current pixel is not covered by the current primitive	168	// If current pixel is not covered by the current primitive
@@ -201,8 +236,8 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
201	return 0;	236	return 0;
202	}	237	}
203	};	238	};
204	s = GetWrappedTexCoord(registers.texture0.wrap_s, s, registers.texture0.width);	239	s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width);
205	t = GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height);	240	t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height);
206		241
207	u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress()));	242	u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress()));
208	auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);	243	auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);
@@ -279,12 +314,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
279	}	314	}
280	};	315	};
281		316
282	auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> {	317	static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
283	switch (factor)	318	switch (factor)
284	{	319	{
285	case ColorModifier::SourceColor:	320	case ColorModifier::SourceColor:
286	return values.rgb();	321	return values.rgb();
287		322
		323	case ColorModifier::OneMinusSourceColor:
		324	return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>();
		325
288	case ColorModifier::SourceAlpha:	326	case ColorModifier::SourceAlpha:
289	return { values.a(), values.a(), values.a() };	327	return { values.a(), values.a(), values.a() };
290		328
@@ -295,7 +333,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
295	}	333	}
296	};	334	};
297		335
298	auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 {	336	static auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 {
299	switch (factor) {	337	switch (factor) {
300	case AlphaModifier::SourceAlpha:	338	case AlphaModifier::SourceAlpha:
301	return value;	339	return value;
@@ -310,7 +348,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
310	}	348	}
311	};	349	};
312		350
313	auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {	351	static auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
314	switch (op) {	352	switch (op) {
315	case Operation::Replace:	353	case Operation::Replace:
316	return input[0];	354	return input[0];
@@ -330,6 +368,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
330	case Operation::Lerp:	368	case Operation::Lerp:
331	return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>();	369	return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>();
332		370
		371	case Operation::Subtract:
		372	{
		373	auto result = input[0].Cast<int>() - input[1].Cast<int>();
		374	result.r() = std::max(0, result.r());
		375	result.g() = std::max(0, result.g());
		376	result.b() = std::max(0, result.b());
		377	return result.Cast<u8>();
		378	}
		379
333	default:	380	default:
334	LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);	381	LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
335	_dbg_assert_(HW_GPU, 0);	382	_dbg_assert_(HW_GPU, 0);
@@ -337,7 +384,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
337	}	384	}
338	};	385	};
339		386
340	auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 {	387	static auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 {
341	switch (op) {	388	switch (op) {
342	case Operation::Replace:	389	case Operation::Replace:
343	return input[0];	390	return input[0];
@@ -351,6 +398,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
351	case Operation::Lerp:	398	case Operation::Lerp:
352	return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;	399	return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
353		400
		401	case Operation::Subtract:
		402	return std::max(0, (int)input[0] - (int)input[1]);
		403
354	default:	404	default:
355	LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op);	405	LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op);
356	_dbg_assert_(HW_GPU, 0);	406	_dbg_assert_(HW_GPU, 0);
@@ -381,12 +431,111 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
381	combiner_output = Math::MakeVec(color_output, alpha_output);	431	combiner_output = Math::MakeVec(color_output, alpha_output);
382	}	432	}
383		433
384	// TODO: Not sure if the multiplication by 65535 has already been taken care	434	// TODO: Does depth indeed only get written even if depth testing is enabled?
385	// of when transforming to screen coordinates or not.	435	if (registers.output_merger.depth_test_enable) {
386	u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 +	436	u16 z = (u16)(-(v0.screenpos[2].ToFloat32() * w0 +
387	(float)v1.screenpos[2].ToFloat32() * w1 +	437	v1.screenpos[2].ToFloat32() * w1 +
388	(float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);	438	v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
389	SetDepth(x >> 4, y >> 4, z);	439	u16 ref_z = GetDepth(x >> 4, y >> 4);
		440
		441	bool pass = false;
		442
		443	switch (registers.output_merger.depth_test_func) {
		444	case registers.output_merger.Always:
		445	pass = true;
		446	break;
		447
		448	case registers.output_merger.LessThan:
		449	pass = z < ref_z;
		450	break;
		451
		452	case registers.output_merger.GreaterThan:
		453	pass = z > ref_z;
		454	break;
		455
		456	default:
		457	LOG_ERROR(HW_GPU, "Unknown depth test function %x", registers.output_merger.depth_test_func.Value());
		458	break;
		459	}
		460
		461	if (!pass)
		462	continue;
		463
		464	if (registers.output_merger.depth_write_enable)
		465	SetDepth(x >> 4, y >> 4, z);
		466	}
		467
		468	auto dest = GetPixel(x >> 4, y >> 4);
		469
		470	if (registers.output_merger.alphablend_enable) {
		471	auto params = registers.output_merger.alpha_blending;
		472
		473	auto LookupFactorRGB = [&](decltype(params)::BlendFactor factor) -> Math::Vec3<u8> {
		474	switch(factor) {
		475	case params.Zero:
		476	return Math::Vec3<u8>(0, 0, 0);
		477
		478	case params.One:
		479	return Math::Vec3<u8>(255, 255, 255);
		480
		481	case params.SourceAlpha:
		482	return Math::MakeVec(combiner_output.a(), combiner_output.a(), combiner_output.a());
		483
		484	case params.OneMinusSourceAlpha:
		485	return Math::Vec3<u8>(255-combiner_output.a(), 255-combiner_output.a(), 255-combiner_output.a());
		486
		487	default:
		488	LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor);
		489	exit(0);
		490	break;
		491	}
		492	};
		493
		494	auto LookupFactorA = [&](decltype(params)::BlendFactor factor) -> u8 {
		495	switch(factor) {
		496	case params.Zero:
		497	return 0;
		498
		499	case params.One:
		500	return 255;
		501
		502	case params.SourceAlpha:
		503	return combiner_output.a();
		504
		505	case params.OneMinusSourceAlpha:
		506	return 255 - combiner_output.a();
		507
		508	default:
		509	LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor);
		510	exit(0);
		511	break;
		512	}
		513	};
		514
		515	auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb),
		516	LookupFactorA(params.factor_source_a));
		517	auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb),
		518	LookupFactorA(params.factor_dest_a));
		519
		520	switch (params.blend_equation_rgb) {
		521	case params.Add:
		522	{
		523	auto result = (combiner_output * srcfactor + dest * dstfactor) / 255;
		524	result.r() = std::min(255, result.r());
		525	result.g() = std::min(255, result.g());
		526	result.b() = std::min(255, result.b());
		527	combiner_output = result.Cast<u8>();
		528	break;
		529	}
		530
		531	default:
		532	LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", params.blend_equation_rgb.Value());
		533	exit(0);
		534	}
		535	} else {
		536	LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op);
		537	exit(0);
		538	}
390		539
391	DrawPixel(x >> 4, y >> 4, combiner_output);	540	DrawPixel(x >> 4, y >> 4, combiner_output);
392	}	541	}