Pica: Clean up clipping code and change screenspace z to range from -1..0.

The new depth range seems to better match what applications expect, and it makes for cleaner code overall (which in turn makes it more likely to reflect actual hardware behavior).
author: Tony Wasserka 2015-01-02 20:37:25 +0100
committer: Tony Wasserka 2015-02-18 14:50:03 +0100
commit: 365236fa4c96eaba94b715b6844bff64238b70e5 (patch)
tree: 798fde7c74933dbc369617c2fd9641fdfa6e375f /src/video_core
parent: Pica/VertexShader: Implement the LOOP instruction. (diff)
download: yuzu-365236fa4c96eaba94b715b6844bff64238b70e5.tar.gz
yuzu-365236fa4c96eaba94b715b6844bff64238b70e5.tar.xz
yuzu-365236fa4c96eaba94b715b6844bff64238b70e5.zip
2 files changed, 42 insertions, 53 deletions
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
index 1744066ba..ba3876a76 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/clipper.cpp
@@ -15,30 +15,18 @@ namespace Clipper {
 struct ClippingEdge {
 public:
-    enum Type {
+    ClippingEdge(Math::Vec4<float24> coeffs,
-        POS_X = 0,
+                 Math::Vec4<float24> bias = Math::Vec4<float24>(float24::FromFloat32(0),
-        NEG_X = 1,
+                                                                float24::FromFloat32(0),
-        POS_Y = 2,
+                                                                float24::FromFloat32(0),
-        NEG_Y = 3,
+                                                                float24::FromFloat32(0)))
-        POS_Z = 4,
+        : coeffs(coeffs),
-        NEG_Z = 5,
+          bias(bias)
-    };
+    {
+    }
-    ClippingEdge(Type type, float24 position) : type(type), pos(position) {}
    bool IsInside(const OutputVertex& vertex) const {
-        switch (type) {
+        return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0);
-        case POS_X: return vertex.pos.x <= pos * vertex.pos.w;
-        case NEG_X: return vertex.pos.x >= pos * vertex.pos.w;
-        case POS_Y: return vertex.pos.y <= pos * vertex.pos.w;
-        case NEG_Y: return vertex.pos.y >= pos * vertex.pos.w;
-        // TODO: Check z compares ... should be 0..1 instead?
-        case POS_Z: return vertex.pos.z <= pos * vertex.pos.w;
-        default:
-        case NEG_Z: return vertex.pos.z >= pos * vertex.pos.w;
-        }
    }
    bool IsOutSide(const OutputVertex& vertex) const {
@@ -46,31 +34,17 @@ public:
    }
    OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const {
-        auto dotpr = [this](const OutputVertex& vtx) {
+        float24 dp = Math::Dot(v0.pos + bias, coeffs);
-            switch (type) {
+        float24 dp_prev = Math::Dot(v1.pos + bias, coeffs);
-            case POS_X: return vtx.pos.x - vtx.pos.w;
-            case NEG_X: return -vtx.pos.x - vtx.pos.w;
-            case POS_Y: return vtx.pos.y - vtx.pos.w;
-            case NEG_Y: return -vtx.pos.y - vtx.pos.w;
-            // TODO: Verify z clipping
-            case POS_Z: return vtx.pos.z - vtx.pos.w;
-            default:
-            case NEG_Z: return -vtx.pos.w;
-            }
-        };
-        float24 dp = dotpr(v0);
-        float24 dp_prev = dotpr(v1);
        float24 factor = dp_prev / (dp_prev - dp);
        return OutputVertex::Lerp(factor, v0, v1);
    }
 private:
-    Type type;
    float24 pos;
+    Math::Vec4<float24> coeffs;
+    Math::Vec4<float24> bias;
 };
 static void InitScreenCoordinates(OutputVertex& vtx)
@@ -98,10 +72,9 @@ static void InitScreenCoordinates(OutputVertex& vtx)
    vtx.tc2 *= inv_w;
    vtx.pos.w = inv_w;
-    // TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not
    vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x;
    vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y;
-    vtx.screenpos[2] = viewport.offset_z - vtx.pos.z * inv_w * viewport.zscale;
+    vtx.screenpos[2] = viewport.offset_z + vtx.pos.z * inv_w * viewport.zscale;
 }
 void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
@@ -117,14 +90,29 @@ void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
    auto* output_list = &buffer_a;
    auto* input_list  = &buffer_b;
+    // NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value.
+    // TODO: Not sure if this is a valid approach. Also should probably instead use the smallest
+    //       epsilon possible within float24 accuracy.
+    static const float24 EPSILON = float24::FromFloat32(0.00001);
+    static const float24 f0 = float24::FromFloat32(0.0);
+    static const float24 f1 = float24::FromFloat32(1.0);
+    static const std::array<ClippingEdge, 7> clipping_edges = {{
+        { Math::MakeVec( f1,  f0,  f0, -f1) },  // x = +w
+        { Math::MakeVec(-f1,  f0,  f0, -f1) },  // x = -w
+        { Math::MakeVec( f0,  f1,  f0, -f1) },  // y = +w
+        { Math::MakeVec( f0, -f1,  f0, -f1) },  // y = -w
+        { Math::MakeVec( f0,  f0,  f1,  f0) },  // z =  0
+        { Math::MakeVec( f0,  f0, -f1, -f1) },  // z = -w
+        { Math::MakeVec( f0,  f0,  f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON) }, // w = EPSILON
+    }};
+    // TODO: If one vertex lies outside one of the depth clipping planes, some platforms (e.g. Wii)
+    //       drop the whole primitive instead of clipping the primitive properly. We should test if
+    //       this happens on the 3DS, too.
    // Simple implementation of the Sutherland-Hodgman clipping algorithm.
    // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)
-    for (auto edge : { ClippingEdge(ClippingEdge::POS_X, float24::FromFloat32(+1.0)),
+    for (auto edge : clipping_edges) {
-                       ClippingEdge(ClippingEdge::NEG_X, float24::FromFloat32(-1.0)),
-                       ClippingEdge(ClippingEdge::POS_Y, float24::FromFloat32(+1.0)),
-                       ClippingEdge(ClippingEdge::NEG_Y, float24::FromFloat32(-1.0)),
-                       ClippingEdge(ClippingEdge::POS_Z, float24::FromFloat32(+1.0)),
-                       ClippingEdge(ClippingEdge::NEG_Z, float24::FromFloat32(-1.0)) }) {
        std::swap(input_list, output_list);
        output_list->clear();
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 3faa10153..046c010ef 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -106,16 +106,17 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                                   ScreenToRasterizerCoordinates(v1.screenpos),
                                   ScreenToRasterizerCoordinates(v2.screenpos) };
-    if (registers.cull_mode == Regs::CullMode::KeepClockWise) {
+    if (registers.cull_mode == Regs::CullMode::KeepCounterClockWise) {
-        // Reverse vertex order and use the CCW code path.
+        // Reverse vertex order and use the CW code path.
        std::swap(vtxpos[1], vtxpos[2]);
    }
    if (registers.cull_mode != Regs::CullMode::KeepAll) {
-        // Cull away triangles which are wound clockwise.
+        // Cull away triangles which are wound counter-clockwise.
-        // TODO: A check for degenerate triangles ("== 0") should be considered for CullMode::KeepAll
        if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0)
            return;
+    } else {
+        // TODO: Consider A check for degenerate triangles ("SignedArea == 0")
    }
    // TODO: Proper scissor rect test!
@@ -475,7 +476,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
            // TODO: Does depth indeed only get written even if depth testing is enabled?
            if (registers.output_merger.depth_test_enable) {
-                u16 z = (u16)(-(v0.screenpos[2].ToFloat32() * w0 +
+                u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 +
                            v1.screenpos[2].ToFloat32() * w1 +
                            v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
                u16 ref_z = GetDepth(x >> 4, y >> 4);
author	Tony Wasserka	2015-01-02 20:37:25 +0100
committer	Tony Wasserka	2015-02-18 14:50:03 +0100
commit	365236fa4c96eaba94b715b6844bff64238b70e5 (patch)
tree	798fde7c74933dbc369617c2fd9641fdfa6e375f /src/video_core
parent	Pica/VertexShader: Implement the LOOP instruction. (diff)
download	yuzu-365236fa4c96eaba94b715b6844bff64238b70e5.tar.gz yuzu-365236fa4c96eaba94b715b6844bff64238b70e5.tar.xz yuzu-365236fa4c96eaba94b715b6844bff64238b70e5.zip

diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index 1744066ba..ba3876a76 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp
@@ -15,30 +15,18 @@ namespace Clipper {
15		15
16	struct ClippingEdge {	16	struct ClippingEdge {
17	public:	17	public:
18	enum Type {	18	ClippingEdge(Math::Vec4<float24> coeffs,
19	POS_X = 0,	19	Math::Vec4<float24> bias = Math::Vec4<float24>(float24::FromFloat32(0),
20	NEG_X = 1,	20	float24::FromFloat32(0),
21	POS_Y = 2,	21	float24::FromFloat32(0),
22	NEG_Y = 3,	22	float24::FromFloat32(0)))
23	POS_Z = 4,	23	: coeffs(coeffs),
24	NEG_Z = 5,	24	bias(bias)
25	};	25	{
26		26	}
27	ClippingEdge(Type type, float24 position) : type(type), pos(position) {}
28		27
29	bool IsInside(const OutputVertex& vertex) const {	28	bool IsInside(const OutputVertex& vertex) const {
30	switch (type) {	29	return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0);
31	case POS_X: return vertex.pos.x <= pos * vertex.pos.w;
32	case NEG_X: return vertex.pos.x >= pos * vertex.pos.w;
33	case POS_Y: return vertex.pos.y <= pos * vertex.pos.w;
34	case NEG_Y: return vertex.pos.y >= pos * vertex.pos.w;
35
36	// TODO: Check z compares ... should be 0..1 instead?
37	case POS_Z: return vertex.pos.z <= pos * vertex.pos.w;
38
39	default:
40	case NEG_Z: return vertex.pos.z >= pos * vertex.pos.w;
41	}
42	}	30	}
43		31
44	bool IsOutSide(const OutputVertex& vertex) const {	32	bool IsOutSide(const OutputVertex& vertex) const {
@@ -46,31 +34,17 @@ public:
46	}	34	}
47		35
48	OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const {	36	OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const {
49	auto dotpr = [this](const OutputVertex& vtx) {	37	float24 dp = Math::Dot(v0.pos + bias, coeffs);
50	switch (type) {	38	float24 dp_prev = Math::Dot(v1.pos + bias, coeffs);
51	case POS_X: return vtx.pos.x - vtx.pos.w;
52	case NEG_X: return -vtx.pos.x - vtx.pos.w;
53	case POS_Y: return vtx.pos.y - vtx.pos.w;
54	case NEG_Y: return -vtx.pos.y - vtx.pos.w;
55
56	// TODO: Verify z clipping
57	case POS_Z: return vtx.pos.z - vtx.pos.w;
58
59	default:
60	case NEG_Z: return -vtx.pos.w;
61	}
62	};
63
64	float24 dp = dotpr(v0);
65	float24 dp_prev = dotpr(v1);
66	float24 factor = dp_prev / (dp_prev - dp);	39	float24 factor = dp_prev / (dp_prev - dp);
67		40
68	return OutputVertex::Lerp(factor, v0, v1);	41	return OutputVertex::Lerp(factor, v0, v1);
69	}	42	}
70		43
71	private:	44	private:
72	Type type;
73	float24 pos;	45	float24 pos;
		46	Math::Vec4<float24> coeffs;
		47	Math::Vec4<float24> bias;
74	};	48	};
75		49
76	static void InitScreenCoordinates(OutputVertex& vtx)	50	static void InitScreenCoordinates(OutputVertex& vtx)
@@ -98,10 +72,9 @@ static void InitScreenCoordinates(OutputVertex& vtx)
98	vtx.tc2 *= inv_w;	72	vtx.tc2 *= inv_w;
99	vtx.pos.w = inv_w;	73	vtx.pos.w = inv_w;
100		74
101	// TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not
102	vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x;	75	vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x;
103	vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y;	76	vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y;
104	vtx.screenpos[2] = viewport.offset_z - vtx.pos.z * inv_w * viewport.zscale;	77	vtx.screenpos[2] = viewport.offset_z + vtx.pos.z * inv_w * viewport.zscale;
105	}	78	}
106		79
107	void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {	80	void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
@@ -117,14 +90,29 @@ void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
117	auto* output_list = &buffer_a;	90	auto* output_list = &buffer_a;
118	auto* input_list = &buffer_b;	91	auto* input_list = &buffer_b;
119		92
		93	// NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value.
		94	// TODO: Not sure if this is a valid approach. Also should probably instead use the smallest
		95	// epsilon possible within float24 accuracy.
		96	static const float24 EPSILON = float24::FromFloat32(0.00001);
		97	static const float24 f0 = float24::FromFloat32(0.0);
		98	static const float24 f1 = float24::FromFloat32(1.0);
		99	static const std::array<ClippingEdge, 7> clipping_edges = {{
		100	{ Math::MakeVec( f1, f0, f0, -f1) }, // x = +w
		101	{ Math::MakeVec(-f1, f0, f0, -f1) }, // x = -w
		102	{ Math::MakeVec( f0, f1, f0, -f1) }, // y = +w
		103	{ Math::MakeVec( f0, -f1, f0, -f1) }, // y = -w
		104	{ Math::MakeVec( f0, f0, f1, f0) }, // z = 0
		105	{ Math::MakeVec( f0, f0, -f1, -f1) }, // z = -w
		106	{ Math::MakeVec( f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON) }, // w = EPSILON
		107	}};
		108
		109	// TODO: If one vertex lies outside one of the depth clipping planes, some platforms (e.g. Wii)
		110	// drop the whole primitive instead of clipping the primitive properly. We should test if
		111	// this happens on the 3DS, too.
		112
120	// Simple implementation of the Sutherland-Hodgman clipping algorithm.	113	// Simple implementation of the Sutherland-Hodgman clipping algorithm.
121	// TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)	114	// TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)
122	for (auto edge : { ClippingEdge(ClippingEdge::POS_X, float24::FromFloat32(+1.0)),	115	for (auto edge : clipping_edges) {
123	ClippingEdge(ClippingEdge::NEG_X, float24::FromFloat32(-1.0)),
124	ClippingEdge(ClippingEdge::POS_Y, float24::FromFloat32(+1.0)),
125	ClippingEdge(ClippingEdge::NEG_Y, float24::FromFloat32(-1.0)),
126	ClippingEdge(ClippingEdge::POS_Z, float24::FromFloat32(+1.0)),
127	ClippingEdge(ClippingEdge::NEG_Z, float24::FromFloat32(-1.0)) }) {
128		116
129	std::swap(input_list, output_list);	117	std::swap(input_list, output_list);
130	output_list->clear();	118	output_list->clear();


diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 3faa10153..046c010ef 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp
@@ -106,16 +106,17 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
106	ScreenToRasterizerCoordinates(v1.screenpos),	106	ScreenToRasterizerCoordinates(v1.screenpos),
107	ScreenToRasterizerCoordinates(v2.screenpos) };	107	ScreenToRasterizerCoordinates(v2.screenpos) };
108		108
109	if (registers.cull_mode == Regs::CullMode::KeepClockWise) {	109	if (registers.cull_mode == Regs::CullMode::KeepCounterClockWise) {
110	// Reverse vertex order and use the CCW code path.	110	// Reverse vertex order and use the CW code path.
111	std::swap(vtxpos[1], vtxpos[2]);	111	std::swap(vtxpos[1], vtxpos[2]);
112	}	112	}
113		113
114	if (registers.cull_mode != Regs::CullMode::KeepAll) {	114	if (registers.cull_mode != Regs::CullMode::KeepAll) {
115	// Cull away triangles which are wound clockwise.	115	// Cull away triangles which are wound counter-clockwise.
116	// TODO: A check for degenerate triangles ("== 0") should be considered for CullMode::KeepAll
117	if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0)	116	if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0)
118	return;	117	return;
		118	} else {
		119	// TODO: Consider A check for degenerate triangles ("SignedArea == 0")
119	}	120	}
120		121
121	// TODO: Proper scissor rect test!	122	// TODO: Proper scissor rect test!
@@ -475,7 +476,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
475		476
476	// TODO: Does depth indeed only get written even if depth testing is enabled?	477	// TODO: Does depth indeed only get written even if depth testing is enabled?
477	if (registers.output_merger.depth_test_enable) {	478	if (registers.output_merger.depth_test_enable) {
478	u16 z = (u16)(-(v0.screenpos[2].ToFloat32() * w0 +	479	u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 +
479	v1.screenpos[2].ToFloat32() * w1 +	480	v1.screenpos[2].ToFloat32() * w1 +
480	v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);	481	v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
481	u16 ref_z = GetDepth(x >> 4, y >> 4);	482	u16 ref_z = GetDepth(x >> 4, y >> 4);