summary | refs | log | tree | commit | diff
path: root/src/video_core
diff options
context:
space:
mode:
author: Tony Wasserka, 2015-01-02 20:37:25 +0100
committer: Tony Wasserka, 2015-02-18 14:50:03 +0100
commit: 365236fa4c96eaba94b715b6844bff64238b70e5 (patch)
tree: 798fde7c74933dbc369617c2fd9641fdfa6e375f /src/video_core
parent: Pica/VertexShader: Implement the LOOP instruction. (diff)
download: yuzu-365236fa4c96eaba94b715b6844bff64238b70e5.tar.gz
yuzu-365236fa4c96eaba94b715b6844bff64238b70e5.tar.xz
yuzu-365236fa4c96eaba94b715b6844bff64238b70e5.zip
Pica: Cleanup clipping code and change screenspace z to range from -1..0.
The change in depth range seems to better reflect what applications expect, and it makes for cleaner code overall (hence it is more likely to reflect hardware behavior).
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/clipper.cpp 84
-rw-r--r-- src/video_core/rasterizer.cpp 11
2 files changed, 42 insertions, 53 deletions
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
index 1744066ba..ba3876a76 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/clipper.cpp
@@ -15,30 +15,18 @@ namespace Clipper {
15 15
16struct ClippingEdge { 16struct ClippingEdge {
17public: 17public:
18 enum Type { 18 ClippingEdge(Math::Vec4<float24> coeffs,
19 POS_X = 0, 19 Math::Vec4<float24> bias = Math::Vec4<float24>(float24::FromFloat32(0),
20 NEG_X = 1, 20 float24::FromFloat32(0),
21 POS_Y = 2, 21 float24::FromFloat32(0),
22 NEG_Y = 3, 22 float24::FromFloat32(0)))
23 POS_Z = 4, 23 : coeffs(coeffs),
24 NEG_Z = 5, 24 bias(bias)
25 }; 25 {
26 26 }
27 ClippingEdge(Type type, float24 position) : type(type), pos(position) {}
28 27
29 bool IsInside(const OutputVertex& vertex) const { 28 bool IsInside(const OutputVertex& vertex) const {
30 switch (type) { 29 return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0);
31 case POS_X: return vertex.pos.x <= pos * vertex.pos.w;
32 case NEG_X: return vertex.pos.x >= pos * vertex.pos.w;
33 case POS_Y: return vertex.pos.y <= pos * vertex.pos.w;
34 case NEG_Y: return vertex.pos.y >= pos * vertex.pos.w;
35
36 // TODO: Check z compares ... should be 0..1 instead?
37 case POS_Z: return vertex.pos.z <= pos * vertex.pos.w;
38
39 default:
40 case NEG_Z: return vertex.pos.z >= pos * vertex.pos.w;
41 }
42 } 30 }
43 31
44 bool IsOutSide(const OutputVertex& vertex) const { 32 bool IsOutSide(const OutputVertex& vertex) const {
@@ -46,31 +34,17 @@ public:
46 } 34 }
47 35
48 OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const { 36 OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const {
49 auto dotpr = [this](const OutputVertex& vtx) { 37 float24 dp = Math::Dot(v0.pos + bias, coeffs);
50 switch (type) { 38 float24 dp_prev = Math::Dot(v1.pos + bias, coeffs);
51 case POS_X: return vtx.pos.x - vtx.pos.w;
52 case NEG_X: return -vtx.pos.x - vtx.pos.w;
53 case POS_Y: return vtx.pos.y - vtx.pos.w;
54 case NEG_Y: return -vtx.pos.y - vtx.pos.w;
55
56 // TODO: Verify z clipping
57 case POS_Z: return vtx.pos.z - vtx.pos.w;
58
59 default:
60 case NEG_Z: return -vtx.pos.w;
61 }
62 };
63
64 float24 dp = dotpr(v0);
65 float24 dp_prev = dotpr(v1);
66 float24 factor = dp_prev / (dp_prev - dp); 39 float24 factor = dp_prev / (dp_prev - dp);
67 40
68 return OutputVertex::Lerp(factor, v0, v1); 41 return OutputVertex::Lerp(factor, v0, v1);
69 } 42 }
70 43
71private: 44private:
72 Type type;
73 float24 pos; 45 float24 pos;
46 Math::Vec4<float24> coeffs;
47 Math::Vec4<float24> bias;
74}; 48};
75 49
76static void InitScreenCoordinates(OutputVertex& vtx) 50static void InitScreenCoordinates(OutputVertex& vtx)
@@ -98,10 +72,9 @@ static void InitScreenCoordinates(OutputVertex& vtx)
98 vtx.tc2 *= inv_w; 72 vtx.tc2 *= inv_w;
99 vtx.pos.w = inv_w; 73 vtx.pos.w = inv_w;
100 74
101 // TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not
102 vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; 75 vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x;
103 vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; 76 vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y;
104 vtx.screenpos[2] = viewport.offset_z - vtx.pos.z * inv_w * viewport.zscale; 77 vtx.screenpos[2] = viewport.offset_z + vtx.pos.z * inv_w * viewport.zscale;
105} 78}
106 79
107void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) { 80void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
@@ -117,14 +90,29 @@ void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
117 auto* output_list = &buffer_a; 90 auto* output_list = &buffer_a;
118 auto* input_list = &buffer_b; 91 auto* input_list = &buffer_b;
119 92
93 // NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value.
94 // TODO: Not sure if this is a valid approach. Also should probably instead use the smallest
95 // epsilon possible within float24 accuracy.
96 static const float24 EPSILON = float24::FromFloat32(0.00001);
97 static const float24 f0 = float24::FromFloat32(0.0);
98 static const float24 f1 = float24::FromFloat32(1.0);
99 static const std::array<ClippingEdge, 7> clipping_edges = {{
100 { Math::MakeVec( f1, f0, f0, -f1) }, // x = +w
101 { Math::MakeVec(-f1, f0, f0, -f1) }, // x = -w
102 { Math::MakeVec( f0, f1, f0, -f1) }, // y = +w
103 { Math::MakeVec( f0, -f1, f0, -f1) }, // y = -w
104 { Math::MakeVec( f0, f0, f1, f0) }, // z = 0
105 { Math::MakeVec( f0, f0, -f1, -f1) }, // z = -w
106 { Math::MakeVec( f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON) }, // w = EPSILON
107 }};
108
109 // TODO: If one vertex lies outside one of the depth clipping planes, some platforms (e.g. Wii)
110 // drop the whole primitive instead of clipping the primitive properly. We should test if
111 // this happens on the 3DS, too.
112
120 // Simple implementation of the Sutherland-Hodgman clipping algorithm. 113 // Simple implementation of the Sutherland-Hodgman clipping algorithm.
121 // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here) 114 // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)
122 for (auto edge : { ClippingEdge(ClippingEdge::POS_X, float24::FromFloat32(+1.0)), 115 for (auto edge : clipping_edges) {
123 ClippingEdge(ClippingEdge::NEG_X, float24::FromFloat32(-1.0)),
124 ClippingEdge(ClippingEdge::POS_Y, float24::FromFloat32(+1.0)),
125 ClippingEdge(ClippingEdge::NEG_Y, float24::FromFloat32(-1.0)),
126 ClippingEdge(ClippingEdge::POS_Z, float24::FromFloat32(+1.0)),
127 ClippingEdge(ClippingEdge::NEG_Z, float24::FromFloat32(-1.0)) }) {
128 116
129 std::swap(input_list, output_list); 117 std::swap(input_list, output_list);
130 output_list->clear(); 118 output_list->clear();
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 3faa10153..046c010ef 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -106,16 +106,17 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
106 ScreenToRasterizerCoordinates(v1.screenpos), 106 ScreenToRasterizerCoordinates(v1.screenpos),
107 ScreenToRasterizerCoordinates(v2.screenpos) }; 107 ScreenToRasterizerCoordinates(v2.screenpos) };
108 108
109 if (registers.cull_mode == Regs::CullMode::KeepClockWise) { 109 if (registers.cull_mode == Regs::CullMode::KeepCounterClockWise) {
110 // Reverse vertex order and use the CCW code path. 110 // Reverse vertex order and use the CW code path.
111 std::swap(vtxpos[1], vtxpos[2]); 111 std::swap(vtxpos[1], vtxpos[2]);
112 } 112 }
113 113
114 if (registers.cull_mode != Regs::CullMode::KeepAll) { 114 if (registers.cull_mode != Regs::CullMode::KeepAll) {
115 // Cull away triangles which are wound clockwise. 115 // Cull away triangles which are wound counter-clockwise.
116 // TODO: A check for degenerate triangles ("== 0") should be considered for CullMode::KeepAll
117 if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) 116 if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0)
118 return; 117 return;
118 } else {
119 // TODO: Consider A check for degenerate triangles ("SignedArea == 0")
119 } 120 }
120 121
121 // TODO: Proper scissor rect test! 122 // TODO: Proper scissor rect test!
@@ -475,7 +476,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
475 476
476 // TODO: Does depth indeed only get written even if depth testing is enabled? 477 // TODO: Does depth indeed only get written even if depth testing is enabled?
477 if (registers.output_merger.depth_test_enable) { 478 if (registers.output_merger.depth_test_enable) {
478 u16 z = (u16)(-(v0.screenpos[2].ToFloat32() * w0 + 479 u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 +
479 v1.screenpos[2].ToFloat32() * w1 + 480 v1.screenpos[2].ToFloat32() * w1 +
480 v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); 481 v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
481 u16 ref_z = GetDepth(x >> 4, y >> 4); 482 u16 ref_z = GetDepth(x >> 4, y >> 4);