summary | refs | log | tree | commit | diff
path: root/src/video_core/clipper.cpp
diff options
context:
space:
mode:
author: Tony Wasserka, 2014-12-30 00:13:48 +0100
committer: Tony Wasserka, 2014-12-30 00:13:48 +0100
commit: b7e0b16354bc31521785247d7da3ad84f3829ea8 (patch)
tree: 9ed2e151b59536187946efb3fe34b51507b19c56 /src/video_core/clipper.cpp
parent: Merge pull request #361 from lioncash/moreqops (diff)
parent: Rasterizer: Pre-divide vertex attributes by W (diff)
download: yuzu-b7e0b16354bc31521785247d7da3ad84f3829ea8.tar.gz
yuzu-b7e0b16354bc31521785247d7da3ad84f3829ea8.tar.xz
yuzu-b7e0b16354bc31521785247d7da3ad84f3829ea8.zip
Merge pull request #351 from yuriks/optimize
Rasterizer Optimizations
Diffstat (limited to 'src/video_core/clipper.cpp')
-rw-r--r-- src/video_core/clipper.cpp 77
1 files changed, 40 insertions, 37 deletions
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
index 0bcd0b895..1744066ba 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/clipper.cpp
@@ -2,7 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <vector> 5#include <boost/container/static_vector.hpp>
6 6
7#include "clipper.h" 7#include "clipper.h"
8#include "pica.h" 8#include "pica.h"
@@ -91,25 +91,31 @@ static void InitScreenCoordinates(OutputVertex& vtx)
91 viewport.zscale = float24::FromRawFloat24(registers.viewport_depth_range); 91 viewport.zscale = float24::FromRawFloat24(registers.viewport_depth_range);
92 viewport.offset_z = float24::FromRawFloat24(registers.viewport_depth_far_plane); 92 viewport.offset_z = float24::FromRawFloat24(registers.viewport_depth_far_plane);
93 93
94 float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w;
95 vtx.color *= inv_w;
96 vtx.tc0 *= inv_w;
97 vtx.tc1 *= inv_w;
98 vtx.tc2 *= inv_w;
99 vtx.pos.w = inv_w;
100
94 // TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not 101 // TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not
95 vtx.screenpos[0] = (vtx.pos.x / vtx.pos.w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; 102 vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x;
96 vtx.screenpos[1] = (vtx.pos.y / vtx.pos.w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; 103 vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y;
97 vtx.screenpos[2] = viewport.offset_z - vtx.pos.z / vtx.pos.w * viewport.zscale; 104 vtx.screenpos[2] = viewport.offset_z - vtx.pos.z * inv_w * viewport.zscale;
98} 105}
99 106
100void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) { 107void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
101 108 using boost::container::static_vector;
102 // TODO (neobrain): 109
103 // The list of output vertices has some fixed maximum size, 110 // Clipping a planar n-gon against a plane will remove at least 1 vertex and introduces 2 at
104 // however I haven't taken the time to figure out what it is exactly. 111 // the new edge (or less in degenerate cases). As such, we can say that each clipping plane
105 // For now, we hence just assume a maximal size of 1000 vertices. 112 // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a
106 const size_t max_vertices = 1000; 113 // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9.
107 std::vector<OutputVertex> buffer_vertices; 114 static const size_t MAX_VERTICES = 9;
108 std::vector<OutputVertex*> output_list{ &v0, &v1, &v2 }; 115 static_vector<OutputVertex, MAX_VERTICES> buffer_a = { v0, v1, v2 };
109 116 static_vector<OutputVertex, MAX_VERTICES> buffer_b;
110 // Make sure to reserve space for all vertices. 117 auto* output_list = &buffer_a;
111 // Without this, buffer reallocation would invalidate references. 118 auto* input_list = &buffer_b;
112 buffer_vertices.reserve(max_vertices);
113 119
114 // Simple implementation of the Sutherland-Hodgman clipping algorithm. 120 // Simple implementation of the Sutherland-Hodgman clipping algorithm.
115 // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here) 121 // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)
@@ -120,48 +126,45 @@ void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
120 ClippingEdge(ClippingEdge::POS_Z, float24::FromFloat32(+1.0)), 126 ClippingEdge(ClippingEdge::POS_Z, float24::FromFloat32(+1.0)),
121 ClippingEdge(ClippingEdge::NEG_Z, float24::FromFloat32(-1.0)) }) { 127 ClippingEdge(ClippingEdge::NEG_Z, float24::FromFloat32(-1.0)) }) {
122 128
123 const std::vector<OutputVertex*> input_list = output_list; 129 std::swap(input_list, output_list);
124 output_list.clear(); 130 output_list->clear();
125 131
126 const OutputVertex* reference_vertex = input_list.back(); 132 const OutputVertex* reference_vertex = &input_list->back();
127 133
128 for (const auto& vertex : input_list) { 134 for (const auto& vertex : *input_list) {
129 // NOTE: This algorithm changes vertex order in some cases! 135 // NOTE: This algorithm changes vertex order in some cases!
130 if (edge.IsInside(*vertex)) { 136 if (edge.IsInside(vertex)) {
131 if (edge.IsOutSide(*reference_vertex)) { 137 if (edge.IsOutSide(*reference_vertex)) {
132 buffer_vertices.push_back(edge.GetIntersection(*vertex, *reference_vertex)); 138 output_list->push_back(edge.GetIntersection(vertex, *reference_vertex));
133 output_list.push_back(&(buffer_vertices.back()));
134 } 139 }
135 140
136 output_list.push_back(vertex); 141 output_list->push_back(vertex);
137 } else if (edge.IsInside(*reference_vertex)) { 142 } else if (edge.IsInside(*reference_vertex)) {
138 buffer_vertices.push_back(edge.GetIntersection(*vertex, *reference_vertex)); 143 output_list->push_back(edge.GetIntersection(vertex, *reference_vertex));
139 output_list.push_back(&(buffer_vertices.back()));
140 } 144 }
141 145 reference_vertex = &vertex;
142 reference_vertex = vertex;
143 } 146 }
144 147
145 // Need to have at least a full triangle to continue... 148 // Need to have at least a full triangle to continue...
146 if (output_list.size() < 3) 149 if (output_list->size() < 3)
147 return; 150 return;
148 } 151 }
149 152
150 InitScreenCoordinates(*(output_list[0])); 153 InitScreenCoordinates((*output_list)[0]);
151 InitScreenCoordinates(*(output_list[1])); 154 InitScreenCoordinates((*output_list)[1]);
152 155
153 for (size_t i = 0; i < output_list.size() - 2; i ++) { 156 for (size_t i = 0; i < output_list->size() - 2; i ++) {
154 OutputVertex& vtx0 = *(output_list[0]); 157 OutputVertex& vtx0 = (*output_list)[0];
155 OutputVertex& vtx1 = *(output_list[i+1]); 158 OutputVertex& vtx1 = (*output_list)[i+1];
156 OutputVertex& vtx2 = *(output_list[i+2]); 159 OutputVertex& vtx2 = (*output_list)[i+2];
157 160
158 InitScreenCoordinates(vtx2); 161 InitScreenCoordinates(vtx2);
159 162
160 LOG_TRACE(Render_Software, 163 LOG_TRACE(Render_Software,
161 "Triangle %lu/%lu (%lu buffer vertices) at position (%.3f, %.3f, %.3f, %.3f), " 164 "Triangle %lu/%lu at position (%.3f, %.3f, %.3f, %.3f), "
162 "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and " 165 "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and "
163 "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)", 166 "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)",
164 i,output_list.size(), buffer_vertices.size(), 167 i, output_list->size(),
165 vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), 168 vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(),
166 vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(), 169 vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(),
167 vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(), 170 vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(),