diff options
| author | 2014-12-30 00:13:48 +0100 | |
|---|---|---|
| committer | 2014-12-30 00:13:48 +0100 | |
| commit | b7e0b16354bc31521785247d7da3ad84f3829ea8 (patch) | |
| tree | 9ed2e151b59536187946efb3fe34b51507b19c56 /src/video_core/clipper.cpp | |
| parent | Merge pull request #361 from lioncash/moreqops (diff) | |
| parent | Rasterizer: Pre-divide vertex attributes by W (diff) | |
| download | yuzu-b7e0b16354bc31521785247d7da3ad84f3829ea8.tar.gz yuzu-b7e0b16354bc31521785247d7da3ad84f3829ea8.tar.xz yuzu-b7e0b16354bc31521785247d7da3ad84f3829ea8.zip | |
Merge pull request #351 from yuriks/optimize
Rasterizer Optimizations
Diffstat (limited to 'src/video_core/clipper.cpp')
| -rw-r--r-- | src/video_core/clipper.cpp | 77 |
1 file changed, 40 insertions, 37 deletions
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index 0bcd0b895..1744066ba 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <vector> | 5 | #include <boost/container/static_vector.hpp> |
| 6 | 6 | ||
| 7 | #include "clipper.h" | 7 | #include "clipper.h" |
| 8 | #include "pica.h" | 8 | #include "pica.h" |
| @@ -91,25 +91,31 @@ static void InitScreenCoordinates(OutputVertex& vtx) | |||
| 91 | viewport.zscale = float24::FromRawFloat24(registers.viewport_depth_range); | 91 | viewport.zscale = float24::FromRawFloat24(registers.viewport_depth_range); |
| 92 | viewport.offset_z = float24::FromRawFloat24(registers.viewport_depth_far_plane); | 92 | viewport.offset_z = float24::FromRawFloat24(registers.viewport_depth_far_plane); |
| 93 | 93 | ||
| 94 | float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; | ||
| 95 | vtx.color *= inv_w; | ||
| 96 | vtx.tc0 *= inv_w; | ||
| 97 | vtx.tc1 *= inv_w; | ||
| 98 | vtx.tc2 *= inv_w; | ||
| 99 | vtx.pos.w = inv_w; | ||
| 100 | |||
| 94 | // TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not | 101 | // TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not |
| 95 | vtx.screenpos[0] = (vtx.pos.x / vtx.pos.w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; | 102 | vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; |
| 96 | vtx.screenpos[1] = (vtx.pos.y / vtx.pos.w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; | 103 | vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; |
| 97 | vtx.screenpos[2] = viewport.offset_z - vtx.pos.z / vtx.pos.w * viewport.zscale; | 104 | vtx.screenpos[2] = viewport.offset_z - vtx.pos.z * inv_w * viewport.zscale; |
| 98 | } | 105 | } |
| 99 | 106 | ||
| 100 | void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) { | 107 | void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) { |
| 101 | 108 | using boost::container::static_vector; | |
| 102 | // TODO (neobrain): | 109 | |
| 103 | // The list of output vertices has some fixed maximum size, | 110 | // Clipping a planar n-gon against a plane will remove at least 1 vertex and introduce 2 at |
| 104 | // however I haven't taken the time to figure out what it is exactly. | 111 | // the new edge (or fewer in degenerate cases). As such, we can say that each clipping plane |
| 105 | // For now, we hence just assume a maximal size of 1000 vertices. | 112 | // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a |
| 106 | const size_t max_vertices = 1000; | 113 | // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9. |
| 107 | std::vector<OutputVertex> buffer_vertices; | 114 | static const size_t MAX_VERTICES = 9; |
| 108 | std::vector<OutputVertex*> output_list{ &v0, &v1, &v2 }; | 115 | static_vector<OutputVertex, MAX_VERTICES> buffer_a = { v0, v1, v2 }; |
| 109 | 116 | static_vector<OutputVertex, MAX_VERTICES> buffer_b; | |
| 110 | // Make sure to reserve space for all vertices. | 117 | auto* output_list = &buffer_a; |
| 111 | // Without this, buffer reallocation would invalidate references. | 118 | auto* input_list = &buffer_b; |
| 112 | buffer_vertices.reserve(max_vertices); | ||
| 113 | 119 | ||
| 114 | // Simple implementation of the Sutherland-Hodgman clipping algorithm. | 120 | // Simple implementation of the Sutherland-Hodgman clipping algorithm. |
| 115 | // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here) | 121 | // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here) |
| @@ -120,48 +126,45 @@ void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) { | |||
| 120 | ClippingEdge(ClippingEdge::POS_Z, float24::FromFloat32(+1.0)), | 126 | ClippingEdge(ClippingEdge::POS_Z, float24::FromFloat32(+1.0)), |
| 121 | ClippingEdge(ClippingEdge::NEG_Z, float24::FromFloat32(-1.0)) }) { | 127 | ClippingEdge(ClippingEdge::NEG_Z, float24::FromFloat32(-1.0)) }) { |
| 122 | 128 | ||
| 123 | const std::vector<OutputVertex*> input_list = output_list; | 129 | std::swap(input_list, output_list); |
| 124 | output_list.clear(); | 130 | output_list->clear(); |
| 125 | 131 | ||
| 126 | const OutputVertex* reference_vertex = input_list.back(); | 132 | const OutputVertex* reference_vertex = &input_list->back(); |
| 127 | 133 | ||
| 128 | for (const auto& vertex : input_list) { | 134 | for (const auto& vertex : *input_list) { |
| 129 | // NOTE: This algorithm changes vertex order in some cases! | 135 | // NOTE: This algorithm changes vertex order in some cases! |
| 130 | if (edge.IsInside(*vertex)) { | 136 | if (edge.IsInside(vertex)) { |
| 131 | if (edge.IsOutSide(*reference_vertex)) { | 137 | if (edge.IsOutSide(*reference_vertex)) { |
| 132 | buffer_vertices.push_back(edge.GetIntersection(*vertex, *reference_vertex)); | 138 | output_list->push_back(edge.GetIntersection(vertex, *reference_vertex)); |
| 133 | output_list.push_back(&(buffer_vertices.back())); | ||
| 134 | } | 139 | } |
| 135 | 140 | ||
| 136 | output_list.push_back(vertex); | 141 | output_list->push_back(vertex); |
| 137 | } else if (edge.IsInside(*reference_vertex)) { | 142 | } else if (edge.IsInside(*reference_vertex)) { |
| 138 | buffer_vertices.push_back(edge.GetIntersection(*vertex, *reference_vertex)); | 143 | output_list->push_back(edge.GetIntersection(vertex, *reference_vertex)); |
| 139 | output_list.push_back(&(buffer_vertices.back())); | ||
| 140 | } | 144 | } |
| 141 | 145 | reference_vertex = &vertex; | |
| 142 | reference_vertex = vertex; | ||
| 143 | } | 146 | } |
| 144 | 147 | ||
| 145 | // Need to have at least a full triangle to continue... | 148 | // Need to have at least a full triangle to continue... |
| 146 | if (output_list.size() < 3) | 149 | if (output_list->size() < 3) |
| 147 | return; | 150 | return; |
| 148 | } | 151 | } |
| 149 | 152 | ||
| 150 | InitScreenCoordinates(*(output_list[0])); | 153 | InitScreenCoordinates((*output_list)[0]); |
| 151 | InitScreenCoordinates(*(output_list[1])); | 154 | InitScreenCoordinates((*output_list)[1]); |
| 152 | 155 | ||
| 153 | for (size_t i = 0; i < output_list.size() - 2; i ++) { | 156 | for (size_t i = 0; i < output_list->size() - 2; i ++) { |
| 154 | OutputVertex& vtx0 = *(output_list[0]); | 157 | OutputVertex& vtx0 = (*output_list)[0]; |
| 155 | OutputVertex& vtx1 = *(output_list[i+1]); | 158 | OutputVertex& vtx1 = (*output_list)[i+1]; |
| 156 | OutputVertex& vtx2 = *(output_list[i+2]); | 159 | OutputVertex& vtx2 = (*output_list)[i+2]; |
| 157 | 160 | ||
| 158 | InitScreenCoordinates(vtx2); | 161 | InitScreenCoordinates(vtx2); |
| 159 | 162 | ||
| 160 | LOG_TRACE(Render_Software, | 163 | LOG_TRACE(Render_Software, |
| 161 | "Triangle %lu/%lu (%lu buffer vertices) at position (%.3f, %.3f, %.3f, %.3f), " | 164 | "Triangle %lu/%lu at position (%.3f, %.3f, %.3f, %.3f), " |
| 162 | "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and " | 165 | "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and " |
| 163 | "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)", | 166 | "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)", |
| 164 | i,output_list.size(), buffer_vertices.size(), | 167 | i, output_list->size(), |
| 165 | vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), | 168 | vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), |
| 166 | vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(), | 169 | vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(), |
| 167 | vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(), | 170 | vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(), |