summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Yuri Kunde Schlesner, 2014-12-23 10:59:07 -0200
committer: Yuri Kunde Schlesner, 2014-12-29 02:08:10 -0200
commit: 2012e1420f90ea86ea6975f2005f05ecd304b0c4 (patch)
tree: f5480dbf05c0fda8a11ee1da284816739a234260
parent: Clipper: Compact buffers on each clipping pass (diff)
download: yuzu-2012e1420f90ea86ea6975f2005f05ecd304b0c4.tar.gz
yuzu-2012e1420f90ea86ea6975f2005f05ecd304b0c4.tar.xz
yuzu-2012e1420f90ea86ea6975f2005f05ecd304b0c4.zip
Rasterizer: Common sub-expression elimination
Move the computation of some values out of loops so that they're not constantly recalculated even when they don't change.
-rw-r--r-- src/video_core/rasterizer.cpp 31
1 files changed, 17 insertions, 14 deletions
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index df1f88c79..63da7104d 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -106,6 +106,14 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
106 int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; 106 int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0;
107 int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; 107 int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0;
108 108
109 const Math::Vec3<float24> w_inverse = Math::MakeVec(
110 float24::FromFloat32(1.0f) / v0.pos.w,
111 float24::FromFloat32(1.0f) / v1.pos.w,
112 float24::FromFloat32(1.0f) / v2.pos.w);
113
114 auto textures = registers.GetTextures();
115 auto tev_stages = registers.GetTevStages();
116
109 // TODO: Not sure if looping through x first might be faster 117 // TODO: Not sure if looping through x first might be faster
110 for (u16 y = min_y; y < max_y; y += 0x10) { 118 for (u16 y = min_y; y < max_y; y += 0x10) {
111 for (u16 x = min_x; x < max_x; x += 0x10) { 119 for (u16 x = min_x; x < max_x; x += 0x10) {
@@ -129,6 +137,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
129 if (w0 < 0 || w1 < 0 || w2 < 0) 137 if (w0 < 0 || w1 < 0 || w2 < 0)
130 continue; 138 continue;
131 139
140 auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)),
141 float24::FromFloat32(static_cast<float>(w1)),
142 float24::FromFloat32(static_cast<float>(w2)));
143 float24 interpolated_w_inverse = float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates);
144
132 // Perspective correct attribute interpolation: 145 // Perspective correct attribute interpolation:
133 // Attribute values cannot be calculated by simple linear interpolation since 146 // Attribute values cannot be calculated by simple linear interpolation since
134 // they are not linear in screen space. For example, when interpolating a 147 // they are not linear in screen space. For example, when interpolating a
@@ -145,19 +158,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
145 // 158 //
146 // The generalization to three vertices is straightforward in baricentric coordinates. 159 // The generalization to three vertices is straightforward in baricentric coordinates.
147 auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { 160 auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) {
148 auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w, 161 auto attr_over_w = Math::MakeVec(attr0, attr1, attr2) * w_inverse;
149 attr1 / v1.pos.w,
150 attr2 / v2.pos.w);
151 auto w_inverse = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w,
152 float24::FromFloat32(1.f) / v1.pos.w,
153 float24::FromFloat32(1.f) / v2.pos.w);
154 auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)),
155 float24::FromFloat32(static_cast<float>(w1)),
156 float24::FromFloat32(static_cast<float>(w2)));
157
158 float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates); 162 float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates);
159 float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates); 163 return interpolated_attr_over_w * interpolated_w_inverse;
160 return interpolated_attr_over_w / interpolated_w_inverse;
161 }; 164 };
162 165
163 Math::Vec4<u8> primary_color{ 166 Math::Vec4<u8> primary_color{
@@ -177,7 +180,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
177 180
178 Math::Vec4<u8> texture_color[3]{}; 181 Math::Vec4<u8> texture_color[3]{};
179 for (int i = 0; i < 3; ++i) { 182 for (int i = 0; i < 3; ++i) {
180 auto texture = registers.GetTextures()[i]; 183 const auto& texture = textures[i];
181 if (!texture.enabled) 184 if (!texture.enabled)
182 continue; 185 continue;
183 186
@@ -219,7 +222,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
219 // with some basic arithmetic. Alpha combiners can be configured separately but work 222 // with some basic arithmetic. Alpha combiners can be configured separately but work
220 // analogously. 223 // analogously.
221 Math::Vec4<u8> combiner_output; 224 Math::Vec4<u8> combiner_output;
222 for (auto tev_stage : registers.GetTevStages()) { 225 for (const auto& tev_stage : tev_stages) {
223 using Source = Regs::TevStageConfig::Source; 226 using Source = Regs::TevStageConfig::Source;
224 using ColorModifier = Regs::TevStageConfig::ColorModifier; 227 using ColorModifier = Regs::TevStageConfig::ColorModifier;
225 using AlphaModifier = Regs::TevStageConfig::AlphaModifier; 228 using AlphaModifier = Regs::TevStageConfig::AlphaModifier;