diff options
| author | 2014-12-30 00:13:48 +0100 | |
|---|---|---|
| committer | 2014-12-30 00:13:48 +0100 | |
| commit | b7e0b16354bc31521785247d7da3ad84f3829ea8 (patch) | |
| tree | 9ed2e151b59536187946efb3fe34b51507b19c56 /src/video_core/rasterizer.cpp | |
| parent | Merge pull request #361 from lioncash/moreqops (diff) | |
| parent | Rasterizer: Pre-divide vertex attributes by W (diff) | |
| download | yuzu-b7e0b16354bc31521785247d7da3ad84f3829ea8.tar.gz yuzu-b7e0b16354bc31521785247d7da3ad84f3829ea8.tar.xz yuzu-b7e0b16354bc31521785247d7da3ad84f3829ea8.zip | |
Merge pull request #351 from yuriks/optimize
Rasterizer Optimizations
Diffstat (limited to 'src/video_core/rasterizer.cpp')
| -rw-r--r-- | src/video_core/rasterizer.cpp | 28 |
1 file changed, 14 insertions, 14 deletions
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index df1f88c79..a80148872 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
| @@ -106,6 +106,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 106 | int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; | 106 | int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; |
| 107 | int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; | 107 | int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; |
| 108 | 108 | ||
| 109 | auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); | ||
| 110 | |||
| 111 | auto textures = registers.GetTextures(); | ||
| 112 | auto tev_stages = registers.GetTevStages(); | ||
| 113 | |||
| 109 | // TODO: Not sure if looping through x first might be faster | 114 | // TODO: Not sure if looping through x first might be faster |
| 110 | for (u16 y = min_y; y < max_y; y += 0x10) { | 115 | for (u16 y = min_y; y < max_y; y += 0x10) { |
| 111 | for (u16 x = min_x; x < max_x; x += 0x10) { | 116 | for (u16 x = min_x; x < max_x; x += 0x10) { |
| @@ -129,6 +134,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 129 | if (w0 < 0 || w1 < 0 || w2 < 0) | 134 | if (w0 < 0 || w1 < 0 || w2 < 0) |
| 130 | continue; | 135 | continue; |
| 131 | 136 | ||
| 137 | auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)), | ||
| 138 | float24::FromFloat32(static_cast<float>(w1)), | ||
| 139 | float24::FromFloat32(static_cast<float>(w2))); | ||
| 140 | float24 interpolated_w_inverse = float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates); | ||
| 141 | |||
| 132 | // Perspective correct attribute interpolation: | 142 | // Perspective correct attribute interpolation: |
| 133 | // Attribute values cannot be calculated by simple linear interpolation since | 143 | // Attribute values cannot be calculated by simple linear interpolation since |
| 134 | // they are not linear in screen space. For example, when interpolating a | 144 | // they are not linear in screen space. For example, when interpolating a |
| @@ -145,19 +155,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 145 | // | 155 | // |
| 146 | // The generalization to three vertices is straightforward in baricentric coordinates. | 156 | // The generalization to three vertices is straightforward in baricentric coordinates. |
| 147 | auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { | 157 | auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { |
| 148 | auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w, | 158 | auto attr_over_w = Math::MakeVec(attr0, attr1, attr2); |
| 149 | attr1 / v1.pos.w, | ||
| 150 | attr2 / v2.pos.w); | ||
| 151 | auto w_inverse = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w, | ||
| 152 | float24::FromFloat32(1.f) / v1.pos.w, | ||
| 153 | float24::FromFloat32(1.f) / v2.pos.w); | ||
| 154 | auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)), | ||
| 155 | float24::FromFloat32(static_cast<float>(w1)), | ||
| 156 | float24::FromFloat32(static_cast<float>(w2))); | ||
| 157 | |||
| 158 | float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates); | 159 | float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates); |
| 159 | float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates); | 160 | return interpolated_attr_over_w * interpolated_w_inverse; |
| 160 | return interpolated_attr_over_w / interpolated_w_inverse; | ||
| 161 | }; | 161 | }; |
| 162 | 162 | ||
| 163 | Math::Vec4<u8> primary_color{ | 163 | Math::Vec4<u8> primary_color{ |
| @@ -177,7 +177,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 177 | 177 | ||
| 178 | Math::Vec4<u8> texture_color[3]{}; | 178 | Math::Vec4<u8> texture_color[3]{}; |
| 179 | for (int i = 0; i < 3; ++i) { | 179 | for (int i = 0; i < 3; ++i) { |
| 180 | auto texture = registers.GetTextures()[i]; | 180 | const auto& texture = textures[i]; |
| 181 | if (!texture.enabled) | 181 | if (!texture.enabled) |
| 182 | continue; | 182 | continue; |
| 183 | 183 | ||
| @@ -219,7 +219,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 219 | // with some basic arithmetic. Alpha combiners can be configured separately but work | 219 | // with some basic arithmetic. Alpha combiners can be configured separately but work |
| 220 | // analogously. | 220 | // analogously. |
| 221 | Math::Vec4<u8> combiner_output; | 221 | Math::Vec4<u8> combiner_output; |
| 222 | for (auto tev_stage : registers.GetTevStages()) { | 222 | for (const auto& tev_stage : tev_stages) { |
| 223 | using Source = Regs::TevStageConfig::Source; | 223 | using Source = Regs::TevStageConfig::Source; |
| 224 | using ColorModifier = Regs::TevStageConfig::ColorModifier; | 224 | using ColorModifier = Regs::TevStageConfig::ColorModifier; |
| 225 | using AlphaModifier = Regs::TevStageConfig::AlphaModifier; | 225 | using AlphaModifier = Regs::TevStageConfig::AlphaModifier; |