summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/citra_qt/main.cpp 3
-rw-r--r-- src/video_core/clipper.cpp 77
-rw-r--r-- src/video_core/debug_utils/debug_utils.cpp 51
-rw-r--r-- src/video_core/pica.h 20
-rw-r--r-- src/video_core/rasterizer.cpp 28
-rw-r--r-- src/video_core/vertex_shader.cpp 4
6 files changed, 105 insertions, 78 deletions
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp
index 23d4925b8..b12e6a02b 100644
--- a/src/citra_qt/main.cpp
+++ b/src/citra_qt/main.cpp
@@ -230,12 +230,15 @@ void GMainWindow::ToggleWindowMode()
230 render_window->setParent(nullptr); 230 render_window->setParent(nullptr);
231 render_window->setVisible(true); 231 render_window->setVisible(true);
232 render_window->RestoreGeometry(); 232 render_window->RestoreGeometry();
233 render_window->setFocusPolicy(Qt::NoFocus);
233 } 234 }
234 else if (!enable && render_window->parent() == nullptr) 235 else if (!enable && render_window->parent() == nullptr)
235 { 236 {
236 render_window->BackupGeometry(); 237 render_window->BackupGeometry();
237 ui.horizontalLayout->addWidget(render_window); 238 ui.horizontalLayout->addWidget(render_window);
238 render_window->setVisible(true); 239 render_window->setVisible(true);
240 render_window->setFocusPolicy(Qt::ClickFocus);
241 render_window->setFocus();
239 } 242 }
240} 243}
241 244
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
index 0bcd0b895..1744066ba 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/clipper.cpp
@@ -2,7 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <vector> 5#include <boost/container/static_vector.hpp>
6 6
7#include "clipper.h" 7#include "clipper.h"
8#include "pica.h" 8#include "pica.h"
@@ -91,25 +91,31 @@ static void InitScreenCoordinates(OutputVertex& vtx)
91 viewport.zscale = float24::FromRawFloat24(registers.viewport_depth_range); 91 viewport.zscale = float24::FromRawFloat24(registers.viewport_depth_range);
92 viewport.offset_z = float24::FromRawFloat24(registers.viewport_depth_far_plane); 92 viewport.offset_z = float24::FromRawFloat24(registers.viewport_depth_far_plane);
93 93
94 float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w;
95 vtx.color *= inv_w;
96 vtx.tc0 *= inv_w;
97 vtx.tc1 *= inv_w;
98 vtx.tc2 *= inv_w;
99 vtx.pos.w = inv_w;
100
94 // TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not 101 // TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not
95 vtx.screenpos[0] = (vtx.pos.x / vtx.pos.w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; 102 vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x;
96 vtx.screenpos[1] = (vtx.pos.y / vtx.pos.w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; 103 vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y;
97 vtx.screenpos[2] = viewport.offset_z - vtx.pos.z / vtx.pos.w * viewport.zscale; 104 vtx.screenpos[2] = viewport.offset_z - vtx.pos.z * inv_w * viewport.zscale;
98} 105}
99 106
100void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) { 107void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
101 108 using boost::container::static_vector;
102 // TODO (neobrain): 109
103 // The list of output vertices has some fixed maximum size, 110 // Clipping a planar n-gon against a plane will remove at least 1 vertex and introduces 2 at
104 // however I haven't taken the time to figure out what it is exactly. 111 // the new edge (or less in degenerate cases). As such, we can say that each clipping plane
105 // For now, we hence just assume a maximal size of 1000 vertices. 112 // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a
106 const size_t max_vertices = 1000; 113 // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9.
107 std::vector<OutputVertex> buffer_vertices; 114 static const size_t MAX_VERTICES = 9;
108 std::vector<OutputVertex*> output_list{ &v0, &v1, &v2 }; 115 static_vector<OutputVertex, MAX_VERTICES> buffer_a = { v0, v1, v2 };
109 116 static_vector<OutputVertex, MAX_VERTICES> buffer_b;
110 // Make sure to reserve space for all vertices. 117 auto* output_list = &buffer_a;
111 // Without this, buffer reallocation would invalidate references. 118 auto* input_list = &buffer_b;
112 buffer_vertices.reserve(max_vertices);
113 119
114 // Simple implementation of the Sutherland-Hodgman clipping algorithm. 120 // Simple implementation of the Sutherland-Hodgman clipping algorithm.
115 // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here) 121 // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)
@@ -120,48 +126,45 @@ void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
120 ClippingEdge(ClippingEdge::POS_Z, float24::FromFloat32(+1.0)), 126 ClippingEdge(ClippingEdge::POS_Z, float24::FromFloat32(+1.0)),
121 ClippingEdge(ClippingEdge::NEG_Z, float24::FromFloat32(-1.0)) }) { 127 ClippingEdge(ClippingEdge::NEG_Z, float24::FromFloat32(-1.0)) }) {
122 128
123 const std::vector<OutputVertex*> input_list = output_list; 129 std::swap(input_list, output_list);
124 output_list.clear(); 130 output_list->clear();
125 131
126 const OutputVertex* reference_vertex = input_list.back(); 132 const OutputVertex* reference_vertex = &input_list->back();
127 133
128 for (const auto& vertex : input_list) { 134 for (const auto& vertex : *input_list) {
129 // NOTE: This algorithm changes vertex order in some cases! 135 // NOTE: This algorithm changes vertex order in some cases!
130 if (edge.IsInside(*vertex)) { 136 if (edge.IsInside(vertex)) {
131 if (edge.IsOutSide(*reference_vertex)) { 137 if (edge.IsOutSide(*reference_vertex)) {
132 buffer_vertices.push_back(edge.GetIntersection(*vertex, *reference_vertex)); 138 output_list->push_back(edge.GetIntersection(vertex, *reference_vertex));
133 output_list.push_back(&(buffer_vertices.back()));
134 } 139 }
135 140
136 output_list.push_back(vertex); 141 output_list->push_back(vertex);
137 } else if (edge.IsInside(*reference_vertex)) { 142 } else if (edge.IsInside(*reference_vertex)) {
138 buffer_vertices.push_back(edge.GetIntersection(*vertex, *reference_vertex)); 143 output_list->push_back(edge.GetIntersection(vertex, *reference_vertex));
139 output_list.push_back(&(buffer_vertices.back()));
140 } 144 }
141 145 reference_vertex = &vertex;
142 reference_vertex = vertex;
143 } 146 }
144 147
145 // Need to have at least a full triangle to continue... 148 // Need to have at least a full triangle to continue...
146 if (output_list.size() < 3) 149 if (output_list->size() < 3)
147 return; 150 return;
148 } 151 }
149 152
150 InitScreenCoordinates(*(output_list[0])); 153 InitScreenCoordinates((*output_list)[0]);
151 InitScreenCoordinates(*(output_list[1])); 154 InitScreenCoordinates((*output_list)[1]);
152 155
153 for (size_t i = 0; i < output_list.size() - 2; i ++) { 156 for (size_t i = 0; i < output_list->size() - 2; i ++) {
154 OutputVertex& vtx0 = *(output_list[0]); 157 OutputVertex& vtx0 = (*output_list)[0];
155 OutputVertex& vtx1 = *(output_list[i+1]); 158 OutputVertex& vtx1 = (*output_list)[i+1];
156 OutputVertex& vtx2 = *(output_list[i+2]); 159 OutputVertex& vtx2 = (*output_list)[i+2];
157 160
158 InitScreenCoordinates(vtx2); 161 InitScreenCoordinates(vtx2);
159 162
160 LOG_TRACE(Render_Software, 163 LOG_TRACE(Render_Software,
161 "Triangle %lu/%lu (%lu buffer vertices) at position (%.3f, %.3f, %.3f, %.3f), " 164 "Triangle %lu/%lu at position (%.3f, %.3f, %.3f, %.3f), "
162 "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and " 165 "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and "
163 "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)", 166 "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)",
164 i,output_list.size(), buffer_vertices.size(), 167 i, output_list->size(),
165 vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), 168 vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(),
166 vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(), 169 vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(),
167 vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(), 170 vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(),
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 328386b7e..5921185a6 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -304,7 +304,6 @@ std::unique_ptr<PicaTrace> FinishPicaTracing()
304} 304}
305 305
306const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) { 306const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) {
307
308 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each 307 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
309 // of which is composed of four 2x2 subtiles each of which is composed of four texels. 308 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
310 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. 309 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
@@ -323,41 +322,39 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
323 // 02 03 06 07 18 19 22 23 322 // 02 03 06 07 18 19 22 23
324 // 00 01 04 05 16 17 20 21 323 // 00 01 04 05 16 17 20 21
325 324
326 // TODO(neobrain): Not sure if this swizzling pattern is used for all textures. 325 const unsigned int block_width = 8;
327 // To be flexible in case different but similar patterns are used, we keep this 326 const unsigned int block_height = 8;
328 // somewhat inefficient code around for now. 327
329 int texel_index_within_tile = 0; 328 const unsigned int coarse_x = x & ~7;
330 for (int block_size_index = 0; block_size_index < 3; ++block_size_index) { 329 const unsigned int coarse_y = y & ~7;
331 int sub_tile_width = 1 << block_size_index;
332 int sub_tile_height = 1 << block_size_index;
333
334 int sub_tile_index = (x & sub_tile_width) << block_size_index;
335 sub_tile_index += 2 * ((y & sub_tile_height) << block_size_index);
336 texel_index_within_tile += sub_tile_index;
337 }
338 330
339 const int block_width = 8; 331 // Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are
340 const int block_height = 8; 332 // arranged in a Z-order curve. More details on the bit manipulation at:
333 // https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
334 unsigned int i = (x | (y << 8)) & 0x0707; // ---- -210
335 i = (i ^ (i << 2)) & 0x1313; // ---2 --10
336 i = (i ^ (i << 1)) & 0x1515; // ---2 -1-0
337 i = (i | (i >> 7)) & 0x3F;
341 338
342 int coarse_x = (x / block_width) * block_width; 339 source += coarse_y * info.stride;
343 int coarse_y = (y / block_height) * block_height; 340 const unsigned int offset = coarse_x * block_height + i;
344 341
345 switch (info.format) { 342 switch (info.format) {
346 case Regs::TextureFormat::RGBA8: 343 case Regs::TextureFormat::RGBA8:
347 { 344 {
348 const u8* source_ptr = source + coarse_x * block_height * 4 + coarse_y * info.stride + texel_index_within_tile * 4; 345 const u8* source_ptr = source + offset * 4;
349 return { source_ptr[3], source_ptr[2], source_ptr[1], disable_alpha ? (u8)255 : source_ptr[0] }; 346 return { source_ptr[3], source_ptr[2], source_ptr[1], disable_alpha ? (u8)255 : source_ptr[0] };
350 } 347 }
351 348
352 case Regs::TextureFormat::RGB8: 349 case Regs::TextureFormat::RGB8:
353 { 350 {
354 const u8* source_ptr = source + coarse_x * block_height * 3 + coarse_y * info.stride + texel_index_within_tile * 3; 351 const u8* source_ptr = source + offset * 3;
355 return { source_ptr[2], source_ptr[1], source_ptr[0], 255 }; 352 return { source_ptr[2], source_ptr[1], source_ptr[0], 255 };
356 } 353 }
357 354
358 case Regs::TextureFormat::RGBA5551: 355 case Regs::TextureFormat::RGBA5551:
359 { 356 {
360 const u16 source_ptr = *(const u16*)(source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2); 357 const u16 source_ptr = *(const u16*)(source + offset * 2);
361 u8 r = (source_ptr >> 11) & 0x1F; 358 u8 r = (source_ptr >> 11) & 0x1F;
362 u8 g = ((source_ptr) >> 6) & 0x1F; 359 u8 g = ((source_ptr) >> 6) & 0x1F;
363 u8 b = (source_ptr >> 1) & 0x1F; 360 u8 b = (source_ptr >> 1) & 0x1F;
@@ -367,7 +364,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
367 364
368 case Regs::TextureFormat::RGB565: 365 case Regs::TextureFormat::RGB565:
369 { 366 {
370 const u16 source_ptr = *(const u16*)(source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2); 367 const u16 source_ptr = *(const u16*)(source + offset * 2);
371 u8 r = (source_ptr >> 11) & 0x1F; 368 u8 r = (source_ptr >> 11) & 0x1F;
372 u8 g = ((source_ptr) >> 5) & 0x3F; 369 u8 g = ((source_ptr) >> 5) & 0x3F;
373 u8 b = (source_ptr) & 0x1F; 370 u8 b = (source_ptr) & 0x1F;
@@ -376,7 +373,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
376 373
377 case Regs::TextureFormat::RGBA4: 374 case Regs::TextureFormat::RGBA4:
378 { 375 {
379 const u8* source_ptr = source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2; 376 const u8* source_ptr = source + offset * 2;
380 u8 r = source_ptr[1] >> 4; 377 u8 r = source_ptr[1] >> 4;
381 u8 g = source_ptr[1] & 0xFF; 378 u8 g = source_ptr[1] & 0xFF;
382 u8 b = source_ptr[0] >> 4; 379 u8 b = source_ptr[0] >> 4;
@@ -390,7 +387,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
390 387
391 case Regs::TextureFormat::IA8: 388 case Regs::TextureFormat::IA8:
392 { 389 {
393 const u8* source_ptr = source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2; 390 const u8* source_ptr = source + offset * 2;
394 391
395 // TODO: component order not verified 392 // TODO: component order not verified
396 393
@@ -404,13 +401,13 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
404 401
405 case Regs::TextureFormat::I8: 402 case Regs::TextureFormat::I8:
406 { 403 {
407 const u8* source_ptr = source + coarse_x * block_height + coarse_y * info.stride + texel_index_within_tile; 404 const u8* source_ptr = source + offset;
408 return { *source_ptr, *source_ptr, *source_ptr, 255 }; 405 return { *source_ptr, *source_ptr, *source_ptr, 255 };
409 } 406 }
410 407
411 case Regs::TextureFormat::A8: 408 case Regs::TextureFormat::A8:
412 { 409 {
413 const u8* source_ptr = source + coarse_x * block_height + coarse_y * info.stride + texel_index_within_tile; 410 const u8* source_ptr = source + offset;
414 411
415 if (disable_alpha) { 412 if (disable_alpha) {
416 return { *source_ptr, *source_ptr, *source_ptr, 255 }; 413 return { *source_ptr, *source_ptr, *source_ptr, 255 };
@@ -421,7 +418,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
421 418
422 case Regs::TextureFormat::IA4: 419 case Regs::TextureFormat::IA4:
423 { 420 {
424 const u8* source_ptr = source + coarse_x * block_height / 2 + coarse_y * info.stride + texel_index_within_tile / 2; 421 const u8* source_ptr = source + offset / 2;
425 422
426 // TODO: component order not verified 423 // TODO: component order not verified
427 424
@@ -440,7 +437,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
440 437
441 case Regs::TextureFormat::A4: 438 case Regs::TextureFormat::A4:
442 { 439 {
443 const u8* source_ptr = source + coarse_x * block_height / 2 + coarse_y * info.stride + texel_index_within_tile / 2; 440 const u8* source_ptr = source + offset / 2;
444 441
445 // TODO: component order not verified 442 // TODO: component order not verified
446 443
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 89d97e4e9..38bac748c 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -757,6 +757,26 @@ struct float24 {
757 return float24::FromFloat32(ToFloat32() - flt.ToFloat32()); 757 return float24::FromFloat32(ToFloat32() - flt.ToFloat32());
758 } 758 }
759 759
760 float24& operator *= (const float24& flt) {
761 value *= flt.ToFloat32();
762 return *this;
763 }
764
765 float24& operator /= (const float24& flt) {
766 value /= flt.ToFloat32();
767 return *this;
768 }
769
770 float24& operator += (const float24& flt) {
771 value += flt.ToFloat32();
772 return *this;
773 }
774
775 float24& operator -= (const float24& flt) {
776 value -= flt.ToFloat32();
777 return *this;
778 }
779
760 float24 operator - () const { 780 float24 operator - () const {
761 return float24::FromFloat32(-ToFloat32()); 781 return float24::FromFloat32(-ToFloat32());
762 } 782 }
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index df1f88c79..a80148872 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -106,6 +106,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
106 int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; 106 int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0;
107 int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; 107 int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0;
108 108
109 auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w);
110
111 auto textures = registers.GetTextures();
112 auto tev_stages = registers.GetTevStages();
113
109 // TODO: Not sure if looping through x first might be faster 114 // TODO: Not sure if looping through x first might be faster
110 for (u16 y = min_y; y < max_y; y += 0x10) { 115 for (u16 y = min_y; y < max_y; y += 0x10) {
111 for (u16 x = min_x; x < max_x; x += 0x10) { 116 for (u16 x = min_x; x < max_x; x += 0x10) {
@@ -129,6 +134,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
129 if (w0 < 0 || w1 < 0 || w2 < 0) 134 if (w0 < 0 || w1 < 0 || w2 < 0)
130 continue; 135 continue;
131 136
137 auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)),
138 float24::FromFloat32(static_cast<float>(w1)),
139 float24::FromFloat32(static_cast<float>(w2)));
140 float24 interpolated_w_inverse = float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates);
141
132 // Perspective correct attribute interpolation: 142 // Perspective correct attribute interpolation:
133 // Attribute values cannot be calculated by simple linear interpolation since 143 // Attribute values cannot be calculated by simple linear interpolation since
134 // they are not linear in screen space. For example, when interpolating a 144 // they are not linear in screen space. For example, when interpolating a
@@ -145,19 +155,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
145 // 155 //
146 // The generalization to three vertices is straightforward in baricentric coordinates. 156 // The generalization to three vertices is straightforward in baricentric coordinates.
147 auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { 157 auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) {
148 auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w, 158 auto attr_over_w = Math::MakeVec(attr0, attr1, attr2);
149 attr1 / v1.pos.w,
150 attr2 / v2.pos.w);
151 auto w_inverse = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w,
152 float24::FromFloat32(1.f) / v1.pos.w,
153 float24::FromFloat32(1.f) / v2.pos.w);
154 auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)),
155 float24::FromFloat32(static_cast<float>(w1)),
156 float24::FromFloat32(static_cast<float>(w2)));
157
158 float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates); 159 float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates);
159 float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates); 160 return interpolated_attr_over_w * interpolated_w_inverse;
160 return interpolated_attr_over_w / interpolated_w_inverse;
161 }; 161 };
162 162
163 Math::Vec4<u8> primary_color{ 163 Math::Vec4<u8> primary_color{
@@ -177,7 +177,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
177 177
178 Math::Vec4<u8> texture_color[3]{}; 178 Math::Vec4<u8> texture_color[3]{};
179 for (int i = 0; i < 3; ++i) { 179 for (int i = 0; i < 3; ++i) {
180 auto texture = registers.GetTextures()[i]; 180 const auto& texture = textures[i];
181 if (!texture.enabled) 181 if (!texture.enabled)
182 continue; 182 continue;
183 183
@@ -219,7 +219,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
219 // with some basic arithmetic. Alpha combiners can be configured separately but work 219 // with some basic arithmetic. Alpha combiners can be configured separately but work
220 // analogously. 220 // analogously.
221 Math::Vec4<u8> combiner_output; 221 Math::Vec4<u8> combiner_output;
222 for (auto tev_stage : registers.GetTevStages()) { 222 for (const auto& tev_stage : tev_stages) {
223 using Source = Regs::TevStageConfig::Source; 223 using Source = Regs::TevStageConfig::Source;
224 using ColorModifier = Regs::TevStageConfig::ColorModifier; 224 using ColorModifier = Regs::TevStageConfig::ColorModifier;
225 using AlphaModifier = Regs::TevStageConfig::AlphaModifier; 225 using AlphaModifier = Regs::TevStageConfig::AlphaModifier;
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index e31bc3bc7..bed5081a0 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -469,6 +469,10 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes)
469 469
470 // Setup output register table 470 // Setup output register table
471 OutputVertex ret; 471 OutputVertex ret;
472 // Zero output so that attributes which aren't output won't have denormals in them, which will
473 // slow us down later.
474 memset(&ret, 0, sizeof(ret));
475
472 for (int i = 0; i < 7; ++i) { 476 for (int i = 0; i < 7; ++i) {
473 const auto& output_register_map = registers.vs_output_attributes[i]; 477 const auto& output_register_map = registers.vs_output_attributes[i];
474 478