Merge pull request #50 from neobrain/pica

Further work on Pica emulation
author: bunnei 2014-08-25 16:12:10 -0400
committer: bunnei 2014-08-25 16:12:10 -0400
commit: 97fd8fc38d4f9c288779cddb06538860124c6263 (patch)
tree: bc99e0fceaae732f9c8d4831fcdb8f661b49ccb8 /src/video_core/rasterizer.cpp
parent: Merge pull request #75 from xsacha/qt5 (diff)
parent: Pica/Rasterizer: Clarify a TODO. (diff)
download: yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.tar.gz
yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.tar.xz
yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.zip
1 files changed, 204 insertions, 18 deletions
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index a7c1bab3e..cdfdb6215 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -11,6 +11,8 @@
 #include "rasterizer.h"
 #include "vertex_shader.h"
+#include "debug_utils/debug_utils.h"
 namespace Pica {
 namespace Rasterizer {
@@ -78,10 +80,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
    u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
    u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
-    min_x = min_x & Fix12P4::IntMask();
+    min_x &= Fix12P4::IntMask();
-    min_y = min_y & Fix12P4::IntMask();
+    min_y &= Fix12P4::IntMask();
-    max_x = (max_x + Fix12P4::FracMask()) & Fix12P4::IntMask();
+    max_x = ((max_x + Fix12P4::FracMask()) & Fix12P4::IntMask());
-    max_y = (max_y + Fix12P4::FracMask()) & Fix12P4::IntMask();
+    max_y = ((max_y + Fix12P4::FracMask()) & Fix12P4::IntMask());
    // Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not
    // drawn. Pixels on any other triangle border are drawn. This is implemented with three bias
@@ -112,10 +114,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
            auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1,
                               const Math::Vec2<Fix12P4>& vtx2,
                               const Math::Vec2<Fix12P4>& vtx3) {
-                const auto vec1 = (vtx2.Cast<int>() - vtx1.Cast<int>()).Append(0);
+                const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
-                const auto vec2 = (vtx3.Cast<int>() - vtx1.Cast<int>()).Append(0);
+                const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
                // TODO: There is a very small chance this will overflow for sizeof(int) == 4
-                return Cross(vec1, vec2).z;
+                return Math::Cross(vec1, vec2).z;
            };
            int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
@@ -143,15 +145,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
            //
            // The generalization to three vertices is straightforward in baricentric coordinates.
            auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) {
-                auto attr_over_w = Math::MakeVec3(attr0 / v0.pos.w,
+                auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w,
-                                                  attr1 / v1.pos.w,
+                                                 attr1 / v1.pos.w,
-                                                  attr2 / v2.pos.w);
+                                                 attr2 / v2.pos.w);
-                auto w_inverse   = Math::MakeVec3(float24::FromFloat32(1.f) / v0.pos.w,
+                auto w_inverse   = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w,
-                                                  float24::FromFloat32(1.f) / v1.pos.w,
+                                                 float24::FromFloat32(1.f) / v1.pos.w,
-                                                  float24::FromFloat32(1.f) / v2.pos.w);
+                                                 float24::FromFloat32(1.f) / v2.pos.w);
-                auto baricentric_coordinates = Math::MakeVec3(float24::FromFloat32(w0),
+                auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(w0),
-                                                              float24::FromFloat32(w1),
+                                                             float24::FromFloat32(w1),
-                                                              float24::FromFloat32(w2));
+                                                             float24::FromFloat32(w2));
                float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates);
                float24 interpolated_w_inverse   = Math::Dot(w_inverse,   baricentric_coordinates);
@@ -165,12 +167,196 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255)
            };
+            Math::Vec4<u8> texture_color{};
+            float24 u = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());
+            float24 v = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v());
+            if (registers.texturing_enable) {
+                // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
+                // of which is composed of four 2x2 subtiles each of which is composed of four texels.
+                // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
+                // texels are laid out in a 2x2 subtile like this:
+                // 2 3
+                // 0 1
+                //
+                // The full 8x8 tile has the texels arranged like this:
+                //
+                // 42 43 46 47 58 59 62 63
+                // 40 41 44 45 56 57 60 61
+                // 34 35 38 39 50 51 54 55
+                // 32 33 36 37 48 49 52 53
+                // 10 11 14 15 26 27 30 31
+                // 08 09 12 13 24 25 28 29
+                // 02 03 06 07 18 19 22 23
+                // 00 01 04 05 16 17 20 21
+                // TODO: This is currently hardcoded for RGB8
+                u32* texture_data = (u32*)Memory::GetPointer(registers.texture0.GetPhysicalAddress());
+                // TODO(neobrain): Not sure if this swizzling pattern is used for all textures.
+                // To be flexible in case different but similar patterns are used, we keep this
+                // somewhat inefficient code around for now.
+                int s = (int)(u * float24::FromFloat32(registers.texture0.width)).ToFloat32();
+                int t = (int)(v * float24::FromFloat32(registers.texture0.height)).ToFloat32();
+                int texel_index_within_tile = 0;
+                for (int block_size_index = 0; block_size_index < 3; ++block_size_index) {
+                    int sub_tile_width = 1 << block_size_index;
+                    int sub_tile_height = 1 << block_size_index;
+                    int sub_tile_index = (s & sub_tile_width) << block_size_index;
+                    sub_tile_index += 2 * ((t & sub_tile_height) << block_size_index);
+                    texel_index_within_tile += sub_tile_index;
+                }
+                const int block_width = 8;
+                const int block_height = 8;
+                int coarse_s = (s / block_width) * block_width;
+                int coarse_t = (t / block_height) * block_height;
+                const int row_stride = registers.texture0.width * 3;
+                u8* source_ptr = (u8*)texture_data + coarse_s * block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3;
+                texture_color.r() = source_ptr[2];
+                texture_color.g() = source_ptr[1];
+                texture_color.b() = source_ptr[0];
+                texture_color.a() = 0xFF;
+                DebugUtils::DumpTexture(registers.texture0, (u8*)texture_data);
+            }
+            // Texture environment - consists of 6 stages of color and alpha combining.
+            //
+            // Color combiners take three input color values from some source (e.g. interpolated
+            // vertex color, texture color, previous stage, etc), perform some very simple
+            // operations on each of them (e.g. inversion) and then calculate the output color
+            // with some basic arithmetic. Alpha combiners can be configured separately but work
+            // analogously.
+            Math::Vec4<u8> combiner_output;
+            for (auto tev_stage : registers.GetTevStages()) {
+                using Source = Regs::TevStageConfig::Source;
+                using ColorModifier = Regs::TevStageConfig::ColorModifier;
+                using AlphaModifier = Regs::TevStageConfig::AlphaModifier;
+                using Operation = Regs::TevStageConfig::Operation;
+                auto GetColorSource = [&](Source source) -> Math::Vec3<u8> {
+                    switch (source) {
+                    case Source::PrimaryColor:
+                        return primary_color.rgb();
+                    case Source::Texture0:
+                        return texture_color.rgb();
+                    case Source::Constant:
+                        return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b};
+                    case Source::Previous:
+                        return combiner_output.rgb();
+                    default:
+                        ERROR_LOG(GPU, "Unknown color combiner source %d\n", (int)source);
+                        return {};
+                    }
+                };
+                auto GetAlphaSource = [&](Source source) -> u8 {
+                    switch (source) {
+                    case Source::PrimaryColor:
+                        return primary_color.a();
+                    case Source::Texture0:
+                        return texture_color.a();
+                    case Source::Constant:
+                        return tev_stage.const_a;
+                    case Source::Previous:
+                        return combiner_output.a();
+                    default:
+                        ERROR_LOG(GPU, "Unknown alpha combiner source %d\n", (int)source);
+                        return 0;
+                    }
+                };
+                auto GetColorModifier = [](ColorModifier factor, const Math::Vec3<u8>& values) -> Math::Vec3<u8> {
+                    switch (factor)
+                    {
+                    case ColorModifier::SourceColor:
+                        return values;
+                    default:
+                        ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor);
+                        return {};
+                    }
+                };
+                auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 {
+                    switch (factor) {
+                    case AlphaModifier::SourceAlpha:
+                        return value;
+                    default:
+                        ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor);
+                        return 0;
+                    }
+                };
+                auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
+                    switch (op) {
+                    case Operation::Replace:
+                        return input[0];
+                    case Operation::Modulate:
+                        return ((input[0] * input[1]) / 255).Cast<u8>();
+                    default:
+                        ERROR_LOG(GPU, "Unknown color combiner operation %d\n", (int)op);
+                        return {};
+                    }
+                };
+                auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 {
+                    switch (op) {
+                    case Operation::Replace:
+                        return input[0];
+                    case Operation::Modulate:
+                        return input[0] * input[1] / 255;
+                    default:
+                        ERROR_LOG(GPU, "Unknown alpha combiner operation %d\n", (int)op);
+                        return 0;
+                    }
+                };
+                // color combiner
+                // NOTE: Not sure if the alpha combiner might use the color output of the previous
+                //       stage as input. Hence, we currently don't directly write the result to
+                //       combiner_output.rgb(), but instead store it in a temporary variable until
+                //       alpha combining has been done.
+                Math::Vec3<u8> color_result[3] = {
+                    GetColorModifier(tev_stage.color_modifier1, GetColorSource(tev_stage.color_source1)),
+                    GetColorModifier(tev_stage.color_modifier2, GetColorSource(tev_stage.color_source2)),
+                    GetColorModifier(tev_stage.color_modifier3, GetColorSource(tev_stage.color_source3))
+                };
+                auto color_output = ColorCombine(tev_stage.color_op, color_result);
+                // alpha combiner
+                std::array<u8,3> alpha_result = {
+                    GetAlphaModifier(tev_stage.alpha_modifier1, GetAlphaSource(tev_stage.alpha_source1)),
+                    GetAlphaModifier(tev_stage.alpha_modifier2, GetAlphaSource(tev_stage.alpha_source2)),
+                    GetAlphaModifier(tev_stage.alpha_modifier3, GetAlphaSource(tev_stage.alpha_source3))
+                };
+                auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
+                combiner_output = Math::MakeVec(color_output, alpha_output);
+            }
+            // TODO: Not sure if the multiplication by 65535 has already been taken care
+            // of when transforming to screen coordinates or not.
            u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 +
                           (float)v1.screenpos[2].ToFloat32() * w1 +
-                           (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); // TODO: Shouldn't need to multiply by 65536?
+                           (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
            SetDepth(x >> 4, y >> 4, z);
-            DrawPixel(x >> 4, y >> 4, primary_color);
+            DrawPixel(x >> 4, y >> 4, combiner_output);
        }
    }
 }
author	bunnei	2014-08-25 16:12:10 -0400
committer	bunnei	2014-08-25 16:12:10 -0400
commit	97fd8fc38d4f9c288779cddb06538860124c6263 (patch)
tree	bc99e0fceaae732f9c8d4831fcdb8f661b49ccb8 /src/video_core/rasterizer.cpp
parent	Merge pull request #75 from xsacha/qt5 (diff)
parent	Pica/Rasterizer: Clarify a TODO. (diff)
download	yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.tar.gz yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.tar.xz yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.zip

diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index a7c1bab3e..cdfdb6215 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp
@@ -11,6 +11,8 @@
11	#include "rasterizer.h"	11	#include "rasterizer.h"
12	#include "vertex_shader.h"	12	#include "vertex_shader.h"
13		13
		14	#include "debug_utils/debug_utils.h"
		15
14	namespace Pica {	16	namespace Pica {
15		17
16	namespace Rasterizer {	18	namespace Rasterizer {
@@ -78,10 +80,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
78	u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});	80	u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
79	u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});	81	u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
80		82
81	min_x = min_x & Fix12P4::IntMask();	83	min_x &= Fix12P4::IntMask();
82	min_y = min_y & Fix12P4::IntMask();	84	min_y &= Fix12P4::IntMask();
83	max_x = (max_x + Fix12P4::FracMask()) & Fix12P4::IntMask();	85	max_x = ((max_x + Fix12P4::FracMask()) & Fix12P4::IntMask());
84	max_y = (max_y + Fix12P4::FracMask()) & Fix12P4::IntMask();	86	max_y = ((max_y + Fix12P4::FracMask()) & Fix12P4::IntMask());
85		87
86	// Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not	88	// Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not
87	// drawn. Pixels on any other triangle border are drawn. This is implemented with three bias	89	// drawn. Pixels on any other triangle border are drawn. This is implemented with three bias
@@ -112,10 +114,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
112	auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1,	114	auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1,
113	const Math::Vec2<Fix12P4>& vtx2,	115	const Math::Vec2<Fix12P4>& vtx2,
114	const Math::Vec2<Fix12P4>& vtx3) {	116	const Math::Vec2<Fix12P4>& vtx3) {
115	const auto vec1 = (vtx2.Cast<int>() - vtx1.Cast<int>()).Append(0);	117	const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
116	const auto vec2 = (vtx3.Cast<int>() - vtx1.Cast<int>()).Append(0);	118	const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
117	// TODO: There is a very small chance this will overflow for sizeof(int) == 4	119	// TODO: There is a very small chance this will overflow for sizeof(int) == 4
118	return Cross(vec1, vec2).z;	120	return Math::Cross(vec1, vec2).z;
119	};	121	};
120		122
121	int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});	123	int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
@@ -143,15 +145,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
143	//	145	//
144	// The generalization to three vertices is straightforward in baricentric coordinates.	146	// The generalization to three vertices is straightforward in baricentric coordinates.
145	auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) {	147	auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) {
146	auto attr_over_w = Math::MakeVec3(attr0 / v0.pos.w,	148	auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w,
147	attr1 / v1.pos.w,	149	attr1 / v1.pos.w,
148	attr2 / v2.pos.w);	150	attr2 / v2.pos.w);
149	auto w_inverse = Math::MakeVec3(float24::FromFloat32(1.f) / v0.pos.w,	151	auto w_inverse = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w,
150	float24::FromFloat32(1.f) / v1.pos.w,	152	float24::FromFloat32(1.f) / v1.pos.w,
151	float24::FromFloat32(1.f) / v2.pos.w);	153	float24::FromFloat32(1.f) / v2.pos.w);
152	auto baricentric_coordinates = Math::MakeVec3(float24::FromFloat32(w0),	154	auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(w0),
153	float24::FromFloat32(w1),	155	float24::FromFloat32(w1),
154	float24::FromFloat32(w2));	156	float24::FromFloat32(w2));
155		157
156	float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates);	158	float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates);
157	float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates);	159	float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates);
@@ -165,12 +167,196 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
165	(u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255)	167	(u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255)
166	};	168	};
167		169
		170	Math::Vec4<u8> texture_color{};
		171	float24 u = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());
		172	float24 v = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v());
		173	if (registers.texturing_enable) {
		174	// Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
		175	// of which is composed of four 2x2 subtiles each of which is composed of four texels.
		176	// Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
		177	// texels are laid out in a 2x2 subtile like this:
		178	// 2 3
		179	// 0 1
		180	//
		181	// The full 8x8 tile has the texels arranged like this:
		182	//
		183	// 42 43 46 47 58 59 62 63
		184	// 40 41 44 45 56 57 60 61
		185	// 34 35 38 39 50 51 54 55
		186	// 32 33 36 37 48 49 52 53
		187	// 10 11 14 15 26 27 30 31
		188	// 08 09 12 13 24 25 28 29
		189	// 02 03 06 07 18 19 22 23
		190	// 00 01 04 05 16 17 20 21
		191
		192	// TODO: This is currently hardcoded for RGB8
		193	u32* texture_data = (u32*)Memory::GetPointer(registers.texture0.GetPhysicalAddress());
		194
		195	// TODO(neobrain): Not sure if this swizzling pattern is used for all textures.
		196	// To be flexible in case different but similar patterns are used, we keep this
		197	// somewhat inefficient code around for now.
		198	int s = (int)(u * float24::FromFloat32(registers.texture0.width)).ToFloat32();
		199	int t = (int)(v * float24::FromFloat32(registers.texture0.height)).ToFloat32();
		200	int texel_index_within_tile = 0;
		201	for (int block_size_index = 0; block_size_index < 3; ++block_size_index) {
		202	int sub_tile_width = 1 << block_size_index;
		203	int sub_tile_height = 1 << block_size_index;
		204
		205	int sub_tile_index = (s & sub_tile_width) << block_size_index;
		206	sub_tile_index += 2 * ((t & sub_tile_height) << block_size_index);
		207	texel_index_within_tile += sub_tile_index;
		208	}
		209
		210	const int block_width = 8;
		211	const int block_height = 8;
		212
		213	int coarse_s = (s / block_width) * block_width;
		214	int coarse_t = (t / block_height) * block_height;
		215
		216	const int row_stride = registers.texture0.width * 3;
		217	u8* source_ptr = (u8*)texture_data + coarse_s * block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3;
		218	texture_color.r() = source_ptr[2];
		219	texture_color.g() = source_ptr[1];
		220	texture_color.b() = source_ptr[0];
		221	texture_color.a() = 0xFF;
		222
		223	DebugUtils::DumpTexture(registers.texture0, (u8*)texture_data);
		224	}
		225
		226	// Texture environment - consists of 6 stages of color and alpha combining.
		227	//
		228	// Color combiners take three input color values from some source (e.g. interpolated
		229	// vertex color, texture color, previous stage, etc), perform some very simple
		230	// operations on each of them (e.g. inversion) and then calculate the output color
		231	// with some basic arithmetic. Alpha combiners can be configured separately but work
		232	// analogously.
		233	Math::Vec4<u8> combiner_output;
		234	for (auto tev_stage : registers.GetTevStages()) {
		235	using Source = Regs::TevStageConfig::Source;
		236	using ColorModifier = Regs::TevStageConfig::ColorModifier;
		237	using AlphaModifier = Regs::TevStageConfig::AlphaModifier;
		238	using Operation = Regs::TevStageConfig::Operation;
		239
		240	auto GetColorSource = [&](Source source) -> Math::Vec3<u8> {
		241	switch (source) {
		242	case Source::PrimaryColor:
		243	return primary_color.rgb();
		244
		245	case Source::Texture0:
		246	return texture_color.rgb();
		247
		248	case Source::Constant:
		249	return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b};
		250
		251	case Source::Previous:
		252	return combiner_output.rgb();
		253
		254	default:
		255	ERROR_LOG(GPU, "Unknown color combiner source %d\n", (int)source);
		256	return {};
		257	}
		258	};
		259
		260	auto GetAlphaSource = [&](Source source) -> u8 {
		261	switch (source) {
		262	case Source::PrimaryColor:
		263	return primary_color.a();
		264
		265	case Source::Texture0:
		266	return texture_color.a();
		267
		268	case Source::Constant:
		269	return tev_stage.const_a;
		270
		271	case Source::Previous:
		272	return combiner_output.a();
		273
		274	default:
		275	ERROR_LOG(GPU, "Unknown alpha combiner source %d\n", (int)source);
		276	return 0;
		277	}
		278	};
		279
		280	auto GetColorModifier = [](ColorModifier factor, const Math::Vec3<u8>& values) -> Math::Vec3<u8> {
		281	switch (factor)
		282	{
		283	case ColorModifier::SourceColor:
		284	return values;
		285	default:
		286	ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor);
		287	return {};
		288	}
		289	};
		290
		291	auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 {
		292	switch (factor) {
		293	case AlphaModifier::SourceAlpha:
		294	return value;
		295	default:
		296	ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor);
		297	return 0;
		298	}
		299	};
		300
		301	auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
		302	switch (op) {
		303	case Operation::Replace:
		304	return input[0];
		305
		306	case Operation::Modulate:
		307	return ((input[0] * input[1]) / 255).Cast<u8>();
		308
		309	default:
		310	ERROR_LOG(GPU, "Unknown color combiner operation %d\n", (int)op);
		311	return {};
		312	}
		313	};
		314
		315	auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 {
		316	switch (op) {
		317	case Operation::Replace:
		318	return input[0];
		319
		320	case Operation::Modulate:
		321	return input[0] * input[1] / 255;
		322
		323	default:
		324	ERROR_LOG(GPU, "Unknown alpha combiner operation %d\n", (int)op);
		325	return 0;
		326	}
		327	};
		328
		329	// color combiner
		330	// NOTE: Not sure if the alpha combiner might use the color output of the previous
		331	// stage as input. Hence, we currently don't directly write the result to
		332	// combiner_output.rgb(), but instead store it in a temporary variable until
		333	// alpha combining has been done.
		334	Math::Vec3<u8> color_result[3] = {
		335	GetColorModifier(tev_stage.color_modifier1, GetColorSource(tev_stage.color_source1)),
		336	GetColorModifier(tev_stage.color_modifier2, GetColorSource(tev_stage.color_source2)),
		337	GetColorModifier(tev_stage.color_modifier3, GetColorSource(tev_stage.color_source3))
		338	};
		339	auto color_output = ColorCombine(tev_stage.color_op, color_result);
		340
		341	// alpha combiner
		342	std::array<u8,3> alpha_result = {
		343	GetAlphaModifier(tev_stage.alpha_modifier1, GetAlphaSource(tev_stage.alpha_source1)),
		344	GetAlphaModifier(tev_stage.alpha_modifier2, GetAlphaSource(tev_stage.alpha_source2)),
		345	GetAlphaModifier(tev_stage.alpha_modifier3, GetAlphaSource(tev_stage.alpha_source3))
		346	};
		347	auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
		348
		349	combiner_output = Math::MakeVec(color_output, alpha_output);
		350	}
		351
		352	// TODO: Not sure if the multiplication by 65535 has already been taken care
		353	// of when transforming to screen coordinates or not.
168	u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 +	354	u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 +
169	(float)v1.screenpos[2].ToFloat32() * w1 +	355	(float)v1.screenpos[2].ToFloat32() * w1 +
170	(float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); // TODO: Shouldn't need to multiply by 65536?	356	(float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
171	SetDepth(x >> 4, y >> 4, z);	357	SetDepth(x >> 4, y >> 4, z);
172		358
173	DrawPixel(x >> 4, y >> 4, primary_color);	359	DrawPixel(x >> 4, y >> 4, combiner_output);
174	}	360	}
175	}	361	}
176	}	362	}