Fix merge conflicts

author: darkf 2014-12-29 19:47:41 -0800
committer: darkf 2014-12-29 19:47:41 -0800
commit: 8ba9ac0f74abb0408a26207a76a0c1808bad8de0 (patch)
tree: f1c7c3393fa726435b5b90bf335567c93e528ef1 /src/video_core/rasterizer.cpp
parent: Add comment regarding __WIN32__ in SkyEye code (diff)
parent: Merge pull request #367 from bunnei/usat_ssat (diff)
download: yuzu-8ba9ac0f74abb0408a26207a76a0c1808bad8de0.tar.gz
yuzu-8ba9ac0f74abb0408a26207a76a0c1808bad8de0.tar.xz
yuzu-8ba9ac0f74abb0408a26207a76a0c1808bad8de0.zip
1 files changed, 117 insertions, 85 deletions
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index a35f0c0d8..a80148872 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -1,5 +1,5 @@
 // Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2
+// Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <algorithm>
@@ -18,7 +18,7 @@ namespace Pica {
 namespace Rasterizer {
 static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
-    u32* color_buffer = (u32*)Memory::GetPointer(registers.framebuffer.GetColorBufferAddress());
+    u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress())));
    u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b();
    // Assuming RGBA8 format until actual framebuffer format handling is implemented
@@ -26,14 +26,14 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
 }
 static u32 GetDepth(int x, int y) {
-    u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress());
+    u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
    // Assuming 16-bit depth buffer format until actual format handling is implemented
    return *(depth_buffer + x + y * registers.framebuffer.GetWidth());
 }
 static void SetDepth(int x, int y, u16 value) {
-    u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress());
+    u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
    // Assuming 16-bit depth buffer format until actual format handling is implemented
    *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;
@@ -106,6 +106,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
    int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0;
    int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0;
+    auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w);
+    auto textures = registers.GetTextures();
+    auto tev_stages = registers.GetTevStages();
    // TODO: Not sure if looping through x first might be faster
    for (u16 y = min_y; y < max_y; y += 0x10) {
        for (u16 x = min_x; x < max_x; x += 0x10) {
@@ -129,6 +134,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
            if (w0 < 0 || w1 < 0 || w2 < 0)
                continue;
+            auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)),
+                                                float24::FromFloat32(static_cast<float>(w1)),
+                                                float24::FromFloat32(static_cast<float>(w2)));
+            float24 interpolated_w_inverse = float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates);
            // Perspective correct attribute interpolation:
            // Attribute values cannot be calculated by simple linear interpolation since
            // they are not linear in screen space. For example, when interpolating a
@@ -145,19 +155,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
            //
            // The generalization to three vertices is straightforward in baricentric coordinates.
            auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) {
-                auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w,
+                auto attr_over_w = Math::MakeVec(attr0, attr1, attr2);
-                                                 attr1 / v1.pos.w,
-                                                 attr2 / v2.pos.w);
-                auto w_inverse   = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w,
-                                                 float24::FromFloat32(1.f) / v1.pos.w,
-                                                 float24::FromFloat32(1.f) / v2.pos.w);
-                auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)),
-                                                             float24::FromFloat32(static_cast<float>(w1)),
-                                                             float24::FromFloat32(static_cast<float>(w2)));
                float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates);
-                float24 interpolated_w_inverse   = Math::Dot(w_inverse,   baricentric_coordinates);
+                return interpolated_attr_over_w * interpolated_w_inverse;
-                return interpolated_attr_over_w / interpolated_w_inverse;
            };
            Math::Vec4<u8> primary_color{
@@ -167,60 +167,48 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255)
            };
-            Math::Vec4<u8> texture_color{};
+            Math::Vec2<float24> uv[3];
-            float24 u = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());
+            uv[0].u() = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());
-            float24 v = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v());
+            uv[0].v() = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v());
-            if (registers.texturing_enable) {
+            uv[1].u() = GetInterpolatedAttribute(v0.tc1.u(), v1.tc1.u(), v2.tc1.u());
-                // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
+            uv[1].v() = GetInterpolatedAttribute(v0.tc1.v(), v1.tc1.v(), v2.tc1.v());
-                // of which is composed of four 2x2 subtiles each of which is composed of four texels.
+            uv[2].u() = GetInterpolatedAttribute(v0.tc2.u(), v1.tc2.u(), v2.tc2.u());
-                // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
+            uv[2].v() = GetInterpolatedAttribute(v0.tc2.v(), v1.tc2.v(), v2.tc2.v());
-                // texels are laid out in a 2x2 subtile like this:
-                // 2 3
+            Math::Vec4<u8> texture_color[3]{};
-                // 0 1
+            for (int i = 0; i < 3; ++i) {
-                //
+                const auto& texture = textures[i];
-                // The full 8x8 tile has the texels arranged like this:
+                if (!texture.enabled)
-                //
+                    continue;
-                // 42 43 46 47 58 59 62 63
-                // 40 41 44 45 56 57 60 61
+                _dbg_assert_(HW_GPU, 0 != texture.config.address);
-                // 34 35 38 39 50 51 54 55
-                // 32 33 36 37 48 49 52 53
+                int s = (int)(uv[i].u() * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32();
-                // 10 11 14 15 26 27 30 31
+                int t = (int)(uv[i].v() * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32();
-                // 08 09 12 13 24 25 28 29
+                auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) {
-                // 02 03 06 07 18 19 22 23
+                    switch (mode) {
-                // 00 01 04 05 16 17 20 21
+                        case Regs::TextureConfig::ClampToEdge:
+                            val = std::max(val, 0);
-                // TODO: This is currently hardcoded for RGB8
+                            val = std::min(val, (int)size - 1);
-                u32* texture_data = (u32*)Memory::GetPointer(registers.texture0.GetPhysicalAddress());
+                            return val;
-                // TODO(neobrain): Not sure if this swizzling pattern is used for all textures.
+                        case Regs::TextureConfig::Repeat:
-                // To be flexible in case different but similar patterns are used, we keep this
+                            return (int)(((unsigned)val) % size);
-                // somewhat inefficient code around for now.
-                int s = (int)(u * float24::FromFloat32(static_cast<float>(registers.texture0.width))).ToFloat32();
+                        default:
-                int t = (int)(v * float24::FromFloat32(static_cast<float>(registers.texture0.height))).ToFloat32();
+                            LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x\n", (int)mode);
-                int texel_index_within_tile = 0;
+                            _dbg_assert_(HW_GPU, 0);
-                for (int block_size_index = 0; block_size_index < 3; ++block_size_index) {
+                            return 0;
-                    int sub_tile_width = 1 << block_size_index;
+                    }
-                    int sub_tile_height = 1 << block_size_index;
+                };
+                s = GetWrappedTexCoord(registers.texture0.wrap_s, s, registers.texture0.width);
-                    int sub_tile_index = (s & sub_tile_width) << block_size_index;
+                t = GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height);
-                    sub_tile_index += 2 * ((t & sub_tile_height) << block_size_index);
-                    texel_index_within_tile += sub_tile_index;
+                u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress()));
-                }
+                auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);
-                const int block_width = 8;
+                texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info);
-                const int block_height = 8;
+                DebugUtils::DumpTexture(texture.config, texture_data);
-                int coarse_s = (s / block_width) * block_width;
-                int coarse_t = (t / block_height) * block_height;
-                const int row_stride = registers.texture0.width * 3;
-                u8* source_ptr = (u8*)texture_data + coarse_s * block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3;
-                texture_color.r() = source_ptr[2];
-                texture_color.g() = source_ptr[1];
-                texture_color.b() = source_ptr[0];
-                texture_color.a() = 0xFF;
-                DebugUtils::DumpTexture(registers.texture0, (u8*)texture_data);
            }
            // Texture environment - consists of 6 stages of color and alpha combining.
@@ -231,28 +219,35 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
            // with some basic arithmetic. Alpha combiners can be configured separately but work
            // analogously.
            Math::Vec4<u8> combiner_output;
-            for (auto tev_stage : registers.GetTevStages()) {
+            for (const auto& tev_stage : tev_stages) {
                using Source = Regs::TevStageConfig::Source;
                using ColorModifier = Regs::TevStageConfig::ColorModifier;
                using AlphaModifier = Regs::TevStageConfig::AlphaModifier;
                using Operation = Regs::TevStageConfig::Operation;
-                auto GetColorSource = [&](Source source) -> Math::Vec3<u8> {
+                auto GetColorSource = [&](Source source) -> Math::Vec4<u8> {
                    switch (source) {
                    case Source::PrimaryColor:
-                        return primary_color.rgb();
+                        return primary_color;
                    case Source::Texture0:
-                        return texture_color.rgb();
+                        return texture_color[0];
+                    case Source::Texture1:
+                        return texture_color[1];
+                    case Source::Texture2:
+                        return texture_color[2];
                    case Source::Constant:
-                        return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b};
+                        return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a};
                    case Source::Previous:
-                        return combiner_output.rgb();
+                        return combiner_output;
                    default:
-                        ERROR_LOG(GPU, "Unknown color combiner source %d\n", (int)source);
+                        LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source);
+                        _dbg_assert_(HW_GPU, 0);
                        return {};
                    }
                };
@@ -263,7 +258,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                        return primary_color.a();
                    case Source::Texture0:
-                        return texture_color.a();
+                        return texture_color[0].a();
+                    case Source::Texture1:
+                        return texture_color[1].a();
+                    case Source::Texture2:
+                        return texture_color[2].a();
                    case Source::Constant:
                        return tev_stage.const_a;
@@ -272,18 +273,24 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                        return combiner_output.a();
                    default:
-                        ERROR_LOG(GPU, "Unknown alpha combiner source %d\n", (int)source);
+                        LOG_ERROR(HW_GPU, "Unknown alpha combiner source %d\n", (int)source);
+                        _dbg_assert_(HW_GPU, 0);
                        return 0;
                    }
                };
-                auto GetColorModifier = [](ColorModifier factor, const Math::Vec3<u8>& values) -> Math::Vec3<u8> {
+                auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
                    switch (factor)
                    {
                    case ColorModifier::SourceColor:
-                        return values;
+                        return values.rgb();
+                    case ColorModifier::SourceAlpha:
+                        return { values.a(), values.a(), values.a() };
                    default:
-                        ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor);
+                        LOG_ERROR(HW_GPU, "Unknown color factor %d\n", (int)factor);
+                        _dbg_assert_(HW_GPU, 0);
                        return {};
                    }
                };
@@ -292,8 +299,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                    switch (factor) {
                    case AlphaModifier::SourceAlpha:
                        return value;
+                    case AlphaModifier::OneMinusSourceAlpha:
+                        return 255 - value;
                    default:
-                        ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor);
+                        LOG_ERROR(HW_GPU, "Unknown alpha factor %d\n", (int)factor);
+                        _dbg_assert_(HW_GPU, 0);
                        return 0;
                    }
                };
@@ -306,8 +318,21 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                    case Operation::Modulate:
                        return ((input[0] * input[1]) / 255).Cast<u8>();
+                    case Operation::Add:
+                    {
+                        auto result = input[0] + input[1];
+                        result.r() = std::min(255, result.r());
+                        result.g() = std::min(255, result.g());
+                        result.b() = std::min(255, result.b());
+                        return result.Cast<u8>();
+                    }
+                    case Operation::Lerp:
+                        return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>();
                    default:
-                        ERROR_LOG(GPU, "Unknown color combiner operation %d\n", (int)op);
+                        LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
+                        _dbg_assert_(HW_GPU, 0);
                        return {};
                    }
                };
@@ -320,8 +345,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
                    case Operation::Modulate:
                        return input[0] * input[1] / 255;
+                    case Operation::Add:
+                        return std::min(255, input[0] + input[1]);
+                    case Operation::Lerp:
+                        return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
                    default:
-                        ERROR_LOG(GPU, "Unknown alpha combiner operation %d\n", (int)op);
+                        LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op);
+                        _dbg_assert_(HW_GPU, 0);
                        return 0;
                    }
                };
author	darkf	2014-12-29 19:47:41 -0800
committer	darkf	2014-12-29 19:47:41 -0800
commit	8ba9ac0f74abb0408a26207a76a0c1808bad8de0 (patch)
tree	f1c7c3393fa726435b5b90bf335567c93e528ef1 /src/video_core/rasterizer.cpp
parent	Add comment regarding __WIN32__ in SkyEye code (diff)
parent	Merge pull request #367 from bunnei/usat_ssat (diff)
download	yuzu-8ba9ac0f74abb0408a26207a76a0c1808bad8de0.tar.gz yuzu-8ba9ac0f74abb0408a26207a76a0c1808bad8de0.tar.xz yuzu-8ba9ac0f74abb0408a26207a76a0c1808bad8de0.zip

diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index a35f0c0d8..a80148872 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp
@@ -1,5 +1,5 @@
1	// Copyright 2014 Citra Emulator Project	1	// Copyright 2014 Citra Emulator Project
2	// Licensed under GPLv2	2	// Licensed under GPLv2 or any later version
3	// Refer to the license.txt file included.	3	// Refer to the license.txt file included.
4		4
5	#include <algorithm>	5	#include <algorithm>
@@ -18,7 +18,7 @@ namespace Pica {
18	namespace Rasterizer {	18	namespace Rasterizer {
19		19
20	static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {	20	static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
21	u32* color_buffer = (u32*)Memory::GetPointer(registers.framebuffer.GetColorBufferAddress());	21	u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress())));
22	u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b();	22	u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b();
23		23
24	// Assuming RGBA8 format until actual framebuffer format handling is implemented	24	// Assuming RGBA8 format until actual framebuffer format handling is implemented
@@ -26,14 +26,14 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
26	}	26	}
27		27
28	static u32 GetDepth(int x, int y) {	28	static u32 GetDepth(int x, int y) {
29	u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress());	29	u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
30		30
31	// Assuming 16-bit depth buffer format until actual format handling is implemented	31	// Assuming 16-bit depth buffer format until actual format handling is implemented
32	return *(depth_buffer + x + y * registers.framebuffer.GetWidth());	32	return *(depth_buffer + x + y * registers.framebuffer.GetWidth());
33	}	33	}
34		34
35	static void SetDepth(int x, int y, u16 value) {	35	static void SetDepth(int x, int y, u16 value) {
36	u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress());	36	u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
37		37
38	// Assuming 16-bit depth buffer format until actual format handling is implemented	38	// Assuming 16-bit depth buffer format until actual format handling is implemented
39	*(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;	39	*(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;
@@ -106,6 +106,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
106	int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0;	106	int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0;
107	int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0;	107	int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0;
108		108
		109	auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w);
		110
		111	auto textures = registers.GetTextures();
		112	auto tev_stages = registers.GetTevStages();
		113
109	// TODO: Not sure if looping through x first might be faster	114	// TODO: Not sure if looping through x first might be faster
110	for (u16 y = min_y; y < max_y; y += 0x10) {	115	for (u16 y = min_y; y < max_y; y += 0x10) {
111	for (u16 x = min_x; x < max_x; x += 0x10) {	116	for (u16 x = min_x; x < max_x; x += 0x10) {
@@ -129,6 +134,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
129	if (w0 < 0 || w1 < 0 || w2 < 0)	134	if (w0 < 0 || w1 < 0 || w2 < 0)
130	continue;	135	continue;
131		136
		137	auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)),
		138	float24::FromFloat32(static_cast<float>(w1)),
		139	float24::FromFloat32(static_cast<float>(w2)));
		140	float24 interpolated_w_inverse = float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates);
		141
132	// Perspective correct attribute interpolation:	142	// Perspective correct attribute interpolation:
133	// Attribute values cannot be calculated by simple linear interpolation since	143	// Attribute values cannot be calculated by simple linear interpolation since
134	// they are not linear in screen space. For example, when interpolating a	144	// they are not linear in screen space. For example, when interpolating a
@@ -145,19 +155,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
145	//	155	//
146	// The generalization to three vertices is straightforward in baricentric coordinates.	156	// The generalization to three vertices is straightforward in baricentric coordinates.
147	auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) {	157	auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) {
148	auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w,	158	auto attr_over_w = Math::MakeVec(attr0, attr1, attr2);
149	attr1 / v1.pos.w,
150	attr2 / v2.pos.w);
151	auto w_inverse = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w,
152	float24::FromFloat32(1.f) / v1.pos.w,
153	float24::FromFloat32(1.f) / v2.pos.w);
154	auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)),
155	float24::FromFloat32(static_cast<float>(w1)),
156	float24::FromFloat32(static_cast<float>(w2)));
157
158	float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates);	159	float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates);
159	float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates);	160	return interpolated_attr_over_w * interpolated_w_inverse;
160	return interpolated_attr_over_w / interpolated_w_inverse;
161	};	161	};
162		162
163	Math::Vec4<u8> primary_color{	163	Math::Vec4<u8> primary_color{
@@ -167,60 +167,48 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
167	(u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255)	167	(u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255)
168	};	168	};
169		169
170	Math::Vec4<u8> texture_color{};	170	Math::Vec2<float24> uv[3];
171	float24 u = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());	171	uv[0].u() = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());
172	float24 v = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v());	172	uv[0].v() = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v());
173	if (registers.texturing_enable) {	173	uv[1].u() = GetInterpolatedAttribute(v0.tc1.u(), v1.tc1.u(), v2.tc1.u());
174	// Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each	174	uv[1].v() = GetInterpolatedAttribute(v0.tc1.v(), v1.tc1.v(), v2.tc1.v());
175	// of which is composed of four 2x2 subtiles each of which is composed of four texels.	175	uv[2].u() = GetInterpolatedAttribute(v0.tc2.u(), v1.tc2.u(), v2.tc2.u());
176	// Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.	176	uv[2].v() = GetInterpolatedAttribute(v0.tc2.v(), v1.tc2.v(), v2.tc2.v());
177	// texels are laid out in a 2x2 subtile like this:	177
178	// 2 3	178	Math::Vec4<u8> texture_color[3]{};
179	// 0 1	179	for (int i = 0; i < 3; ++i) {
180	//	180	const auto& texture = textures[i];
181	// The full 8x8 tile has the texels arranged like this:	181	if (!texture.enabled)
182	//	182	continue;
183	// 42 43 46 47 58 59 62 63	183
184	// 40 41 44 45 56 57 60 61	184	_dbg_assert_(HW_GPU, 0 != texture.config.address);
185	// 34 35 38 39 50 51 54 55	185
186	// 32 33 36 37 48 49 52 53	186	int s = (int)(uv[i].u() * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32();
187	// 10 11 14 15 26 27 30 31	187	int t = (int)(uv[i].v() * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32();
188	// 08 09 12 13 24 25 28 29	188	auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) {
189	// 02 03 06 07 18 19 22 23	189	switch (mode) {
190	// 00 01 04 05 16 17 20 21	190	case Regs::TextureConfig::ClampToEdge:
191		191	val = std::max(val, 0);
192	// TODO: This is currently hardcoded for RGB8	192	val = std::min(val, (int)size - 1);
193	u32* texture_data = (u32*)Memory::GetPointer(registers.texture0.GetPhysicalAddress());	193	return val;
194		194
195	// TODO(neobrain): Not sure if this swizzling pattern is used for all textures.	195	case Regs::TextureConfig::Repeat:
196	// To be flexible in case different but similar patterns are used, we keep this	196	return (int)(((unsigned)val) % size);
197	// somewhat inefficient code around for now.	197
198	int s = (int)(u * float24::FromFloat32(static_cast<float>(registers.texture0.width))).ToFloat32();	198	default:
199	int t = (int)(v * float24::FromFloat32(static_cast<float>(registers.texture0.height))).ToFloat32();	199	LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x\n", (int)mode);
200	int texel_index_within_tile = 0;	200	_dbg_assert_(HW_GPU, 0);
201	for (int block_size_index = 0; block_size_index < 3; ++block_size_index) {	201	return 0;
202	int sub_tile_width = 1 << block_size_index;	202	}
203	int sub_tile_height = 1 << block_size_index;	203	};
204		204	s = GetWrappedTexCoord(registers.texture0.wrap_s, s, registers.texture0.width);
205	int sub_tile_index = (s & sub_tile_width) << block_size_index;	205	t = GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height);
206	sub_tile_index += 2 * ((t & sub_tile_height) << block_size_index);	206
207	texel_index_within_tile += sub_tile_index;	207	u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress()));
208	}	208	auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);
209		209
210	const int block_width = 8;	210	texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info);
211	const int block_height = 8;	211	DebugUtils::DumpTexture(texture.config, texture_data);
212
213	int coarse_s = (s / block_width) * block_width;
214	int coarse_t = (t / block_height) * block_height;
215
216	const int row_stride = registers.texture0.width * 3;
217	u8* source_ptr = (u8*)texture_data + coarse_s * block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3;
218	texture_color.r() = source_ptr[2];
219	texture_color.g() = source_ptr[1];
220	texture_color.b() = source_ptr[0];
221	texture_color.a() = 0xFF;
222
223	DebugUtils::DumpTexture(registers.texture0, (u8*)texture_data);
224	}	212	}
225		213
226	// Texture environment - consists of 6 stages of color and alpha combining.	214	// Texture environment - consists of 6 stages of color and alpha combining.
@@ -231,28 +219,35 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
231	// with some basic arithmetic. Alpha combiners can be configured separately but work	219	// with some basic arithmetic. Alpha combiners can be configured separately but work
232	// analogously.	220	// analogously.
233	Math::Vec4<u8> combiner_output;	221	Math::Vec4<u8> combiner_output;
234	for (auto tev_stage : registers.GetTevStages()) {	222	for (const auto& tev_stage : tev_stages) {
235	using Source = Regs::TevStageConfig::Source;	223	using Source = Regs::TevStageConfig::Source;
236	using ColorModifier = Regs::TevStageConfig::ColorModifier;	224	using ColorModifier = Regs::TevStageConfig::ColorModifier;
237	using AlphaModifier = Regs::TevStageConfig::AlphaModifier;	225	using AlphaModifier = Regs::TevStageConfig::AlphaModifier;
238	using Operation = Regs::TevStageConfig::Operation;	226	using Operation = Regs::TevStageConfig::Operation;
239		227
240	auto GetColorSource = [&](Source source) -> Math::Vec3<u8> {	228	auto GetColorSource = [&](Source source) -> Math::Vec4<u8> {
241	switch (source) {	229	switch (source) {
242	case Source::PrimaryColor:	230	case Source::PrimaryColor:
243	return primary_color.rgb();	231	return primary_color;
244		232
245	case Source::Texture0:	233	case Source::Texture0:
246	return texture_color.rgb();	234	return texture_color[0];
		235
		236	case Source::Texture1:
		237	return texture_color[1];
		238
		239	case Source::Texture2:
		240	return texture_color[2];
247		241
248	case Source::Constant:	242	case Source::Constant:
249	return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b};	243	return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a};
250		244
251	case Source::Previous:	245	case Source::Previous:
252	return combiner_output.rgb();	246	return combiner_output;
253		247
254	default:	248	default:
255	ERROR_LOG(GPU, "Unknown color combiner source %d\n", (int)source);	249	LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source);
		250	_dbg_assert_(HW_GPU, 0);
256	return {};	251	return {};
257	}	252	}
258	};	253	};
@@ -263,7 +258,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
263	return primary_color.a();	258	return primary_color.a();
264		259
265	case Source::Texture0:	260	case Source::Texture0:
266	return texture_color.a();	261	return texture_color[0].a();
		262
		263	case Source::Texture1:
		264	return texture_color[1].a();
		265
		266	case Source::Texture2:
		267	return texture_color[2].a();
267		268
268	case Source::Constant:	269	case Source::Constant:
269	return tev_stage.const_a;	270	return tev_stage.const_a;
@@ -272,18 +273,24 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
272	return combiner_output.a();	273	return combiner_output.a();
273		274
274	default:	275	default:
275	ERROR_LOG(GPU, "Unknown alpha combiner source %d\n", (int)source);	276	LOG_ERROR(HW_GPU, "Unknown alpha combiner source %d\n", (int)source);
		277	_dbg_assert_(HW_GPU, 0);
276	return 0;	278	return 0;
277	}	279	}
278	};	280	};
279		281
280	auto GetColorModifier = [](ColorModifier factor, const Math::Vec3<u8>& values) -> Math::Vec3<u8> {	282	auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
281	switch (factor)	283	switch (factor)
282	{	284	{
283	case ColorModifier::SourceColor:	285	case ColorModifier::SourceColor:
284	return values;	286	return values.rgb();
		287
		288	case ColorModifier::SourceAlpha:
		289	return { values.a(), values.a(), values.a() };
		290
285	default:	291	default:
286	ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor);	292	LOG_ERROR(HW_GPU, "Unknown color factor %d\n", (int)factor);
		293	_dbg_assert_(HW_GPU, 0);
287	return {};	294	return {};
288	}	295	}
289	};	296	};
@@ -292,8 +299,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
292	switch (factor) {	299	switch (factor) {
293	case AlphaModifier::SourceAlpha:	300	case AlphaModifier::SourceAlpha:
294	return value;	301	return value;
		302
		303	case AlphaModifier::OneMinusSourceAlpha:
		304	return 255 - value;
		305
295	default:	306	default:
296	ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor);	307	LOG_ERROR(HW_GPU, "Unknown alpha factor %d\n", (int)factor);
		308	_dbg_assert_(HW_GPU, 0);
297	return 0;	309	return 0;
298	}	310	}
299	};	311	};
@@ -306,8 +318,21 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
306	case Operation::Modulate:	318	case Operation::Modulate:
307	return ((input[0] * input[1]) / 255).Cast<u8>();	319	return ((input[0] * input[1]) / 255).Cast<u8>();
308		320
		321	case Operation::Add:
		322	{
		323	auto result = input[0] + input[1];
		324	result.r() = std::min(255, result.r());
		325	result.g() = std::min(255, result.g());
		326	result.b() = std::min(255, result.b());
		327	return result.Cast<u8>();
		328	}
		329
		330	case Operation::Lerp:
		331	return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>();
		332
309	default:	333	default:
310	ERROR_LOG(GPU, "Unknown color combiner operation %d\n", (int)op);	334	LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
		335	_dbg_assert_(HW_GPU, 0);
311	return {};	336	return {};
312	}	337	}
313	};	338	};
@@ -320,8 +345,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
320	case Operation::Modulate:	345	case Operation::Modulate:
321	return input[0] * input[1] / 255;	346	return input[0] * input[1] / 255;
322		347
		348	case Operation::Add:
		349	return std::min(255, input[0] + input[1]);
		350
		351	case Operation::Lerp:
		352	return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
		353
323	default:	354	default:
324	ERROR_LOG(GPU, "Unknown alpha combiner operation %d\n", (int)op);	355	LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op);
		356	_dbg_assert_(HW_GPU, 0);
325	return 0;	357	return 0;
326	}	358	}
327	};	359	};