summaryrefslogtreecommitdiff
path: root/src/video_core/rasterizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/rasterizer.cpp')
-rw-r--r--src/video_core/rasterizer.cpp436
1 files changed, 243 insertions, 193 deletions
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 6f369a00e..6c4bbed33 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -5,7 +5,6 @@
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <array>
7#include <cmath> 7#include <cmath>
8
9#include "common/assert.h" 8#include "common/assert.h"
10#include "common/bit_field.h" 9#include "common/bit_field.h"
11#include "common/color.h" 10#include "common/color.h"
@@ -14,17 +13,15 @@
14#include "common/math_util.h" 13#include "common/math_util.h"
15#include "common/microprofile.h" 14#include "common/microprofile.h"
16#include "common/vector_math.h" 15#include "common/vector_math.h"
17
18#include "core/memory.h"
19#include "core/hw/gpu.h" 16#include "core/hw/gpu.h"
20 17#include "core/memory.h"
21#include "video_core/debug_utils/debug_utils.h" 18#include "video_core/debug_utils/debug_utils.h"
22#include "video_core/pica.h" 19#include "video_core/pica.h"
23#include "video_core/pica_state.h" 20#include "video_core/pica_state.h"
24#include "video_core/pica_types.h" 21#include "video_core/pica_types.h"
25#include "video_core/rasterizer.h" 22#include "video_core/rasterizer.h"
26#include "video_core/utils.h"
27#include "video_core/shader/shader.h" 23#include "video_core/shader/shader.h"
24#include "video_core/utils.h"
28 25
29namespace Pica { 26namespace Pica {
30 27
@@ -39,8 +36,10 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
39 y = framebuffer.height - y; 36 y = framebuffer.height - y;
40 37
41 const u32 coarse_y = y & ~7; 38 const u32 coarse_y = y & ~7;
42 u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); 39 u32 bytes_per_pixel =
43 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; 40 GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
41 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
42 coarse_y * framebuffer.width * bytes_per_pixel;
44 u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; 43 u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset;
45 44
46 switch (framebuffer.color_format) { 45 switch (framebuffer.color_format) {
@@ -65,7 +64,8 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
65 break; 64 break;
66 65
67 default: 66 default:
68 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value()); 67 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x",
68 framebuffer.color_format.Value());
69 UNIMPLEMENTED(); 69 UNIMPLEMENTED();
70 } 70 }
71} 71}
@@ -77,8 +77,10 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
77 y = framebuffer.height - y; 77 y = framebuffer.height - y;
78 78
79 const u32 coarse_y = y & ~7; 79 const u32 coarse_y = y & ~7;
80 u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); 80 u32 bytes_per_pixel =
81 u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; 81 GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
82 u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
83 coarse_y * framebuffer.width * bytes_per_pixel;
82 u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; 84 u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset;
83 85
84 switch (framebuffer.color_format) { 86 switch (framebuffer.color_format) {
@@ -98,7 +100,8 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
98 return Color::DecodeRGBA4(src_pixel); 100 return Color::DecodeRGBA4(src_pixel);
99 101
100 default: 102 default:
101 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value()); 103 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x",
104 framebuffer.color_format.Value());
102 UNIMPLEMENTED(); 105 UNIMPLEMENTED();
103 } 106 }
104 107
@@ -120,16 +123,16 @@ static u32 GetDepth(int x, int y) {
120 u8* src_pixel = depth_buffer + src_offset; 123 u8* src_pixel = depth_buffer + src_offset;
121 124
122 switch (framebuffer.depth_format) { 125 switch (framebuffer.depth_format) {
123 case Regs::DepthFormat::D16: 126 case Regs::DepthFormat::D16:
124 return Color::DecodeD16(src_pixel); 127 return Color::DecodeD16(src_pixel);
125 case Regs::DepthFormat::D24: 128 case Regs::DepthFormat::D24:
126 return Color::DecodeD24(src_pixel); 129 return Color::DecodeD24(src_pixel);
127 case Regs::DepthFormat::D24S8: 130 case Regs::DepthFormat::D24S8:
128 return Color::DecodeD24S8(src_pixel).x; 131 return Color::DecodeD24S8(src_pixel).x;
129 default: 132 default:
130 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); 133 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
131 UNIMPLEMENTED(); 134 UNIMPLEMENTED();
132 return 0; 135 return 0;
133 } 136 }
134} 137}
135 138
@@ -148,12 +151,15 @@ static u8 GetStencil(int x, int y) {
148 u8* src_pixel = depth_buffer + src_offset; 151 u8* src_pixel = depth_buffer + src_offset;
149 152
150 switch (framebuffer.depth_format) { 153 switch (framebuffer.depth_format) {
151 case Regs::DepthFormat::D24S8: 154 case Regs::DepthFormat::D24S8:
152 return Color::DecodeD24S8(src_pixel).y; 155 return Color::DecodeD24S8(src_pixel).y;
153 156
154 default: 157 default:
155 LOG_WARNING(HW_GPU, "GetStencil called for function which doesn't have a stencil component (format %u)", framebuffer.depth_format); 158 LOG_WARNING(
156 return 0; 159 HW_GPU,
160 "GetStencil called for function which doesn't have a stencil component (format %u)",
161 framebuffer.depth_format);
162 return 0;
157 } 163 }
158} 164}
159 165
@@ -172,22 +178,22 @@ static void SetDepth(int x, int y, u32 value) {
172 u8* dst_pixel = depth_buffer + dst_offset; 178 u8* dst_pixel = depth_buffer + dst_offset;
173 179
174 switch (framebuffer.depth_format) { 180 switch (framebuffer.depth_format) {
175 case Regs::DepthFormat::D16: 181 case Regs::DepthFormat::D16:
176 Color::EncodeD16(value, dst_pixel); 182 Color::EncodeD16(value, dst_pixel);
177 break; 183 break;
178 184
179 case Regs::DepthFormat::D24: 185 case Regs::DepthFormat::D24:
180 Color::EncodeD24(value, dst_pixel); 186 Color::EncodeD24(value, dst_pixel);
181 break; 187 break;
182 188
183 case Regs::DepthFormat::D24S8: 189 case Regs::DepthFormat::D24S8:
184 Color::EncodeD24X8(value, dst_pixel); 190 Color::EncodeD24X8(value, dst_pixel);
185 break; 191 break;
186 192
187 default: 193 default:
188 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); 194 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
189 UNIMPLEMENTED(); 195 UNIMPLEMENTED();
190 break; 196 break;
191 } 197 }
192} 198}
193 199
@@ -206,19 +212,19 @@ static void SetStencil(int x, int y, u8 value) {
206 u8* dst_pixel = depth_buffer + dst_offset; 212 u8* dst_pixel = depth_buffer + dst_offset;
207 213
208 switch (framebuffer.depth_format) { 214 switch (framebuffer.depth_format) {
209 case Pica::Regs::DepthFormat::D16: 215 case Pica::Regs::DepthFormat::D16:
210 case Pica::Regs::DepthFormat::D24: 216 case Pica::Regs::DepthFormat::D24:
211 // Nothing to do 217 // Nothing to do
212 break; 218 break;
213 219
214 case Pica::Regs::DepthFormat::D24S8: 220 case Pica::Regs::DepthFormat::D24S8:
215 Color::EncodeX24S8(value, dst_pixel); 221 Color::EncodeX24S8(value, dst_pixel);
216 break; 222 break;
217 223
218 default: 224 default:
219 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); 225 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
220 UNIMPLEMENTED(); 226 UNIMPLEMENTED();
221 break; 227 break;
222 } 228 }
223} 229}
224 230
@@ -262,15 +268,19 @@ struct Fix12P4 {
262 Fix12P4() {} 268 Fix12P4() {}
263 Fix12P4(u16 val) : val(val) {} 269 Fix12P4(u16 val) : val(val) {}
264 270
265 static u16 FracMask() { return 0xF; } 271 static u16 FracMask() {
266 static u16 IntMask() { return (u16)~0xF; } 272 return 0xF;
273 }
274 static u16 IntMask() {
275 return (u16)~0xF;
276 }
267 277
268 operator u16() const { 278 operator u16() const {
269 return val; 279 return val;
270 } 280 }
271 281
272 bool operator < (const Fix12P4& oth) const { 282 bool operator<(const Fix12P4& oth) const {
273 return (u16)*this < (u16)oth; 283 return (u16) * this < (u16)oth;
274 } 284 }
275 285
276private: 286private:
@@ -283,9 +293,8 @@ private:
283 * 293 *
284 * @todo define orientation concretely. 294 * @todo define orientation concretely.
285 */ 295 */
286static int SignedArea (const Math::Vec2<Fix12P4>& vtx1, 296static int SignedArea(const Math::Vec2<Fix12P4>& vtx1, const Math::Vec2<Fix12P4>& vtx2,
287 const Math::Vec2<Fix12P4>& vtx2, 297 const Math::Vec2<Fix12P4>& vtx3) {
288 const Math::Vec2<Fix12P4>& vtx3) {
289 const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); 298 const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
290 const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); 299 const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
291 // TODO: There is a very small chance this will overflow for sizeof(int) == 4 300 // TODO: There is a very small chance this will overflow for sizeof(int) == 4
@@ -298,11 +307,8 @@ MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 24
298 * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing 307 * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing
299 * culling via recursion. 308 * culling via recursion.
300 */ 309 */
301static void ProcessTriangleInternal(const Shader::OutputVertex& v0, 310static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1,
302 const Shader::OutputVertex& v1, 311 const Shader::OutputVertex& v2, bool reversed = false) {
303 const Shader::OutputVertex& v2,
304 bool reversed = false)
305{
306 const auto& regs = g_state.regs; 312 const auto& regs = g_state.regs;
307 MICROPROFILE_SCOPE(GPU_Rasterization); 313 MICROPROFILE_SCOPE(GPU_Rasterization);
308 314
@@ -316,9 +322,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
316 return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; 322 return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)};
317 }; 323 };
318 324
319 Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos), 325 Math::Vec3<Fix12P4> vtxpos[3]{ScreenToRasterizerCoordinates(v0.screenpos),
320 ScreenToRasterizerCoordinates(v1.screenpos), 326 ScreenToRasterizerCoordinates(v1.screenpos),
321 ScreenToRasterizerCoordinates(v2.screenpos) }; 327 ScreenToRasterizerCoordinates(v2.screenpos)};
322 328
323 if (regs.cull_mode == Regs::CullMode::KeepAll) { 329 if (regs.cull_mode == Regs::CullMode::KeepAll) {
324 // Make sure we always end up with a triangle wound counter-clockwise 330 // Make sure we always end up with a triangle wound counter-clockwise
@@ -344,8 +350,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
344 u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); 350 u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
345 351
346 // Convert the scissor box coordinates to 12.4 fixed point 352 // Convert the scissor box coordinates to 12.4 fixed point
347 u16 scissor_x1 = (u16)( regs.scissor_test.x1 << 4); 353 u16 scissor_x1 = (u16)(regs.scissor_test.x1 << 4);
348 u16 scissor_y1 = (u16)( regs.scissor_test.y1 << 4); 354 u16 scissor_y1 = (u16)(regs.scissor_test.y1 << 4);
349 // x2,y2 have +1 added to cover the entire sub-pixel area 355 // x2,y2 have +1 added to cover the entire sub-pixel area
350 u16 scissor_x2 = (u16)((regs.scissor_test.x2 + 1) << 4); 356 u16 scissor_x2 = (u16)((regs.scissor_test.x2 + 1) << 4);
351 u16 scissor_y2 = (u16)((regs.scissor_test.y2 + 1) << 4); 357 u16 scissor_y2 = (u16)((regs.scissor_test.y2 + 1) << 4);
@@ -369,27 +375,32 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
369 // NOTE: These are the PSP filling rules. Not sure if the 3DS uses the same ones... 375 // NOTE: These are the PSP filling rules. Not sure if the 3DS uses the same ones...
370 auto IsRightSideOrFlatBottomEdge = [](const Math::Vec2<Fix12P4>& vtx, 376 auto IsRightSideOrFlatBottomEdge = [](const Math::Vec2<Fix12P4>& vtx,
371 const Math::Vec2<Fix12P4>& line1, 377 const Math::Vec2<Fix12P4>& line1,
372 const Math::Vec2<Fix12P4>& line2) 378 const Math::Vec2<Fix12P4>& line2) {
373 {
374 if (line1.y == line2.y) { 379 if (line1.y == line2.y) {
375 // just check if vertex is above us => bottom line parallel to x-axis 380 // just check if vertex is above us => bottom line parallel to x-axis
376 return vtx.y < line1.y; 381 return vtx.y < line1.y;
377 } else { 382 } else {
378 // check if vertex is on our left => right side 383 // check if vertex is on our left => right side
379 // TODO: Not sure how likely this is to overflow 384 // TODO: Not sure how likely this is to overflow
380 return (int)vtx.x < (int)line1.x + ((int)line2.x - (int)line1.x) * ((int)vtx.y - (int)line1.y) / ((int)line2.y - (int)line1.y); 385 return (int)vtx.x < (int)line1.x +
386 ((int)line2.x - (int)line1.x) * ((int)vtx.y - (int)line1.y) /
387 ((int)line2.y - (int)line1.y);
381 } 388 }
382 }; 389 };
383 int bias0 = IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0; 390 int bias0 =
384 int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; 391 IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0;
385 int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; 392 int bias1 =
393 IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0;
394 int bias2 =
395 IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0;
386 396
387 auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); 397 auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w);
388 398
389 auto textures = regs.GetTextures(); 399 auto textures = regs.GetTextures();
390 auto tev_stages = regs.GetTevStages(); 400 auto tev_stages = regs.GetTevStages();
391 401
392 bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8; 402 bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable &&
403 g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8;
393 const auto stencil_test = g_state.regs.output_merger.stencil_test; 404 const auto stencil_test = g_state.regs.output_merger.stencil_test;
394 405
395 // Enter rasterization loop, starting at the center of the topleft bounding box corner. 406 // Enter rasterization loop, starting at the center of the topleft bounding box corner.
@@ -397,10 +408,10 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
397 for (u16 y = min_y + 8; y < max_y; y += 0x10) { 408 for (u16 y = min_y + 8; y < max_y; y += 0x10) {
398 for (u16 x = min_x + 8; x < max_x; x += 0x10) { 409 for (u16 x = min_x + 8; x < max_x; x += 0x10) {
399 410
400 // Do not process the pixel if it's inside the scissor box and the scissor mode is set to Exclude 411 // Do not process the pixel if it's inside the scissor box and the scissor mode is set
412 // to Exclude
401 if (regs.scissor_test.mode == Regs::ScissorMode::Exclude) { 413 if (regs.scissor_test.mode == Regs::ScissorMode::Exclude) {
402 if (x >= scissor_x1 && x < scissor_x2 && 414 if (x >= scissor_x1 && x < scissor_x2 && y >= scissor_y1 && y < scissor_y2)
403 y >= scissor_y1 && y < scissor_y2)
404 continue; 415 continue;
405 } 416 }
406 417
@@ -414,15 +425,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
414 if (w0 < 0 || w1 < 0 || w2 < 0) 425 if (w0 < 0 || w1 < 0 || w2 < 0)
415 continue; 426 continue;
416 427
417 auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)), 428 auto baricentric_coordinates =
418 float24::FromFloat32(static_cast<float>(w1)), 429 Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)),
419 float24::FromFloat32(static_cast<float>(w2))); 430 float24::FromFloat32(static_cast<float>(w1)),
420 float24 interpolated_w_inverse = float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates); 431 float24::FromFloat32(static_cast<float>(w2)));
432 float24 interpolated_w_inverse =
433 float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates);
421 434
422 // interpolated_z = z / w 435 // interpolated_z = z / w
423 float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 + 436 float interpolated_z_over_w =
424 v1.screenpos[2].ToFloat32() * w1 + 437 (v0.screenpos[2].ToFloat32() * w0 + v1.screenpos[2].ToFloat32() * w1 +
425 v2.screenpos[2].ToFloat32() * w2) / wsum; 438 v2.screenpos[2].ToFloat32() * w2) /
439 wsum;
426 440
427 // Not fully accurate. About 3 bits in precision are missing. 441 // Not fully accurate. About 3 bits in precision are missing.
428 // Z-Buffer (z / w * scale + offset) 442 // Z-Buffer (z / w * scale + offset)
@@ -461,10 +475,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
461 }; 475 };
462 476
463 Math::Vec4<u8> primary_color{ 477 Math::Vec4<u8> primary_color{
464 (u8)(GetInterpolatedAttribute(v0.color.r(), v1.color.r(), v2.color.r()).ToFloat32() * 255), 478 (u8)(
465 (u8)(GetInterpolatedAttribute(v0.color.g(), v1.color.g(), v2.color.g()).ToFloat32() * 255), 479 GetInterpolatedAttribute(v0.color.r(), v1.color.r(), v2.color.r()).ToFloat32() *
466 (u8)(GetInterpolatedAttribute(v0.color.b(), v1.color.b(), v2.color.b()).ToFloat32() * 255), 480 255),
467 (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) 481 (u8)(
482 GetInterpolatedAttribute(v0.color.g(), v1.color.g(), v2.color.g()).ToFloat32() *
483 255),
484 (u8)(
485 GetInterpolatedAttribute(v0.color.b(), v1.color.b(), v2.color.b()).ToFloat32() *
486 255),
487 (u8)(
488 GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() *
489 255),
468 }; 490 };
469 491
470 Math::Vec2<float24> uv[3]; 492 Math::Vec2<float24> uv[3];
@@ -489,7 +511,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
489 // Only unit 0 respects the texturing type (according to 3DBrew) 511 // Only unit 0 respects the texturing type (according to 3DBrew)
490 // TODO: Refactor so cubemaps and shadowmaps can be handled 512 // TODO: Refactor so cubemaps and shadowmaps can be handled
491 if (i == 0) { 513 if (i == 0) {
492 switch(texture.config.type) { 514 switch (texture.config.type) {
493 case Regs::TextureConfig::Texture2D: 515 case Regs::TextureConfig::Texture2D:
494 break; 516 break;
495 case Regs::TextureConfig::Projection2D: { 517 case Regs::TextureConfig::Projection2D: {
@@ -506,51 +528,58 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
506 } 528 }
507 } 529 }
508 530
509 int s = (int)(u * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32(); 531 int s = (int)(u * float24::FromFloat32(static_cast<float>(texture.config.width)))
510 int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32(); 532 .ToFloat32();
533 int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height)))
534 .ToFloat32();
511 535
512 536 static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val,
513 static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { 537 unsigned size) {
514 switch (mode) { 538 switch (mode) {
515 case Regs::TextureConfig::ClampToEdge: 539 case Regs::TextureConfig::ClampToEdge:
516 val = std::max(val, 0); 540 val = std::max(val, 0);
517 val = std::min(val, (int)size - 1); 541 val = std::min(val, (int)size - 1);
518 return val; 542 return val;
519 543
520 case Regs::TextureConfig::ClampToBorder: 544 case Regs::TextureConfig::ClampToBorder:
521 return val; 545 return val;
522 546
523 case Regs::TextureConfig::Repeat: 547 case Regs::TextureConfig::Repeat:
524 return (int)((unsigned)val % size); 548 return (int)((unsigned)val % size);
525 549
526 case Regs::TextureConfig::MirroredRepeat: 550 case Regs::TextureConfig::MirroredRepeat: {
527 { 551 unsigned int coord = ((unsigned)val % (2 * size));
528 unsigned int coord = ((unsigned)val % (2 * size)); 552 if (coord >= size)
529 if (coord >= size) 553 coord = 2 * size - 1 - coord;
530 coord = 2 * size - 1 - coord; 554 return (int)coord;
531 return (int)coord; 555 }
532 } 556
533 557 default:
534 default: 558 LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode);
535 LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); 559 UNIMPLEMENTED();
536 UNIMPLEMENTED(); 560 return 0;
537 return 0;
538 } 561 }
539 }; 562 };
540 563
541 if ((texture.config.wrap_s == Regs::TextureConfig::ClampToBorder && (s < 0 || s >= texture.config.width)) 564 if ((texture.config.wrap_s == Regs::TextureConfig::ClampToBorder &&
542 || (texture.config.wrap_t == Regs::TextureConfig::ClampToBorder && (t < 0 || t >= texture.config.height))) { 565 (s < 0 || s >= texture.config.width)) ||
566 (texture.config.wrap_t == Regs::TextureConfig::ClampToBorder &&
567 (t < 0 || t >= texture.config.height))) {
543 auto border_color = texture.config.border_color; 568 auto border_color = texture.config.border_color;
544 texture_color[i] = { border_color.r, border_color.g, border_color.b, border_color.a }; 569 texture_color[i] = {border_color.r, border_color.g, border_color.b,
570 border_color.a};
545 } else { 571 } else {
546 // Textures are laid out from bottom to top, hence we invert the t coordinate. 572 // Textures are laid out from bottom to top, hence we invert the t coordinate.
547 // NOTE: This may not be the right place for the inversion. 573 // NOTE: This may not be the right place for the inversion.
548 // TODO: Check if this applies to ETC textures, too. 574 // TODO: Check if this applies to ETC textures, too.
549 s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); 575 s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width);
550 t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); 576 t = texture.config.height - 1 -
577 GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height);
551 578
552 u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); 579 u8* texture_data =
553 auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); 580 Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
581 auto info =
582 DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);
554 583
555 // TODO: Apply the min and mag filters to the texture 584 // TODO: Apply the min and mag filters to the texture
556 texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info); 585 texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info);
@@ -571,10 +600,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
571 Math::Vec4<u8> combiner_buffer = {0, 0, 0, 0}; 600 Math::Vec4<u8> combiner_buffer = {0, 0, 0, 0};
572 Math::Vec4<u8> next_combiner_buffer = { 601 Math::Vec4<u8> next_combiner_buffer = {
573 regs.tev_combiner_buffer_color.r, regs.tev_combiner_buffer_color.g, 602 regs.tev_combiner_buffer_color.r, regs.tev_combiner_buffer_color.g,
574 regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a 603 regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a,
575 }; 604 };
576 605
577 for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { 606 for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size();
607 ++tev_stage_index) {
578 const auto& tev_stage = tev_stages[tev_stage_index]; 608 const auto& tev_stage = tev_stages[tev_stage_index];
579 using Source = Regs::TevStageConfig::Source; 609 using Source = Regs::TevStageConfig::Source;
580 using ColorModifier = Regs::TevStageConfig::ColorModifier; 610 using ColorModifier = Regs::TevStageConfig::ColorModifier;
@@ -606,7 +636,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
606 return combiner_buffer; 636 return combiner_buffer;
607 637
608 case Source::Constant: 638 case Source::Constant:
609 return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a}; 639 return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b,
640 tev_stage.const_a};
610 641
611 case Source::Previous: 642 case Source::Previous:
612 return combiner_output; 643 return combiner_output;
@@ -618,7 +649,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
618 } 649 }
619 }; 650 };
620 651
621 static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> { 652 static auto GetColorModifier = [](ColorModifier factor,
653 const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
622 switch (factor) { 654 switch (factor) {
623 case ColorModifier::SourceColor: 655 case ColorModifier::SourceColor:
624 return values.rgb(); 656 return values.rgb();
@@ -652,7 +684,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
652 } 684 }
653 }; 685 };
654 686
655 static auto GetAlphaModifier = [](AlphaModifier factor, const Math::Vec4<u8>& values) -> u8 { 687 static auto GetAlphaModifier = [](AlphaModifier factor,
688 const Math::Vec4<u8>& values) -> u8 {
656 switch (factor) { 689 switch (factor) {
657 case AlphaModifier::SourceAlpha: 690 case AlphaModifier::SourceAlpha:
658 return values.a(); 691 return values.a();
@@ -680,7 +713,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
680 } 713 }
681 }; 714 };
682 715
683 static auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { 716 static auto ColorCombine = [](Operation op,
717 const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
684 switch (op) { 718 switch (op) {
685 case Operation::Replace: 719 case Operation::Replace:
686 return input[0]; 720 return input[0];
@@ -688,8 +722,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
688 case Operation::Modulate: 722 case Operation::Modulate:
689 return ((input[0] * input[1]) / 255).Cast<u8>(); 723 return ((input[0] * input[1]) / 255).Cast<u8>();
690 724
691 case Operation::Add: 725 case Operation::Add: {
692 {
693 auto result = input[0] + input[1]; 726 auto result = input[0] + input[1];
694 result.r() = std::min(255, result.r()); 727 result.r() = std::min(255, result.r());
695 result.g() = std::min(255, result.g()); 728 result.g() = std::min(255, result.g());
@@ -697,10 +730,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
697 return result.Cast<u8>(); 730 return result.Cast<u8>();
698 } 731 }
699 732
700 case Operation::AddSigned: 733 case Operation::AddSigned: {
701 { 734 // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
702 // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct 735 // (byte) 128 is correct
703 auto result = input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128); 736 auto result = input[0].Cast<int>() + input[1].Cast<int>() -
737 Math::MakeVec<int>(128, 128, 128);
704 result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); 738 result.r() = MathUtil::Clamp<int>(result.r(), 0, 255);
705 result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); 739 result.g() = MathUtil::Clamp<int>(result.g(), 0, 255);
706 result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); 740 result.b() = MathUtil::Clamp<int>(result.b(), 0, 255);
@@ -708,10 +742,13 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
708 } 742 }
709 743
710 case Operation::Lerp: 744 case Operation::Lerp:
711 return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); 745 return ((input[0] * input[2] +
746 input[1] *
747 (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) /
748 255)
749 .Cast<u8>();
712 750
713 case Operation::Subtract: 751 case Operation::Subtract: {
714 {
715 auto result = input[0].Cast<int>() - input[1].Cast<int>(); 752 auto result = input[0].Cast<int>() - input[1].Cast<int>();
716 result.r() = std::max(0, result.r()); 753 result.r() = std::max(0, result.r());
717 result.g() = std::max(0, result.g()); 754 result.g() = std::max(0, result.g());
@@ -719,8 +756,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
719 return result.Cast<u8>(); 756 return result.Cast<u8>();
720 } 757 }
721 758
722 case Operation::MultiplyThenAdd: 759 case Operation::MultiplyThenAdd: {
723 {
724 auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255; 760 auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255;
725 result.r() = std::min(255, result.r()); 761 result.r() = std::min(255, result.r());
726 result.g() = std::min(255, result.g()); 762 result.g() = std::min(255, result.g());
@@ -728,8 +764,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
728 return result.Cast<u8>(); 764 return result.Cast<u8>();
729 } 765 }
730 766
731 case Operation::AddThenMultiply: 767 case Operation::AddThenMultiply: {
732 {
733 auto result = input[0] + input[1]; 768 auto result = input[0] + input[1];
734 result.r() = std::min(255, result.r()); 769 result.r() = std::min(255, result.r());
735 result.g() = std::min(255, result.g()); 770 result.g() = std::min(255, result.g());
@@ -737,17 +772,19 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
737 result = (result * input[2].Cast<int>()) / 255; 772 result = (result * input[2].Cast<int>()) / 255;
738 return result.Cast<u8>(); 773 return result.Cast<u8>();
739 } 774 }
740 case Operation::Dot3_RGB: 775 case Operation::Dot3_RGB: {
741 {
742 // Not fully accurate. 776 // Not fully accurate.
743 // Worst case scenario seems to yield a +/-3 error 777 // Worst case scenario seems to yield a +/-3 error
744 // Some HW results indicate that the per-component computation can't have a higher precision than 1/256, 778 // Some HW results indicate that the per-component computation can't have a
745 // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb( (0x80,g0,b0),(0x80,g1,b1) ) give different results 779 // higher precision than 1/256,
746 int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + 780 // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb(
747 ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + 781 // (0x80,g0,b0),(0x80,g1,b1) ) give different results
748 ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; 782 int result =
783 ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 +
784 ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 +
785 ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256;
749 result = std::max(0, std::min(255, result)); 786 result = std::max(0, std::min(255, result));
750 return { (u8)result, (u8)result, (u8)result }; 787 return {(u8)result, (u8)result, (u8)result};
751 } 788 }
752 default: 789 default:
753 LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op); 790 LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op);
@@ -756,7 +793,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
756 } 793 }
757 }; 794 };
758 795
759 static auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 { 796 static auto AlphaCombine = [](Operation op, const std::array<u8, 3>& input) -> u8 {
760 switch (op) { 797 switch (op) {
761 case Operation::Replace: 798 case Operation::Replace:
762 return input[0]; 799 return input[0];
@@ -767,9 +804,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
767 case Operation::Add: 804 case Operation::Add:
768 return std::min(255, input[0] + input[1]); 805 return std::min(255, input[0] + input[1]);
769 806
770 case Operation::AddSigned: 807 case Operation::AddSigned: {
771 { 808 // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
772 // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct 809 // (byte) 128 is correct
773 auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128; 810 auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128;
774 return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255)); 811 return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255));
775 } 812 }
@@ -801,32 +838,38 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
801 Math::Vec3<u8> color_result[3] = { 838 Math::Vec3<u8> color_result[3] = {
802 GetColorModifier(tev_stage.color_modifier1, GetSource(tev_stage.color_source1)), 839 GetColorModifier(tev_stage.color_modifier1, GetSource(tev_stage.color_source1)),
803 GetColorModifier(tev_stage.color_modifier2, GetSource(tev_stage.color_source2)), 840 GetColorModifier(tev_stage.color_modifier2, GetSource(tev_stage.color_source2)),
804 GetColorModifier(tev_stage.color_modifier3, GetSource(tev_stage.color_source3)) 841 GetColorModifier(tev_stage.color_modifier3, GetSource(tev_stage.color_source3)),
805 }; 842 };
806 auto color_output = ColorCombine(tev_stage.color_op, color_result); 843 auto color_output = ColorCombine(tev_stage.color_op, color_result);
807 844
808 // alpha combiner 845 // alpha combiner
809 std::array<u8,3> alpha_result = {{ 846 std::array<u8, 3> alpha_result = {{
810 GetAlphaModifier(tev_stage.alpha_modifier1, GetSource(tev_stage.alpha_source1)), 847 GetAlphaModifier(tev_stage.alpha_modifier1, GetSource(tev_stage.alpha_source1)),
811 GetAlphaModifier(tev_stage.alpha_modifier2, GetSource(tev_stage.alpha_source2)), 848 GetAlphaModifier(tev_stage.alpha_modifier2, GetSource(tev_stage.alpha_source2)),
812 GetAlphaModifier(tev_stage.alpha_modifier3, GetSource(tev_stage.alpha_source3)) 849 GetAlphaModifier(tev_stage.alpha_modifier3, GetSource(tev_stage.alpha_source3)),
813 }}; 850 }};
814 auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); 851 auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
815 852
816 combiner_output[0] = std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier()); 853 combiner_output[0] =
817 combiner_output[1] = std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier()); 854 std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier());
818 combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); 855 combiner_output[1] =
819 combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); 856 std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier());
857 combiner_output[2] =
858 std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier());
859 combiner_output[3] =
860 std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier());
820 861
821 combiner_buffer = next_combiner_buffer; 862 combiner_buffer = next_combiner_buffer;
822 863
823 if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) { 864 if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(
865 tev_stage_index)) {
824 next_combiner_buffer.r() = combiner_output.r(); 866 next_combiner_buffer.r() = combiner_output.r();
825 next_combiner_buffer.g() = combiner_output.g(); 867 next_combiner_buffer.g() = combiner_output.g();
826 next_combiner_buffer.b() = combiner_output.b(); 868 next_combiner_buffer.b() = combiner_output.b();
827 } 869 }
828 870
829 if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) { 871 if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(
872 tev_stage_index)) {
830 next_combiner_buffer.a() = combiner_output.a(); 873 next_combiner_buffer.a() = combiner_output.a();
831 } 874 }
832 } 875 }
@@ -897,21 +940,26 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
897 float fog_i = MathUtil::Clamp(floorf(fog_index), 0.0f, 127.0f); 940 float fog_i = MathUtil::Clamp(floorf(fog_index), 0.0f, 127.0f);
898 float fog_f = fog_index - fog_i; 941 float fog_f = fog_index - fog_i;
899 const auto& fog_lut_entry = g_state.fog.lut[static_cast<unsigned int>(fog_i)]; 942 const auto& fog_lut_entry = g_state.fog.lut[static_cast<unsigned int>(fog_i)];
900 float fog_factor = (fog_lut_entry.value + fog_lut_entry.difference * fog_f) / 2047.0f; // This is signed fixed point 1.11 943 float fog_factor = (fog_lut_entry.value + fog_lut_entry.difference * fog_f) /
944 2047.0f; // This is signed fixed point 1.11
901 fog_factor = MathUtil::Clamp(fog_factor, 0.0f, 1.0f); 945 fog_factor = MathUtil::Clamp(fog_factor, 0.0f, 1.0f);
902 946
903 // Blend the fog 947 // Blend the fog
904 for (unsigned i = 0; i < 3; i++) { 948 for (unsigned i = 0; i < 3; i++) {
905 combiner_output[i] = fog_factor * combiner_output[i] + (1.0f - fog_factor) * fog_color[i]; 949 combiner_output[i] =
950 fog_factor * combiner_output[i] + (1.0f - fog_factor) * fog_color[i];
906 } 951 }
907 } 952 }
908 953
909 u8 old_stencil = 0; 954 u8 old_stencil = 0;
910 955
911 auto UpdateStencil = [stencil_test, x, y, &old_stencil](Pica::Regs::StencilAction action) { 956 auto UpdateStencil = [stencil_test, x, y,
912 u8 new_stencil = PerformStencilAction(action, old_stencil, stencil_test.reference_value); 957 &old_stencil](Pica::Regs::StencilAction action) {
958 u8 new_stencil =
959 PerformStencilAction(action, old_stencil, stencil_test.reference_value);
913 if (g_state.regs.framebuffer.allow_depth_stencil_write != 0) 960 if (g_state.regs.framebuffer.allow_depth_stencil_write != 0)
914 SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask)); 961 SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) |
962 (old_stencil & ~stencil_test.write_mask));
915 }; 963 };
916 964
917 if (stencil_action_enable) { 965 if (stencil_action_enable) {
@@ -1030,7 +1078,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1030 static_cast<u8>(output_merger.blend_const.r), 1078 static_cast<u8>(output_merger.blend_const.r),
1031 static_cast<u8>(output_merger.blend_const.g), 1079 static_cast<u8>(output_merger.blend_const.g),
1032 static_cast<u8>(output_merger.blend_const.b), 1080 static_cast<u8>(output_merger.blend_const.b),
1033 static_cast<u8>(output_merger.blend_const.a) 1081 static_cast<u8>(output_merger.blend_const.a),
1034 }; 1082 };
1035 1083
1036 switch (factor) { 1084 switch (factor) {
@@ -1091,12 +1139,13 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1091 return combiner_output[channel]; 1139 return combiner_output[channel];
1092 }; 1140 };
1093 1141
1094 static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, 1142 static auto EvaluateBlendEquation = [](
1095 const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, 1143 const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
1096 Regs::BlendEquation equation) { 1144 const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor,
1145 Regs::BlendEquation equation) {
1097 Math::Vec4<int> result; 1146 Math::Vec4<int> result;
1098 1147
1099 auto src_result = (src * srcfactor).Cast<int>(); 1148 auto src_result = (src * srcfactor).Cast<int>();
1100 auto dst_result = (dest * destfactor).Cast<int>(); 1149 auto dst_result = (dest * destfactor).Cast<int>();
1101 1150
1102 switch (equation) { 1151 switch (equation) {
@@ -1134,10 +1183,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1134 UNIMPLEMENTED(); 1183 UNIMPLEMENTED();
1135 } 1184 }
1136 1185
1137 return Math::Vec4<u8>(MathUtil::Clamp(result.r(), 0, 255), 1186 return Math::Vec4<u8>(
1138 MathUtil::Clamp(result.g(), 0, 255), 1187 MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255),
1139 MathUtil::Clamp(result.b(), 0, 255), 1188 MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255));
1140 MathUtil::Clamp(result.a(), 0, 255));
1141 }; 1189 };
1142 1190
1143 auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb), 1191 auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb),
@@ -1150,8 +1198,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1150 LookupFactor(2, params.factor_dest_rgb), 1198 LookupFactor(2, params.factor_dest_rgb),
1151 LookupFactor(3, params.factor_dest_a)); 1199 LookupFactor(3, params.factor_dest_a));
1152 1200
1153 blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); 1201 blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor,
1154 blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); 1202 params.blend_equation_rgb);
1203 blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest,
1204 dstfactor, params.blend_equation_a)
1205 .a();
1155 } else { 1206 } else {
1156 static auto LogicOp = [](u8 src, u8 dest, Regs::LogicOp op) -> u8 { 1207 static auto LogicOp = [](u8 src, u8 dest, Regs::LogicOp op) -> u8 {
1157 switch (op) { 1208 switch (op) {
@@ -1205,18 +1256,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1205 } 1256 }
1206 }; 1257 };
1207 1258
1208 blend_output = Math::MakeVec( 1259 blend_output =
1209 LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), 1260 Math::MakeVec(LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op),
1210 LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), 1261 LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op),
1211 LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op), 1262 LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op),
1212 LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op)); 1263 LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op));
1213 } 1264 }
1214 1265
1215 const Math::Vec4<u8> result = { 1266 const Math::Vec4<u8> result = {
1216 output_merger.red_enable ? blend_output.r() : dest.r(), 1267 output_merger.red_enable ? blend_output.r() : dest.r(),
1217 output_merger.green_enable ? blend_output.g() : dest.g(), 1268 output_merger.green_enable ? blend_output.g() : dest.g(),
1218 output_merger.blue_enable ? blend_output.b() : dest.b(), 1269 output_merger.blue_enable ? blend_output.b() : dest.b(),
1219 output_merger.alpha_enable ? blend_output.a() : dest.a() 1270 output_merger.alpha_enable ? blend_output.a() : dest.a(),
1220 }; 1271 };
1221 1272
1222 if (regs.framebuffer.allow_color_write != 0) 1273 if (regs.framebuffer.allow_color_write != 0)
@@ -1225,8 +1276,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1225 } 1276 }
1226} 1277}
1227 1278
1228void ProcessTriangle(const Shader::OutputVertex& v0, 1279void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1,
1229 const Shader::OutputVertex& v1,
1230 const Shader::OutputVertex& v2) { 1280 const Shader::OutputVertex& v2) {
1231 ProcessTriangleInternal(v0, v1, v2); 1281 ProcessTriangleInternal(v0, v1, v2);
1232} 1282}