summaryrefslogtreecommitdiff
path: root/src/video_core/rasterizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/rasterizer.cpp')
-rw-r--r--src/video_core/rasterizer.cpp241
1 files changed, 195 insertions, 46 deletions
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index a80148872..025d4e484 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -18,51 +18,82 @@ namespace Pica {
18namespace Rasterizer { 18namespace Rasterizer {
19 19
20static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { 20static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
21 u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress()))); 21 const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress();
22 u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr)));
22 u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b(); 23 u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b();
23 24
24 // Assuming RGBA8 format until actual framebuffer format handling is implemented 25 // Assuming RGBA8 format until actual framebuffer format handling is implemented
25 *(color_buffer + x + y * registers.framebuffer.GetWidth()) = value; 26 *(color_buffer + x + y * registers.framebuffer.GetWidth()) = value;
26} 27}
27 28
29static const Math::Vec4<u8> GetPixel(int x, int y) {
30 const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress();
31 u32* color_buffer_u32 = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr)));
32
33 u32 value = *(color_buffer_u32 + x + y * registers.framebuffer.GetWidth());
34 Math::Vec4<u8> ret;
35 ret.a() = value >> 24;
36 ret.r() = (value >> 16) & 0xFF;
37 ret.g() = (value >> 8) & 0xFF;
38 ret.b() = value & 0xFF;
39 return ret;
40 }
41
28static u32 GetDepth(int x, int y) { 42static u32 GetDepth(int x, int y) {
29 u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress()))); 43 const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
44 u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
30 45
31 // Assuming 16-bit depth buffer format until actual format handling is implemented 46 // Assuming 16-bit depth buffer format until actual format handling is implemented
32 return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); 47 return *(depth_buffer + x + y * registers.framebuffer.GetWidth());
33} 48}
34 49
35static void SetDepth(int x, int y, u16 value) { 50static void SetDepth(int x, int y, u16 value) {
36 u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress()))); 51 const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
52 u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr)));
37 53
38 // Assuming 16-bit depth buffer format until actual format handling is implemented 54 // Assuming 16-bit depth buffer format until actual format handling is implemented
39 *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; 55 *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;
40} 56}
41 57
42void ProcessTriangle(const VertexShader::OutputVertex& v0, 58// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
43 const VertexShader::OutputVertex& v1, 59struct Fix12P4 {
44 const VertexShader::OutputVertex& v2) 60 Fix12P4() {}
45{ 61 Fix12P4(u16 val) : val(val) {}
46 // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
47 struct Fix12P4 {
48 Fix12P4() {}
49 Fix12P4(u16 val) : val(val) {}
50 62
51 static u16 FracMask() { return 0xF; } 63 static u16 FracMask() { return 0xF; }
52 static u16 IntMask() { return (u16)~0xF; } 64 static u16 IntMask() { return (u16)~0xF; }
53 65
54 operator u16() const { 66 operator u16() const {
55 return val; 67 return val;
56 } 68 }
57 69
58 bool operator < (const Fix12P4& oth) const { 70 bool operator < (const Fix12P4& oth) const {
59 return (u16)*this < (u16)oth; 71 return (u16)*this < (u16)oth;
60 } 72 }
61 73
62 private: 74private:
63 u16 val; 75 u16 val;
64 }; 76};
77
78/**
79 * Calculate signed area of the triangle spanned by the three argument vertices.
80 * The sign denotes an orientation.
81 *
82 * @todo define orientation concretely.
83 */
84static int SignedArea (const Math::Vec2<Fix12P4>& vtx1,
85 const Math::Vec2<Fix12P4>& vtx2,
86 const Math::Vec2<Fix12P4>& vtx3) {
87 const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
88 const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
89 // TODO: There is a very small chance this will overflow for sizeof(int) == 4
90 return Math::Cross(vec1, vec2).z;
91};
65 92
93void ProcessTriangle(const VertexShader::OutputVertex& v0,
94 const VertexShader::OutputVertex& v1,
95 const VertexShader::OutputVertex& v2)
96{
66 // vertex positions in rasterizer coordinates 97 // vertex positions in rasterizer coordinates
67 auto FloatToFix = [](float24 flt) { 98 auto FloatToFix = [](float24 flt) {
68 return Fix12P4(static_cast<unsigned short>(flt.ToFloat32() * 16.0f)); 99 return Fix12P4(static_cast<unsigned short>(flt.ToFloat32() * 16.0f));
@@ -70,10 +101,23 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
70 auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) { 101 auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) {
71 return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; 102 return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)};
72 }; 103 };
104
73 Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos), 105 Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos),
74 ScreenToRasterizerCoordinates(v1.screenpos), 106 ScreenToRasterizerCoordinates(v1.screenpos),
75 ScreenToRasterizerCoordinates(v2.screenpos) }; 107 ScreenToRasterizerCoordinates(v2.screenpos) };
76 108
109 if (registers.cull_mode == Regs::CullMode::KeepClockWise) {
110 // Reverse vertex order and use the CCW code path.
111 std::swap(vtxpos[1], vtxpos[2]);
112 }
113
114 if (registers.cull_mode != Regs::CullMode::KeepAll) {
115 // Cull away triangles which are wound clockwise.
116 // TODO: A check for degenerate triangles ("== 0") should be considered for CullMode::KeepAll
117 if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0)
118 return;
119 }
120
77 // TODO: Proper scissor rect test! 121 // TODO: Proper scissor rect test!
78 u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); 122 u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
79 u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); 123 u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
@@ -116,18 +160,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
116 for (u16 x = min_x; x < max_x; x += 0x10) { 160 for (u16 x = min_x; x < max_x; x += 0x10) {
117 161
118 // Calculate the barycentric coordinates w0, w1 and w2 162 // Calculate the barycentric coordinates w0, w1 and w2
119 auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1, 163 int w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
120 const Math::Vec2<Fix12P4>& vtx2, 164 int w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
121 const Math::Vec2<Fix12P4>& vtx3) { 165 int w2 = bias2 + SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y});
122 const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
123 const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
124 // TODO: There is a very small chance this will overflow for sizeof(int) == 4
125 return Math::Cross(vec1, vec2).z;
126 };
127
128 int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
129 int w1 = bias1 + orient2d(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
130 int w2 = bias2 + orient2d(vtxpos[0].xy(), vtxpos[1].xy(), {x, y});
131 int wsum = w0 + w1 + w2; 166 int wsum = w0 + w1 + w2;
132 167
133 // If current pixel is not covered by the current primitive 168 // If current pixel is not covered by the current primitive
@@ -201,8 +236,8 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
201 return 0; 236 return 0;
202 } 237 }
203 }; 238 };
204 s = GetWrappedTexCoord(registers.texture0.wrap_s, s, registers.texture0.width); 239 s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width);
205 t = GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height); 240 t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height);
206 241
207 u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress())); 242 u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress()));
208 auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); 243 auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);
@@ -279,12 +314,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
279 } 314 }
280 }; 315 };
281 316
282 auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> { 317 static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
283 switch (factor) 318 switch (factor)
284 { 319 {
285 case ColorModifier::SourceColor: 320 case ColorModifier::SourceColor:
286 return values.rgb(); 321 return values.rgb();
287 322
323 case ColorModifier::OneMinusSourceColor:
324 return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>();
325
288 case ColorModifier::SourceAlpha: 326 case ColorModifier::SourceAlpha:
289 return { values.a(), values.a(), values.a() }; 327 return { values.a(), values.a(), values.a() };
290 328
@@ -295,7 +333,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
295 } 333 }
296 }; 334 };
297 335
298 auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 { 336 static auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 {
299 switch (factor) { 337 switch (factor) {
300 case AlphaModifier::SourceAlpha: 338 case AlphaModifier::SourceAlpha:
301 return value; 339 return value;
@@ -310,7 +348,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
310 } 348 }
311 }; 349 };
312 350
313 auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { 351 static auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
314 switch (op) { 352 switch (op) {
315 case Operation::Replace: 353 case Operation::Replace:
316 return input[0]; 354 return input[0];
@@ -330,6 +368,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
330 case Operation::Lerp: 368 case Operation::Lerp:
331 return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); 369 return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>();
332 370
371 case Operation::Subtract:
372 {
373 auto result = input[0].Cast<int>() - input[1].Cast<int>();
374 result.r() = std::max(0, result.r());
375 result.g() = std::max(0, result.g());
376 result.b() = std::max(0, result.b());
377 return result.Cast<u8>();
378 }
379
333 default: 380 default:
334 LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); 381 LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
335 _dbg_assert_(HW_GPU, 0); 382 _dbg_assert_(HW_GPU, 0);
@@ -337,7 +384,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
337 } 384 }
338 }; 385 };
339 386
340 auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 { 387 static auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 {
341 switch (op) { 388 switch (op) {
342 case Operation::Replace: 389 case Operation::Replace:
343 return input[0]; 390 return input[0];
@@ -351,6 +398,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
351 case Operation::Lerp: 398 case Operation::Lerp:
352 return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; 399 return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
353 400
401 case Operation::Subtract:
402 return std::max(0, (int)input[0] - (int)input[1]);
403
354 default: 404 default:
355 LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op); 405 LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op);
356 _dbg_assert_(HW_GPU, 0); 406 _dbg_assert_(HW_GPU, 0);
@@ -381,12 +431,111 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
381 combiner_output = Math::MakeVec(color_output, alpha_output); 431 combiner_output = Math::MakeVec(color_output, alpha_output);
382 } 432 }
383 433
384 // TODO: Not sure if the multiplication by 65535 has already been taken care 434 // TODO: Does depth indeed only get written even if depth testing is enabled?
385 // of when transforming to screen coordinates or not. 435 if (registers.output_merger.depth_test_enable) {
386 u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 + 436 u16 z = (u16)(-(v0.screenpos[2].ToFloat32() * w0 +
387 (float)v1.screenpos[2].ToFloat32() * w1 + 437 v1.screenpos[2].ToFloat32() * w1 +
388 (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); 438 v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
389 SetDepth(x >> 4, y >> 4, z); 439 u16 ref_z = GetDepth(x >> 4, y >> 4);
440
441 bool pass = false;
442
443 switch (registers.output_merger.depth_test_func) {
444 case registers.output_merger.Always:
445 pass = true;
446 break;
447
448 case registers.output_merger.LessThan:
449 pass = z < ref_z;
450 break;
451
452 case registers.output_merger.GreaterThan:
453 pass = z > ref_z;
454 break;
455
456 default:
457 LOG_ERROR(HW_GPU, "Unknown depth test function %x", registers.output_merger.depth_test_func.Value());
458 break;
459 }
460
461 if (!pass)
462 continue;
463
464 if (registers.output_merger.depth_write_enable)
465 SetDepth(x >> 4, y >> 4, z);
466 }
467
468 auto dest = GetPixel(x >> 4, y >> 4);
469
470 if (registers.output_merger.alphablend_enable) {
471 auto params = registers.output_merger.alpha_blending;
472
473 auto LookupFactorRGB = [&](decltype(params)::BlendFactor factor) -> Math::Vec3<u8> {
474 switch(factor) {
475 case params.Zero:
476 return Math::Vec3<u8>(0, 0, 0);
477
478 case params.One:
479 return Math::Vec3<u8>(255, 255, 255);
480
481 case params.SourceAlpha:
482 return Math::MakeVec(combiner_output.a(), combiner_output.a(), combiner_output.a());
483
484 case params.OneMinusSourceAlpha:
485 return Math::Vec3<u8>(255-combiner_output.a(), 255-combiner_output.a(), 255-combiner_output.a());
486
487 default:
488 LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor);
489 exit(0);
490 break;
491 }
492 };
493
494 auto LookupFactorA = [&](decltype(params)::BlendFactor factor) -> u8 {
495 switch(factor) {
496 case params.Zero:
497 return 0;
498
499 case params.One:
500 return 255;
501
502 case params.SourceAlpha:
503 return combiner_output.a();
504
505 case params.OneMinusSourceAlpha:
506 return 255 - combiner_output.a();
507
508 default:
509 LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor);
510 exit(0);
511 break;
512 }
513 };
514
515 auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb),
516 LookupFactorA(params.factor_source_a));
517 auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb),
518 LookupFactorA(params.factor_dest_a));
519
520 switch (params.blend_equation_rgb) {
521 case params.Add:
522 {
523 auto result = (combiner_output * srcfactor + dest * dstfactor) / 255;
524 result.r() = std::min(255, result.r());
525 result.g() = std::min(255, result.g());
526 result.b() = std::min(255, result.b());
527 combiner_output = result.Cast<u8>();
528 break;
529 }
530
531 default:
532 LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", params.blend_equation_rgb.Value());
533 exit(0);
534 }
535 } else {
536 LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op);
537 exit(0);
538 }
390 539
391 DrawPixel(x >> 4, y >> 4, combiner_output); 540 DrawPixel(x >> 4, y >> 4, combiner_output);
392 } 541 }