diff options
| author | 2015-02-18 17:19:38 -0500 | |
|---|---|---|
| committer | 2015-02-18 17:19:38 -0500 | |
| commit | 4a48b017ca7fe8fe68dfc84d70864ef6aea6a266 (patch) | |
| tree | dcd7914a3a2147790d384ce0992f70d40bce8704 /src/video_core/rasterizer.cpp | |
| parent | Merge pull request #570 from purpasmart96/config_mem (diff) | |
| parent | Pica/Rasterizer: Replace exit() calls with UNIMPLEMENTED(). (diff) | |
| download | yuzu-4a48b017ca7fe8fe68dfc84d70864ef6aea6a266.tar.gz yuzu-4a48b017ca7fe8fe68dfc84d70864ef6aea6a266.tar.xz yuzu-4a48b017ca7fe8fe68dfc84d70864ef6aea6a266.zip | |
Merge pull request #562 from neobrain/pica_progress3
More PICA200 Emulation Fixes
Diffstat (limited to 'src/video_core/rasterizer.cpp')
| -rw-r--r-- | src/video_core/rasterizer.cpp | 252 |
1 files changed, 162 insertions, 90 deletions
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 3faa10153..94873f406 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | 6 | ||
| 7 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 8 | #include "common/math_util.h" | ||
| 8 | 9 | ||
| 9 | #include "math.h" | 10 | #include "math.h" |
| 10 | #include "pica.h" | 11 | #include "pica.h" |
| @@ -20,16 +21,31 @@ namespace Rasterizer { | |||
| 20 | static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | 21 | static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { |
| 21 | const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); | 22 | const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); |
| 22 | u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr))); | 23 | u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr))); |
| 23 | u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b(); | ||
| 24 | 24 | ||
| 25 | // Assuming RGBA8 format until actual framebuffer format handling is implemented | 25 | // Similarly to textures, the render framebuffer is laid out from bottom to top, too. |
| 26 | *(color_buffer + x + y * registers.framebuffer.GetWidth()) = value; | 26 | // NOTE: The framebuffer height register contains the actual FB height minus one. |
| 27 | y = (registers.framebuffer.height - y); | ||
| 28 | |||
| 29 | switch (registers.framebuffer.color_format) { | ||
| 30 | case registers.framebuffer.RGBA8: | ||
| 31 | { | ||
| 32 | u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b(); | ||
| 33 | *(color_buffer + x + y * registers.framebuffer.GetWidth()) = value; | ||
| 34 | break; | ||
| 35 | } | ||
| 36 | |||
| 37 | default: | ||
| 38 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format); | ||
| 39 | UNIMPLEMENTED(); | ||
| 40 | } | ||
| 27 | } | 41 | } |
| 28 | 42 | ||
| 29 | static const Math::Vec4<u8> GetPixel(int x, int y) { | 43 | static const Math::Vec4<u8> GetPixel(int x, int y) { |
| 30 | const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); | 44 | const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); |
| 31 | u32* color_buffer_u32 = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr))); | 45 | u32* color_buffer_u32 = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr))); |
| 32 | 46 | ||
| 47 | y = (registers.framebuffer.height - y); | ||
| 48 | |||
| 33 | u32 value = *(color_buffer_u32 + x + y * registers.framebuffer.GetWidth()); | 49 | u32 value = *(color_buffer_u32 + x + y * registers.framebuffer.GetWidth()); |
| 34 | Math::Vec4<u8> ret; | 50 | Math::Vec4<u8> ret; |
| 35 | ret.a() = value >> 24; | 51 | ret.a() = value >> 24; |
| @@ -43,6 +59,8 @@ static u32 GetDepth(int x, int y) { | |||
| 43 | const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); | 59 | const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); |
| 44 | u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); | 60 | u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); |
| 45 | 61 | ||
| 62 | y = (registers.framebuffer.height - y); | ||
| 63 | |||
| 46 | // Assuming 16-bit depth buffer format until actual format handling is implemented | 64 | // Assuming 16-bit depth buffer format until actual format handling is implemented |
| 47 | return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); | 65 | return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); |
| 48 | } | 66 | } |
| @@ -51,6 +69,8 @@ static void SetDepth(int x, int y, u16 value) { | |||
| 51 | const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); | 69 | const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); |
| 52 | u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); | 70 | u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); |
| 53 | 71 | ||
| 72 | y = (registers.framebuffer.height - y); | ||
| 73 | |||
| 54 | // Assuming 16-bit depth buffer format until actual format handling is implemented | 74 | // Assuming 16-bit depth buffer format until actual format handling is implemented |
| 55 | *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; | 75 | *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; |
| 56 | } | 76 | } |
| @@ -90,30 +110,43 @@ static int SignedArea (const Math::Vec2<Fix12P4>& vtx1, | |||
| 90 | return Math::Cross(vec1, vec2).z; | 110 | return Math::Cross(vec1, vec2).z; |
| 91 | }; | 111 | }; |
| 92 | 112 | ||
| 93 | void ProcessTriangle(const VertexShader::OutputVertex& v0, | 113 | /** |
| 94 | const VertexShader::OutputVertex& v1, | 114 | * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing |
| 95 | const VertexShader::OutputVertex& v2) | 115 | * culling via recursion. |
| 116 | */ | ||
| 117 | static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | ||
| 118 | const VertexShader::OutputVertex& v1, | ||
| 119 | const VertexShader::OutputVertex& v2, | ||
| 120 | bool reversed = false) | ||
| 96 | { | 121 | { |
| 97 | // vertex positions in rasterizer coordinates | 122 | // vertex positions in rasterizer coordinates |
| 98 | auto FloatToFix = [](float24 flt) { | 123 | static auto FloatToFix = [](float24 flt) { |
| 99 | return Fix12P4(static_cast<unsigned short>(flt.ToFloat32() * 16.0f)); | 124 | // TODO: Rounding here is necessary to prevent garbage pixels at |
| 100 | }; | 125 | // triangle borders. Is that the correct solution, though? |
| 101 | auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) { | 126 | return Fix12P4(static_cast<unsigned short>(round(flt.ToFloat32() * 16.0f))); |
| 102 | return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; | 127 | }; |
| 103 | }; | 128 | static auto ScreenToRasterizerCoordinates = [](const Math::Vec3<float24>& vec) { |
| 129 | return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; | ||
| 130 | }; | ||
| 104 | 131 | ||
| 105 | Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos), | 132 | Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos), |
| 106 | ScreenToRasterizerCoordinates(v1.screenpos), | 133 | ScreenToRasterizerCoordinates(v1.screenpos), |
| 107 | ScreenToRasterizerCoordinates(v2.screenpos) }; | 134 | ScreenToRasterizerCoordinates(v2.screenpos) }; |
| 108 | 135 | ||
| 109 | if (registers.cull_mode == Regs::CullMode::KeepClockWise) { | 136 | if (registers.cull_mode == Regs::CullMode::KeepAll) { |
| 110 | // Reverse vertex order and use the CCW code path. | 137 | // Make sure we always end up with a triangle wound counter-clockwise |
| 111 | std::swap(vtxpos[1], vtxpos[2]); | 138 | if (!reversed && SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) { |
| 112 | } | 139 | ProcessTriangleInternal(v0, v2, v1, true); |
| 140 | return; | ||
| 141 | } | ||
| 142 | } else { | ||
| 143 | if (!reversed && registers.cull_mode == Regs::CullMode::KeepClockWise) { | ||
| 144 | // Reverse vertex order and use the CCW code path. | ||
| 145 | ProcessTriangleInternal(v0, v2, v1, true); | ||
| 146 | return; | ||
| 147 | } | ||
| 113 | 148 | ||
| 114 | if (registers.cull_mode != Regs::CullMode::KeepAll) { | ||
| 115 | // Cull away triangles which are wound clockwise. | 149 | // Cull away triangles which are wound clockwise. |
| 116 | // TODO: A check for degenerate triangles ("== 0") should be considered for CullMode::KeepAll | ||
| 117 | if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) | 150 | if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) |
| 118 | return; | 151 | return; |
| 119 | } | 152 | } |
| @@ -155,9 +188,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 155 | auto textures = registers.GetTextures(); | 188 | auto textures = registers.GetTextures(); |
| 156 | auto tev_stages = registers.GetTevStages(); | 189 | auto tev_stages = registers.GetTevStages(); |
| 157 | 190 | ||
| 191 | // Enter rasterization loop, starting at the center of the topleft bounding box corner. | ||
| 158 | // TODO: Not sure if looping through x first might be faster | 192 | // TODO: Not sure if looping through x first might be faster |
| 159 | for (u16 y = min_y; y < max_y; y += 0x10) { | 193 | for (u16 y = min_y + 8; y < max_y; y += 0x10) { |
| 160 | for (u16 x = min_x; x < max_x; x += 0x10) { | 194 | for (u16 x = min_x + 8; x < max_x; x += 0x10) { |
| 161 | 195 | ||
| 162 | // Calculate the barycentric coordinates w0, w1 and w2 | 196 | // Calculate the barycentric coordinates w0, w1 and w2 |
| 163 | int w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); | 197 | int w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); |
| @@ -220,7 +254,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 220 | 254 | ||
| 221 | int s = (int)(uv[i].u() * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32(); | 255 | int s = (int)(uv[i].u() * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32(); |
| 222 | int t = (int)(uv[i].v() * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32(); | 256 | int t = (int)(uv[i].v() * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32(); |
| 223 | auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { | 257 | static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { |
| 224 | switch (mode) { | 258 | switch (mode) { |
| 225 | case Regs::TextureConfig::ClampToEdge: | 259 | case Regs::TextureConfig::ClampToEdge: |
| 226 | val = std::max(val, 0); | 260 | val = std::max(val, 0); |
| @@ -228,7 +262,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 228 | return val; | 262 | return val; |
| 229 | 263 | ||
| 230 | case Regs::TextureConfig::Repeat: | 264 | case Regs::TextureConfig::Repeat: |
| 231 | return (int)(((unsigned)val) % size); | 265 | return (int)((unsigned)val % size); |
| 266 | |||
| 267 | case Regs::TextureConfig::MirroredRepeat: | ||
| 268 | { | ||
| 269 | int val = (int)((unsigned)val % (2 * size)); | ||
| 270 | if (val >= size) | ||
| 271 | val = 2 * size - 1 - val; | ||
| 272 | return val; | ||
| 273 | } | ||
| 232 | 274 | ||
| 233 | default: | 275 | default: |
| 234 | LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x\n", (int)mode); | 276 | LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x\n", (int)mode); |
| @@ -236,6 +278,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 236 | return 0; | 278 | return 0; |
| 237 | } | 279 | } |
| 238 | }; | 280 | }; |
| 281 | |||
| 282 | // Textures are laid out from bottom to top, hence we invert the t coordinate. | ||
| 283 | // NOTE: This may not be the right place for the inversion. | ||
| 284 | // TODO: Check if this applies to ETC textures, too. | ||
| 239 | s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); | 285 | s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); |
| 240 | t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); | 286 | t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); |
| 241 | 287 | ||
| @@ -262,7 +308,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 262 | 308 | ||
| 263 | auto GetSource = [&](Source source) -> Math::Vec4<u8> { | 309 | auto GetSource = [&](Source source) -> Math::Vec4<u8> { |
| 264 | switch (source) { | 310 | switch (source) { |
| 311 | // TODO: What's the difference between these two? | ||
| 265 | case Source::PrimaryColor: | 312 | case Source::PrimaryColor: |
| 313 | case Source::PrimaryFragmentColor: | ||
| 266 | return primary_color; | 314 | return primary_color; |
| 267 | 315 | ||
| 268 | case Source::Texture0: | 316 | case Source::Texture0: |
| @@ -378,6 +426,25 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 378 | return result.Cast<u8>(); | 426 | return result.Cast<u8>(); |
| 379 | } | 427 | } |
| 380 | 428 | ||
| 429 | case Operation::MultiplyThenAdd: | ||
| 430 | { | ||
| 431 | auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255; | ||
| 432 | result.r() = std::min(255, result.r()); | ||
| 433 | result.g() = std::min(255, result.g()); | ||
| 434 | result.b() = std::min(255, result.b()); | ||
| 435 | return result.Cast<u8>(); | ||
| 436 | } | ||
| 437 | |||
| 438 | case Operation::AddThenMultiply: | ||
| 439 | { | ||
| 440 | auto result = input[0] + input[1]; | ||
| 441 | result.r() = std::min(255, result.r()); | ||
| 442 | result.g() = std::min(255, result.g()); | ||
| 443 | result.b() = std::min(255, result.b()); | ||
| 444 | result = (result * input[2].Cast<int>()) / 255; | ||
| 445 | return result.Cast<u8>(); | ||
| 446 | } | ||
| 447 | |||
| 381 | default: | 448 | default: |
| 382 | LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); | 449 | LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); |
| 383 | UNIMPLEMENTED(); | 450 | UNIMPLEMENTED(); |
| @@ -402,6 +469,12 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 402 | case Operation::Subtract: | 469 | case Operation::Subtract: |
| 403 | return std::max(0, (int)input[0] - (int)input[1]); | 470 | return std::max(0, (int)input[0] - (int)input[1]); |
| 404 | 471 | ||
| 472 | case Operation::MultiplyThenAdd: | ||
| 473 | return std::min(255, (input[0] * input[1] + 255 * input[2]) / 255); | ||
| 474 | |||
| 475 | case Operation::AddThenMultiply: | ||
| 476 | return (std::min(255, (input[0] + input[1])) * input[2]) / 255; | ||
| 477 | |||
| 405 | default: | 478 | default: |
| 406 | LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op); | 479 | LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op); |
| 407 | UNIMPLEMENTED(); | 480 | UNIMPLEMENTED(); |
| @@ -475,7 +548,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 475 | 548 | ||
| 476 | // TODO: Does depth indeed only get written even if depth testing is enabled? | 549 | // TODO: Does depth indeed only get written even if depth testing is enabled? |
| 477 | if (registers.output_merger.depth_test_enable) { | 550 | if (registers.output_merger.depth_test_enable) { |
| 478 | u16 z = (u16)(-(v0.screenpos[2].ToFloat32() * w0 + | 551 | u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 + |
| 479 | v1.screenpos[2].ToFloat32() * w1 + | 552 | v1.screenpos[2].ToFloat32() * w1 + |
| 480 | v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); | 553 | v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); |
| 481 | u16 ref_z = GetDepth(x >> 4, y >> 4); | 554 | u16 ref_z = GetDepth(x >> 4, y >> 4); |
| @@ -524,6 +597,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 524 | } | 597 | } |
| 525 | 598 | ||
| 526 | auto dest = GetPixel(x >> 4, y >> 4); | 599 | auto dest = GetPixel(x >> 4, y >> 4); |
| 600 | Math::Vec4<u8> blend_output = combiner_output; | ||
| 527 | 601 | ||
| 528 | if (registers.output_merger.alphablend_enable) { | 602 | if (registers.output_merger.alphablend_enable) { |
| 529 | auto params = registers.output_merger.alpha_blending; | 603 | auto params = registers.output_merger.alpha_blending; |
| @@ -574,7 +648,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 574 | 648 | ||
| 575 | default: | 649 | default: |
| 576 | LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor); | 650 | LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor); |
| 577 | exit(0); | 651 | UNIMPLEMENTED(); |
| 578 | break; | 652 | break; |
| 579 | } | 653 | } |
| 580 | }; | 654 | }; |
| @@ -607,86 +681,78 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 607 | 681 | ||
| 608 | default: | 682 | default: |
| 609 | LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor); | 683 | LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor); |
| 610 | exit(0); | 684 | UNIMPLEMENTED(); |
| 685 | break; | ||
| 686 | } | ||
| 687 | }; | ||
| 688 | |||
| 689 | using BlendEquation = decltype(params)::BlendEquation; | ||
| 690 | static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, | ||
| 691 | const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, | ||
| 692 | BlendEquation equation) { | ||
| 693 | Math::Vec4<int> result; | ||
| 694 | |||
| 695 | auto src_result = (src * srcfactor).Cast<int>(); | ||
| 696 | auto dst_result = (dest * destfactor).Cast<int>(); | ||
| 697 | |||
| 698 | switch (equation) { | ||
| 699 | case BlendEquation::Add: | ||
| 700 | result = (src_result + dst_result) / 255; | ||
| 611 | break; | 701 | break; |
| 702 | |||
| 703 | case BlendEquation::Subtract: | ||
| 704 | result = (src_result - dst_result) / 255; | ||
| 705 | break; | ||
| 706 | |||
| 707 | case BlendEquation::ReverseSubtract: | ||
| 708 | result = (dst_result - src_result) / 255; | ||
| 709 | break; | ||
| 710 | |||
| 711 | // TODO: How do these two actually work? | ||
| 712 | // OpenGL doesn't include the blend factors in the min/max computations, | ||
| 713 | // but is this what the 3DS actually does? | ||
| 714 | case BlendEquation::Min: | ||
| 715 | result.r() = std::min(src.r(), dest.r()); | ||
| 716 | result.g() = std::min(src.g(), dest.g()); | ||
| 717 | result.b() = std::min(src.b(), dest.b()); | ||
| 718 | result.a() = std::min(src.a(), dest.a()); | ||
| 719 | break; | ||
| 720 | |||
| 721 | case BlendEquation::Max: | ||
| 722 | result.r() = std::max(src.r(), dest.r()); | ||
| 723 | result.g() = std::max(src.g(), dest.g()); | ||
| 724 | result.b() = std::max(src.b(), dest.b()); | ||
| 725 | result.a() = std::max(src.a(), dest.a()); | ||
| 726 | break; | ||
| 727 | |||
| 728 | default: | ||
| 729 | LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", equation); | ||
| 730 | UNIMPLEMENTED(); | ||
| 612 | } | 731 | } |
| 732 | |||
| 733 | return Math::Vec4<u8>(MathUtil::Clamp(result.r(), 0, 255), | ||
| 734 | MathUtil::Clamp(result.g(), 0, 255), | ||
| 735 | MathUtil::Clamp(result.b(), 0, 255), | ||
| 736 | MathUtil::Clamp(result.a(), 0, 255)); | ||
| 613 | }; | 737 | }; |
| 614 | 738 | ||
| 615 | auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb), | 739 | auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb), |
| 616 | LookupFactorA(params.factor_source_a)); | 740 | LookupFactorA(params.factor_source_a)); |
| 617 | auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb), | 741 | auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb), |
| 618 | LookupFactorA(params.factor_dest_a)); | 742 | LookupFactorA(params.factor_dest_a)); |
| 619 | |||
| 620 | auto src_result = (combiner_output * srcfactor).Cast<int>(); | ||
| 621 | auto dst_result = (dest * dstfactor).Cast<int>(); | ||
| 622 | |||
| 623 | switch (params.blend_equation_rgb) { | ||
| 624 | case params.Add: | ||
| 625 | { | ||
| 626 | auto result = (src_result + dst_result) / 255; | ||
| 627 | result.r() = std::min(255, result.r()); | ||
| 628 | result.g() = std::min(255, result.g()); | ||
| 629 | result.b() = std::min(255, result.b()); | ||
| 630 | combiner_output = result.Cast<u8>(); | ||
| 631 | break; | ||
| 632 | } | ||
| 633 | |||
| 634 | case params.Subtract: | ||
| 635 | { | ||
| 636 | auto result = (src_result - dst_result) / 255; | ||
| 637 | result.r() = std::max(0, result.r()); | ||
| 638 | result.g() = std::max(0, result.g()); | ||
| 639 | result.b() = std::max(0, result.b()); | ||
| 640 | combiner_output = result.Cast<u8>(); | ||
| 641 | break; | ||
| 642 | } | ||
| 643 | |||
| 644 | case params.ReverseSubtract: | ||
| 645 | { | ||
| 646 | auto result = (dst_result - src_result) / 255; | ||
| 647 | result.r() = std::max(0, result.r()); | ||
| 648 | result.g() = std::max(0, result.g()); | ||
| 649 | result.b() = std::max(0, result.b()); | ||
| 650 | combiner_output = result.Cast<u8>(); | ||
| 651 | break; | ||
| 652 | } | ||
| 653 | |||
| 654 | case params.Min: | ||
| 655 | { | ||
| 656 | // TODO: GL spec says to do it without the factors, but is this what the 3DS does? | ||
| 657 | Math::Vec4<int> result; | ||
| 658 | result.r() = std::min(combiner_output.r(),dest.r()); | ||
| 659 | result.g() = std::min(combiner_output.g(),dest.g()); | ||
| 660 | result.b() = std::min(combiner_output.b(),dest.b()); | ||
| 661 | combiner_output = result.Cast<u8>(); | ||
| 662 | break; | ||
| 663 | } | ||
| 664 | |||
| 665 | case params.Max: | ||
| 666 | { | ||
| 667 | // TODO: GL spec says to do it without the factors, but is this what the 3DS does? | ||
| 668 | Math::Vec4<int> result; | ||
| 669 | result.r() = std::max(combiner_output.r(),dest.r()); | ||
| 670 | result.g() = std::max(combiner_output.g(),dest.g()); | ||
| 671 | result.b() = std::max(combiner_output.b(),dest.b()); | ||
| 672 | combiner_output = result.Cast<u8>(); | ||
| 673 | break; | ||
| 674 | } | ||
| 675 | 743 | ||
| 676 | default: | 744 | blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); |
| 677 | LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", params.blend_equation_rgb.Value()); | 745 | blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); |
| 678 | exit(0); | ||
| 679 | } | ||
| 680 | } else { | 746 | } else { |
| 681 | LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op); | 747 | LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op); |
| 682 | exit(0); | 748 | UNIMPLEMENTED(); |
| 683 | } | 749 | } |
| 684 | 750 | ||
| 685 | const Math::Vec4<u8> result = { | 751 | const Math::Vec4<u8> result = { |
| 686 | registers.output_merger.red_enable ? combiner_output.r() : dest.r(), | 752 | registers.output_merger.red_enable ? blend_output.r() : dest.r(), |
| 687 | registers.output_merger.green_enable ? combiner_output.g() : dest.g(), | 753 | registers.output_merger.green_enable ? blend_output.g() : dest.g(), |
| 688 | registers.output_merger.blue_enable ? combiner_output.b() : dest.b(), | 754 | registers.output_merger.blue_enable ? blend_output.b() : dest.b(), |
| 689 | registers.output_merger.alpha_enable ? combiner_output.a() : dest.a() | 755 | registers.output_merger.alpha_enable ? blend_output.a() : dest.a() |
| 690 | }; | 756 | }; |
| 691 | 757 | ||
| 692 | DrawPixel(x >> 4, y >> 4, result); | 758 | DrawPixel(x >> 4, y >> 4, result); |
| @@ -694,6 +760,12 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, | |||
| 694 | } | 760 | } |
| 695 | } | 761 | } |
| 696 | 762 | ||
| 763 | void ProcessTriangle(const VertexShader::OutputVertex& v0, | ||
| 764 | const VertexShader::OutputVertex& v1, | ||
| 765 | const VertexShader::OutputVertex& v2) { | ||
| 766 | ProcessTriangleInternal(v0, v1, v2); | ||
| 767 | } | ||
| 768 | |||
| 697 | } // namespace Rasterizer | 769 | } // namespace Rasterizer |
| 698 | 770 | ||
| 699 | } // namespace Pica | 771 | } // namespace Pica |