diff options
| author | 2015-05-22 23:27:41 -0400 | |
|---|---|---|
| committer | 2015-05-22 23:27:41 -0400 | |
| commit | a7946f9027a87da93cd76ec46d54cadf4203b082 (patch) | |
| tree | 01e98e937a3ed9d8c65b270df016658f8ae89a97 /src/video_core/rasterizer.cpp | |
| parent | Merge pull request #801 from purpasmart96/hid_stubs (diff) | |
| parent | Pica: Create 'State' structure and move state memory there. (diff) | |
| download | yuzu-a7946f9027a87da93cd76ec46d54cadf4203b082.tar.gz yuzu-a7946f9027a87da93cd76ec46d54cadf4203b082.tar.xz yuzu-a7946f9027a87da93cd76ec46d54cadf4203b082.zip | |
Merge pull request #776 from bunnei/pica-state
GPU: Consolidate Pica state
Diffstat (limited to 'src/video_core/rasterizer.cpp')
| -rw-r--r-- | src/video_core/rasterizer.cpp | 244 |
1 file changed, 125 insertions, 119 deletions
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 362efe52e..767ff4205 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
| @@ -24,72 +24,74 @@ namespace Pica { | |||
| 24 | namespace Rasterizer { | 24 | namespace Rasterizer { |
| 25 | 25 | ||
| 26 | static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | 26 | static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { |
| 27 | const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); | 27 | const auto& framebuffer = g_state.regs.framebuffer; |
| 28 | const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); | ||
| 28 | 29 | ||
| 29 | // Similarly to textures, the render framebuffer is laid out from bottom to top, too. | 30 | // Similarly to textures, the render framebuffer is laid out from bottom to top, too. |
| 30 | // NOTE: The framebuffer height register contains the actual FB height minus one. | 31 | // NOTE: The framebuffer height register contains the actual FB height minus one. |
| 31 | y = (registers.framebuffer.height - y); | 32 | y = framebuffer.height - y; |
| 32 | 33 | ||
| 33 | const u32 coarse_y = y & ~7; | 34 | const u32 coarse_y = y & ~7; |
| 34 | u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value())); | 35 | u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); |
| 35 | u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel; | 36 | u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; |
| 36 | u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; | 37 | u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; |
| 37 | 38 | ||
| 38 | switch (registers.framebuffer.color_format) { | 39 | switch (framebuffer.color_format) { |
| 39 | case Pica::Regs::ColorFormat::RGBA8: | 40 | case Regs::ColorFormat::RGBA8: |
| 40 | Color::EncodeRGBA8(color, dst_pixel); | 41 | Color::EncodeRGBA8(color, dst_pixel); |
| 41 | break; | 42 | break; |
| 42 | 43 | ||
| 43 | case Pica::Regs::ColorFormat::RGB8: | 44 | case Regs::ColorFormat::RGB8: |
| 44 | Color::EncodeRGB8(color, dst_pixel); | 45 | Color::EncodeRGB8(color, dst_pixel); |
| 45 | break; | 46 | break; |
| 46 | 47 | ||
| 47 | case Pica::Regs::ColorFormat::RGB5A1: | 48 | case Regs::ColorFormat::RGB5A1: |
| 48 | Color::EncodeRGB5A1(color, dst_pixel); | 49 | Color::EncodeRGB5A1(color, dst_pixel); |
| 49 | break; | 50 | break; |
| 50 | 51 | ||
| 51 | case Pica::Regs::ColorFormat::RGB565: | 52 | case Regs::ColorFormat::RGB565: |
| 52 | Color::EncodeRGB565(color, dst_pixel); | 53 | Color::EncodeRGB565(color, dst_pixel); |
| 53 | break; | 54 | break; |
| 54 | 55 | ||
| 55 | case Pica::Regs::ColorFormat::RGBA4: | 56 | case Regs::ColorFormat::RGBA4: |
| 56 | Color::EncodeRGBA4(color, dst_pixel); | 57 | Color::EncodeRGBA4(color, dst_pixel); |
| 57 | break; | 58 | break; |
| 58 | 59 | ||
| 59 | default: | 60 | default: |
| 60 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value()); | 61 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value()); |
| 61 | UNIMPLEMENTED(); | 62 | UNIMPLEMENTED(); |
| 62 | } | 63 | } |
| 63 | } | 64 | } |
| 64 | 65 | ||
| 65 | static const Math::Vec4<u8> GetPixel(int x, int y) { | 66 | static const Math::Vec4<u8> GetPixel(int x, int y) { |
| 66 | const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); | 67 | const auto& framebuffer = g_state.regs.framebuffer; |
| 68 | const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); | ||
| 67 | 69 | ||
| 68 | y = (registers.framebuffer.height - y); | 70 | y = framebuffer.height - y; |
| 69 | 71 | ||
| 70 | const u32 coarse_y = y & ~7; | 72 | const u32 coarse_y = y & ~7; |
| 71 | u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value())); | 73 | u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); |
| 72 | u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel; | 74 | u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; |
| 73 | u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; | 75 | u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; |
| 74 | 76 | ||
| 75 | switch (registers.framebuffer.color_format) { | 77 | switch (framebuffer.color_format) { |
| 76 | case Pica::Regs::ColorFormat::RGBA8: | 78 | case Regs::ColorFormat::RGBA8: |
| 77 | return Color::DecodeRGBA8(src_pixel); | 79 | return Color::DecodeRGBA8(src_pixel); |
| 78 | 80 | ||
| 79 | case Pica::Regs::ColorFormat::RGB8: | 81 | case Regs::ColorFormat::RGB8: |
| 80 | return Color::DecodeRGB8(src_pixel); | 82 | return Color::DecodeRGB8(src_pixel); |
| 81 | 83 | ||
| 82 | case Pica::Regs::ColorFormat::RGB5A1: | 84 | case Regs::ColorFormat::RGB5A1: |
| 83 | return Color::DecodeRGB5A1(src_pixel); | 85 | return Color::DecodeRGB5A1(src_pixel); |
| 84 | 86 | ||
| 85 | case Pica::Regs::ColorFormat::RGB565: | 87 | case Regs::ColorFormat::RGB565: |
| 86 | return Color::DecodeRGB565(src_pixel); | 88 | return Color::DecodeRGB565(src_pixel); |
| 87 | 89 | ||
| 88 | case Pica::Regs::ColorFormat::RGBA4: | 90 | case Regs::ColorFormat::RGBA4: |
| 89 | return Color::DecodeRGBA4(src_pixel); | 91 | return Color::DecodeRGBA4(src_pixel); |
| 90 | 92 | ||
| 91 | default: | 93 | default: |
| 92 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value()); | 94 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value()); |
| 93 | UNIMPLEMENTED(); | 95 | UNIMPLEMENTED(); |
| 94 | } | 96 | } |
| 95 | 97 | ||
| @@ -97,58 +99,60 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { | |||
| 97 | } | 99 | } |
| 98 | 100 | ||
| 99 | static u32 GetDepth(int x, int y) { | 101 | static u32 GetDepth(int x, int y) { |
| 100 | const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); | 102 | const auto& framebuffer = g_state.regs.framebuffer; |
| 103 | const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||
| 101 | u8* depth_buffer = Memory::GetPhysicalPointer(addr); | 104 | u8* depth_buffer = Memory::GetPhysicalPointer(addr); |
| 102 | 105 | ||
| 103 | y = (registers.framebuffer.height - y); | 106 | y = framebuffer.height - y; |
| 104 | 107 | ||
| 105 | const u32 coarse_y = y & ~7; | 108 | const u32 coarse_y = y & ~7; |
| 106 | u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(registers.framebuffer.depth_format); | 109 | u32 bytes_per_pixel = Regs::BytesPerDepthPixel(framebuffer.depth_format); |
| 107 | u32 stride = registers.framebuffer.width * bytes_per_pixel; | 110 | u32 stride = framebuffer.width * bytes_per_pixel; |
| 108 | 111 | ||
| 109 | u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | 112 | u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; |
| 110 | u8* src_pixel = depth_buffer + src_offset; | 113 | u8* src_pixel = depth_buffer + src_offset; |
| 111 | 114 | ||
| 112 | switch (registers.framebuffer.depth_format) { | 115 | switch (framebuffer.depth_format) { |
| 113 | case Pica::Regs::DepthFormat::D16: | 116 | case Regs::DepthFormat::D16: |
| 114 | return Color::DecodeD16(src_pixel); | 117 | return Color::DecodeD16(src_pixel); |
| 115 | case Pica::Regs::DepthFormat::D24: | 118 | case Regs::DepthFormat::D24: |
| 116 | return Color::DecodeD24(src_pixel); | 119 | return Color::DecodeD24(src_pixel); |
| 117 | case Pica::Regs::DepthFormat::D24S8: | 120 | case Regs::DepthFormat::D24S8: |
| 118 | return Color::DecodeD24S8(src_pixel).x; | 121 | return Color::DecodeD24S8(src_pixel).x; |
| 119 | default: | 122 | default: |
| 120 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format); | 123 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); |
| 121 | UNIMPLEMENTED(); | 124 | UNIMPLEMENTED(); |
| 122 | return 0; | 125 | return 0; |
| 123 | } | 126 | } |
| 124 | } | 127 | } |
| 125 | 128 | ||
| 126 | static void SetDepth(int x, int y, u32 value) { | 129 | static void SetDepth(int x, int y, u32 value) { |
| 127 | const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); | 130 | const auto& framebuffer = g_state.regs.framebuffer; |
| 131 | const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); | ||
| 128 | u8* depth_buffer = Memory::GetPhysicalPointer(addr); | 132 | u8* depth_buffer = Memory::GetPhysicalPointer(addr); |
| 129 | 133 | ||
| 130 | y = (registers.framebuffer.height - y); | 134 | y = framebuffer.height - y; |
| 131 | 135 | ||
| 132 | const u32 coarse_y = y & ~7; | 136 | const u32 coarse_y = y & ~7; |
| 133 | u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(registers.framebuffer.depth_format); | 137 | u32 bytes_per_pixel = Regs::BytesPerDepthPixel(framebuffer.depth_format); |
| 134 | u32 stride = registers.framebuffer.width * bytes_per_pixel; | 138 | u32 stride = framebuffer.width * bytes_per_pixel; |
| 135 | 139 | ||
| 136 | u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; | 140 | u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; |
| 137 | u8* dst_pixel = depth_buffer + dst_offset; | 141 | u8* dst_pixel = depth_buffer + dst_offset; |
| 138 | 142 | ||
| 139 | switch (registers.framebuffer.depth_format) { | 143 | switch (framebuffer.depth_format) { |
| 140 | case Pica::Regs::DepthFormat::D16: | 144 | case Regs::DepthFormat::D16: |
| 141 | Color::EncodeD16(value, dst_pixel); | 145 | Color::EncodeD16(value, dst_pixel); |
| 142 | break; | 146 | break; |
| 143 | case Pica::Regs::DepthFormat::D24: | 147 | case Regs::DepthFormat::D24: |
| 144 | Color::EncodeD24(value, dst_pixel); | 148 | Color::EncodeD24(value, dst_pixel); |
| 145 | break; | 149 | break; |
| 146 | case Pica::Regs::DepthFormat::D24S8: | 150 | case Regs::DepthFormat::D24S8: |
| 147 | // TODO(Subv): Implement the stencil buffer | 151 | // TODO(Subv): Implement the stencil buffer |
| 148 | Color::EncodeD24S8(value, 0, dst_pixel); | 152 | Color::EncodeD24S8(value, 0, dst_pixel); |
| 149 | break; | 153 | break; |
| 150 | default: | 154 | default: |
| 151 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format); | 155 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); |
| 152 | UNIMPLEMENTED(); | 156 | UNIMPLEMENTED(); |
| 153 | break; | 157 | break; |
| 154 | } | 158 | } |
| @@ -200,6 +204,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 200 | const VertexShader::OutputVertex& v2, | 204 | const VertexShader::OutputVertex& v2, |
| 201 | bool reversed = false) | 205 | bool reversed = false) |
| 202 | { | 206 | { |
| 207 | const auto& regs = g_state.regs; | ||
| 203 | Common::Profiling::ScopeTimer timer(rasterization_category); | 208 | Common::Profiling::ScopeTimer timer(rasterization_category); |
| 204 | 209 | ||
| 205 | // vertex positions in rasterizer coordinates | 210 | // vertex positions in rasterizer coordinates |
| @@ -216,14 +221,14 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 216 | ScreenToRasterizerCoordinates(v1.screenpos), | 221 | ScreenToRasterizerCoordinates(v1.screenpos), |
| 217 | ScreenToRasterizerCoordinates(v2.screenpos) }; | 222 | ScreenToRasterizerCoordinates(v2.screenpos) }; |
| 218 | 223 | ||
| 219 | if (registers.cull_mode == Regs::CullMode::KeepAll) { | 224 | if (regs.cull_mode == Regs::CullMode::KeepAll) { |
| 220 | // Make sure we always end up with a triangle wound counter-clockwise | 225 | // Make sure we always end up with a triangle wound counter-clockwise |
| 221 | if (!reversed && SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) { | 226 | if (!reversed && SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) { |
| 222 | ProcessTriangleInternal(v0, v2, v1, true); | 227 | ProcessTriangleInternal(v0, v2, v1, true); |
| 223 | return; | 228 | return; |
| 224 | } | 229 | } |
| 225 | } else { | 230 | } else { |
| 226 | if (!reversed && registers.cull_mode == Regs::CullMode::KeepClockWise) { | 231 | if (!reversed && regs.cull_mode == Regs::CullMode::KeepClockWise) { |
| 227 | // Reverse vertex order and use the CCW code path. | 232 | // Reverse vertex order and use the CCW code path. |
| 228 | ProcessTriangleInternal(v0, v2, v1, true); | 233 | ProcessTriangleInternal(v0, v2, v1, true); |
| 229 | return; | 234 | return; |
| @@ -268,8 +273,8 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 268 | 273 | ||
| 269 | auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); | 274 | auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); |
| 270 | 275 | ||
| 271 | auto textures = registers.GetTextures(); | 276 | auto textures = regs.GetTextures(); |
| 272 | auto tev_stages = registers.GetTevStages(); | 277 | auto tev_stages = regs.GetTevStages(); |
| 273 | 278 | ||
| 274 | // Enter rasterization loop, starting at the center of the topleft bounding box corner. | 279 | // Enter rasterization loop, starting at the center of the topleft bounding box corner. |
| 275 | // TODO: Not sure if looping through x first might be faster | 280 | // TODO: Not sure if looping through x first might be faster |
| @@ -384,8 +389,8 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 384 | // analogously. | 389 | // analogously. |
| 385 | Math::Vec4<u8> combiner_output; | 390 | Math::Vec4<u8> combiner_output; |
| 386 | Math::Vec4<u8> combiner_buffer = { | 391 | Math::Vec4<u8> combiner_buffer = { |
| 387 | registers.tev_combiner_buffer_color.r, registers.tev_combiner_buffer_color.g, | 392 | regs.tev_combiner_buffer_color.r, regs.tev_combiner_buffer_color.g, |
| 388 | registers.tev_combiner_buffer_color.b, registers.tev_combiner_buffer_color.a | 393 | regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a |
| 389 | }; | 394 | }; |
| 390 | 395 | ||
| 391 | for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { | 396 | for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { |
| @@ -609,51 +614,52 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 609 | combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); | 614 | combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); |
| 610 | combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); | 615 | combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); |
| 611 | 616 | ||
| 612 | if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) { | 617 | if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) { |
| 613 | combiner_buffer.r() = combiner_output.r(); | 618 | combiner_buffer.r() = combiner_output.r(); |
| 614 | combiner_buffer.g() = combiner_output.g(); | 619 | combiner_buffer.g() = combiner_output.g(); |
| 615 | combiner_buffer.b() = combiner_output.b(); | 620 | combiner_buffer.b() = combiner_output.b(); |
| 616 | } | 621 | } |
| 617 | 622 | ||
| 618 | if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) { | 623 | if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) { |
| 619 | combiner_buffer.a() = combiner_output.a(); | 624 | combiner_buffer.a() = combiner_output.a(); |
| 620 | } | 625 | } |
| 621 | } | 626 | } |
| 622 | 627 | ||
| 623 | if (registers.output_merger.alpha_test.enable) { | 628 | const auto& output_merger = regs.output_merger; |
| 629 | if (output_merger.alpha_test.enable) { | ||
| 624 | bool pass = false; | 630 | bool pass = false; |
| 625 | 631 | ||
| 626 | switch (registers.output_merger.alpha_test.func) { | 632 | switch (output_merger.alpha_test.func) { |
| 627 | case registers.output_merger.Never: | 633 | case Regs::CompareFunc::Never: |
| 628 | pass = false; | 634 | pass = false; |
| 629 | break; | 635 | break; |
| 630 | 636 | ||
| 631 | case registers.output_merger.Always: | 637 | case Regs::CompareFunc::Always: |
| 632 | pass = true; | 638 | pass = true; |
| 633 | break; | 639 | break; |
| 634 | 640 | ||
| 635 | case registers.output_merger.Equal: | 641 | case Regs::CompareFunc::Equal: |
| 636 | pass = combiner_output.a() == registers.output_merger.alpha_test.ref; | 642 | pass = combiner_output.a() == output_merger.alpha_test.ref; |
| 637 | break; | 643 | break; |
| 638 | 644 | ||
| 639 | case registers.output_merger.NotEqual: | 645 | case Regs::CompareFunc::NotEqual: |
| 640 | pass = combiner_output.a() != registers.output_merger.alpha_test.ref; | 646 | pass = combiner_output.a() != output_merger.alpha_test.ref; |
| 641 | break; | 647 | break; |
| 642 | 648 | ||
| 643 | case registers.output_merger.LessThan: | 649 | case Regs::CompareFunc::LessThan: |
| 644 | pass = combiner_output.a() < registers.output_merger.alpha_test.ref; | 650 | pass = combiner_output.a() < output_merger.alpha_test.ref; |
| 645 | break; | 651 | break; |
| 646 | 652 | ||
| 647 | case registers.output_merger.LessThanOrEqual: | 653 | case Regs::CompareFunc::LessThanOrEqual: |
| 648 | pass = combiner_output.a() <= registers.output_merger.alpha_test.ref; | 654 | pass = combiner_output.a() <= output_merger.alpha_test.ref; |
| 649 | break; | 655 | break; |
| 650 | 656 | ||
| 651 | case registers.output_merger.GreaterThan: | 657 | case Regs::CompareFunc::GreaterThan: |
| 652 | pass = combiner_output.a() > registers.output_merger.alpha_test.ref; | 658 | pass = combiner_output.a() > output_merger.alpha_test.ref; |
| 653 | break; | 659 | break; |
| 654 | 660 | ||
| 655 | case registers.output_merger.GreaterThanOrEqual: | 661 | case Regs::CompareFunc::GreaterThanOrEqual: |
| 656 | pass = combiner_output.a() >= registers.output_merger.alpha_test.ref; | 662 | pass = combiner_output.a() >= output_merger.alpha_test.ref; |
| 657 | break; | 663 | break; |
| 658 | } | 664 | } |
| 659 | 665 | ||
| @@ -662,8 +668,8 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 662 | } | 668 | } |
| 663 | 669 | ||
| 664 | // TODO: Does depth indeed only get written even if depth testing is enabled? | 670 | // TODO: Does depth indeed only get written even if depth testing is enabled? |
| 665 | if (registers.output_merger.depth_test_enable) { | 671 | if (output_merger.depth_test_enable) { |
| 666 | unsigned num_bits = Pica::Regs::DepthBitsPerPixel(registers.framebuffer.depth_format); | 672 | unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); |
| 667 | u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 + | 673 | u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 + |
| 668 | v1.screenpos[2].ToFloat32() * w1 + | 674 | v1.screenpos[2].ToFloat32() * w1 + |
| 669 | v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum); | 675 | v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum); |
| @@ -671,36 +677,36 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 671 | 677 | ||
| 672 | bool pass = false; | 678 | bool pass = false; |
| 673 | 679 | ||
| 674 | switch (registers.output_merger.depth_test_func) { | 680 | switch (output_merger.depth_test_func) { |
| 675 | case registers.output_merger.Never: | 681 | case Regs::CompareFunc::Never: |
| 676 | pass = false; | 682 | pass = false; |
| 677 | break; | 683 | break; |
| 678 | 684 | ||
| 679 | case registers.output_merger.Always: | 685 | case Regs::CompareFunc::Always: |
| 680 | pass = true; | 686 | pass = true; |
| 681 | break; | 687 | break; |
| 682 | 688 | ||
| 683 | case registers.output_merger.Equal: | 689 | case Regs::CompareFunc::Equal: |
| 684 | pass = z == ref_z; | 690 | pass = z == ref_z; |
| 685 | break; | 691 | break; |
| 686 | 692 | ||
| 687 | case registers.output_merger.NotEqual: | 693 | case Regs::CompareFunc::NotEqual: |
| 688 | pass = z != ref_z; | 694 | pass = z != ref_z; |
| 689 | break; | 695 | break; |
| 690 | 696 | ||
| 691 | case registers.output_merger.LessThan: | 697 | case Regs::CompareFunc::LessThan: |
| 692 | pass = z < ref_z; | 698 | pass = z < ref_z; |
| 693 | break; | 699 | break; |
| 694 | 700 | ||
| 695 | case registers.output_merger.LessThanOrEqual: | 701 | case Regs::CompareFunc::LessThanOrEqual: |
| 696 | pass = z <= ref_z; | 702 | pass = z <= ref_z; |
| 697 | break; | 703 | break; |
| 698 | 704 | ||
| 699 | case registers.output_merger.GreaterThan: | 705 | case Regs::CompareFunc::GreaterThan: |
| 700 | pass = z > ref_z; | 706 | pass = z > ref_z; |
| 701 | break; | 707 | break; |
| 702 | 708 | ||
| 703 | case registers.output_merger.GreaterThanOrEqual: | 709 | case Regs::CompareFunc::GreaterThanOrEqual: |
| 704 | pass = z >= ref_z; | 710 | pass = z >= ref_z; |
| 705 | break; | 711 | break; |
| 706 | } | 712 | } |
| @@ -708,59 +714,59 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 708 | if (!pass) | 714 | if (!pass) |
| 709 | continue; | 715 | continue; |
| 710 | 716 | ||
| 711 | if (registers.output_merger.depth_write_enable) | 717 | if (output_merger.depth_write_enable) |
| 712 | SetDepth(x >> 4, y >> 4, z); | 718 | SetDepth(x >> 4, y >> 4, z); |
| 713 | } | 719 | } |
| 714 | 720 | ||
| 715 | auto dest = GetPixel(x >> 4, y >> 4); | 721 | auto dest = GetPixel(x >> 4, y >> 4); |
| 716 | Math::Vec4<u8> blend_output = combiner_output; | 722 | Math::Vec4<u8> blend_output = combiner_output; |
| 717 | 723 | ||
| 718 | if (registers.output_merger.alphablend_enable) { | 724 | if (output_merger.alphablend_enable) { |
| 719 | auto params = registers.output_merger.alpha_blending; | 725 | auto params = output_merger.alpha_blending; |
| 720 | 726 | ||
| 721 | auto LookupFactorRGB = [&](decltype(params)::BlendFactor factor) -> Math::Vec3<u8> { | 727 | auto LookupFactorRGB = [&](Regs::BlendFactor factor) -> Math::Vec3<u8> { |
| 722 | switch (factor) { | 728 | switch (factor) { |
| 723 | case params.Zero: | 729 | case Regs::BlendFactor::Zero : |
| 724 | return Math::Vec3<u8>(0, 0, 0); | 730 | return Math::Vec3<u8>(0, 0, 0); |
| 725 | 731 | ||
| 726 | case params.One: | 732 | case Regs::BlendFactor::One : |
| 727 | return Math::Vec3<u8>(255, 255, 255); | 733 | return Math::Vec3<u8>(255, 255, 255); |
| 728 | 734 | ||
| 729 | case params.SourceColor: | 735 | case Regs::BlendFactor::SourceColor: |
| 730 | return combiner_output.rgb(); | 736 | return combiner_output.rgb(); |
| 731 | 737 | ||
| 732 | case params.OneMinusSourceColor: | 738 | case Regs::BlendFactor::OneMinusSourceColor: |
| 733 | return Math::Vec3<u8>(255 - combiner_output.r(), 255 - combiner_output.g(), 255 - combiner_output.b()); | 739 | return Math::Vec3<u8>(255 - combiner_output.r(), 255 - combiner_output.g(), 255 - combiner_output.b()); |
| 734 | 740 | ||
| 735 | case params.DestColor: | 741 | case Regs::BlendFactor::DestColor: |
| 736 | return dest.rgb(); | 742 | return dest.rgb(); |
| 737 | 743 | ||
| 738 | case params.OneMinusDestColor: | 744 | case Regs::BlendFactor::OneMinusDestColor: |
| 739 | return Math::Vec3<u8>(255 - dest.r(), 255 - dest.g(), 255 - dest.b()); | 745 | return Math::Vec3<u8>(255 - dest.r(), 255 - dest.g(), 255 - dest.b()); |
| 740 | 746 | ||
| 741 | case params.SourceAlpha: | 747 | case Regs::BlendFactor::SourceAlpha: |
| 742 | return Math::Vec3<u8>(combiner_output.a(), combiner_output.a(), combiner_output.a()); | 748 | return Math::Vec3<u8>(combiner_output.a(), combiner_output.a(), combiner_output.a()); |
| 743 | 749 | ||
| 744 | case params.OneMinusSourceAlpha: | 750 | case Regs::BlendFactor::OneMinusSourceAlpha: |
| 745 | return Math::Vec3<u8>(255 - combiner_output.a(), 255 - combiner_output.a(), 255 - combiner_output.a()); | 751 | return Math::Vec3<u8>(255 - combiner_output.a(), 255 - combiner_output.a(), 255 - combiner_output.a()); |
| 746 | 752 | ||
| 747 | case params.DestAlpha: | 753 | case Regs::BlendFactor::DestAlpha: |
| 748 | return Math::Vec3<u8>(dest.a(), dest.a(), dest.a()); | 754 | return Math::Vec3<u8>(dest.a(), dest.a(), dest.a()); |
| 749 | 755 | ||
| 750 | case params.OneMinusDestAlpha: | 756 | case Regs::BlendFactor::OneMinusDestAlpha: |
| 751 | return Math::Vec3<u8>(255 - dest.a(), 255 - dest.a(), 255 - dest.a()); | 757 | return Math::Vec3<u8>(255 - dest.a(), 255 - dest.a(), 255 - dest.a()); |
| 752 | 758 | ||
| 753 | case params.ConstantColor: | 759 | case Regs::BlendFactor::ConstantColor: |
| 754 | return Math::Vec3<u8>(registers.output_merger.blend_const.r, registers.output_merger.blend_const.g, registers.output_merger.blend_const.b); | 760 | return Math::Vec3<u8>(output_merger.blend_const.r, output_merger.blend_const.g, output_merger.blend_const.b); |
| 755 | 761 | ||
| 756 | case params.OneMinusConstantColor: | 762 | case Regs::BlendFactor::OneMinusConstantColor: |
| 757 | return Math::Vec3<u8>(255 - registers.output_merger.blend_const.r, 255 - registers.output_merger.blend_const.g, 255 - registers.output_merger.blend_const.b); | 763 | return Math::Vec3<u8>(255 - output_merger.blend_const.r, 255 - output_merger.blend_const.g, 255 - output_merger.blend_const.b); |
| 758 | 764 | ||
| 759 | case params.ConstantAlpha: | 765 | case Regs::BlendFactor::ConstantAlpha: |
| 760 | return Math::Vec3<u8>(registers.output_merger.blend_const.a, registers.output_merger.blend_const.a, registers.output_merger.blend_const.a); | 766 | return Math::Vec3<u8>(output_merger.blend_const.a, output_merger.blend_const.a, output_merger.blend_const.a); |
| 761 | 767 | ||
| 762 | case params.OneMinusConstantAlpha: | 768 | case Regs::BlendFactor::OneMinusConstantAlpha: |
| 763 | return Math::Vec3<u8>(255 - registers.output_merger.blend_const.a, 255 - registers.output_merger.blend_const.a, 255 - registers.output_merger.blend_const.a); | 769 | return Math::Vec3<u8>(255 - output_merger.blend_const.a, 255 - output_merger.blend_const.a, 255 - output_merger.blend_const.a); |
| 764 | 770 | ||
| 765 | default: | 771 | default: |
| 766 | LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor); | 772 | LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor); |
| @@ -769,31 +775,31 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 769 | } | 775 | } |
| 770 | }; | 776 | }; |
| 771 | 777 | ||
| 772 | auto LookupFactorA = [&](decltype(params)::BlendFactor factor) -> u8 { | 778 | auto LookupFactorA = [&](Regs::BlendFactor factor) -> u8 { |
| 773 | switch (factor) { | 779 | switch (factor) { |
| 774 | case params.Zero: | 780 | case Regs::BlendFactor::Zero: |
| 775 | return 0; | 781 | return 0; |
| 776 | 782 | ||
| 777 | case params.One: | 783 | case Regs::BlendFactor::One: |
| 778 | return 255; | 784 | return 255; |
| 779 | 785 | ||
| 780 | case params.SourceAlpha: | 786 | case Regs::BlendFactor::SourceAlpha: |
| 781 | return combiner_output.a(); | 787 | return combiner_output.a(); |
| 782 | 788 | ||
| 783 | case params.OneMinusSourceAlpha: | 789 | case Regs::BlendFactor::OneMinusSourceAlpha: |
| 784 | return 255 - combiner_output.a(); | 790 | return 255 - combiner_output.a(); |
| 785 | 791 | ||
| 786 | case params.DestAlpha: | 792 | case Regs::BlendFactor::DestAlpha: |
| 787 | return dest.a(); | 793 | return dest.a(); |
| 788 | 794 | ||
| 789 | case params.OneMinusDestAlpha: | 795 | case Regs::BlendFactor::OneMinusDestAlpha: |
| 790 | return 255 - dest.a(); | 796 | return 255 - dest.a(); |
| 791 | 797 | ||
| 792 | case params.ConstantAlpha: | 798 | case Regs::BlendFactor::ConstantAlpha: |
| 793 | return registers.output_merger.blend_const.a; | 799 | return output_merger.blend_const.a; |
| 794 | 800 | ||
| 795 | case params.OneMinusConstantAlpha: | 801 | case Regs::BlendFactor::OneMinusConstantAlpha: |
| 796 | return 255 - registers.output_merger.blend_const.a; | 802 | return 255 - output_merger.blend_const.a; |
| 797 | 803 | ||
| 798 | default: | 804 | default: |
| 799 | LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor); | 805 | LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor); |
| @@ -802,7 +808,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 802 | } | 808 | } |
| 803 | }; | 809 | }; |
| 804 | 810 | ||
| 805 | using BlendEquation = decltype(params)::BlendEquation; | 811 | using BlendEquation = Regs::BlendEquation; |
| 806 | static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, | 812 | static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, |
| 807 | const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, | 813 | const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, |
| 808 | BlendEquation equation) { | 814 | BlendEquation equation) { |
| @@ -812,29 +818,29 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 812 | auto dst_result = (dest * destfactor).Cast<int>(); | 818 | auto dst_result = (dest * destfactor).Cast<int>(); |
| 813 | 819 | ||
| 814 | switch (equation) { | 820 | switch (equation) { |
| 815 | case BlendEquation::Add: | 821 | case Regs::BlendEquation::Add: |
| 816 | result = (src_result + dst_result) / 255; | 822 | result = (src_result + dst_result) / 255; |
| 817 | break; | 823 | break; |
| 818 | 824 | ||
| 819 | case BlendEquation::Subtract: | 825 | case Regs::BlendEquation::Subtract: |
| 820 | result = (src_result - dst_result) / 255; | 826 | result = (src_result - dst_result) / 255; |
| 821 | break; | 827 | break; |
| 822 | 828 | ||
| 823 | case BlendEquation::ReverseSubtract: | 829 | case Regs::BlendEquation::ReverseSubtract: |
| 824 | result = (dst_result - src_result) / 255; | 830 | result = (dst_result - src_result) / 255; |
| 825 | break; | 831 | break; |
| 826 | 832 | ||
| 827 | // TODO: How do these two actually work? | 833 | // TODO: How do these two actually work? |
| 828 | // OpenGL doesn't include the blend factors in the min/max computations, | 834 | // OpenGL doesn't include the blend factors in the min/max computations, |
| 829 | // but is this what the 3DS actually does? | 835 | // but is this what the 3DS actually does? |
| 830 | case BlendEquation::Min: | 836 | case Regs::BlendEquation::Min: |
| 831 | result.r() = std::min(src.r(), dest.r()); | 837 | result.r() = std::min(src.r(), dest.r()); |
| 832 | result.g() = std::min(src.g(), dest.g()); | 838 | result.g() = std::min(src.g(), dest.g()); |
| 833 | result.b() = std::min(src.b(), dest.b()); | 839 | result.b() = std::min(src.b(), dest.b()); |
| 834 | result.a() = std::min(src.a(), dest.a()); | 840 | result.a() = std::min(src.a(), dest.a()); |
| 835 | break; | 841 | break; |
| 836 | 842 | ||
| 837 | case BlendEquation::Max: | 843 | case Regs::BlendEquation::Max: |
| 838 | result.r() = std::max(src.r(), dest.r()); | 844 | result.r() = std::max(src.r(), dest.r()); |
| 839 | result.g() = std::max(src.g(), dest.g()); | 845 | result.g() = std::max(src.g(), dest.g()); |
| 840 | result.b() = std::max(src.b(), dest.b()); | 846 | result.b() = std::max(src.b(), dest.b()); |
| @@ -860,15 +866,15 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 860 | blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); | 866 | blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); |
| 861 | blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); | 867 | blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); |
| 862 | } else { | 868 | } else { |
| 863 | LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op); | 869 | LOG_CRITICAL(HW_GPU, "logic op: %x", output_merger.logic_op); |
| 864 | UNIMPLEMENTED(); | 870 | UNIMPLEMENTED(); |
| 865 | } | 871 | } |
| 866 | 872 | ||
| 867 | const Math::Vec4<u8> result = { | 873 | const Math::Vec4<u8> result = { |
| 868 | registers.output_merger.red_enable ? blend_output.r() : dest.r(), | 874 | output_merger.red_enable ? blend_output.r() : dest.r(), |
| 869 | registers.output_merger.green_enable ? blend_output.g() : dest.g(), | 875 | output_merger.green_enable ? blend_output.g() : dest.g(), |
| 870 | registers.output_merger.blue_enable ? blend_output.b() : dest.b(), | 876 | output_merger.blue_enable ? blend_output.b() : dest.b(), |
| 871 | registers.output_merger.alpha_enable ? blend_output.a() : dest.a() | 877 | output_merger.alpha_enable ? blend_output.a() : dest.a() |
| 872 | }; | 878 | }; |
| 873 | 879 | ||
| 874 | DrawPixel(x >> 4, y >> 4, result); | 880 | DrawPixel(x >> 4, y >> 4, result); |