diff options
Diffstat (limited to 'src/video_core/rasterizer.cpp')
| -rw-r--r-- | src/video_core/rasterizer.cpp | 457 |
1 files changed, 255 insertions, 202 deletions
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 6f369a00e..dbdc37ce6 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp | |||
| @@ -15,16 +15,16 @@ | |||
| 15 | #include "common/microprofile.h" | 15 | #include "common/microprofile.h" |
| 16 | #include "common/vector_math.h" | 16 | #include "common/vector_math.h" |
| 17 | 17 | ||
| 18 | #include "core/memory.h" | ||
| 19 | #include "core/hw/gpu.h" | 18 | #include "core/hw/gpu.h" |
| 19 | #include "core/memory.h" | ||
| 20 | 20 | ||
| 21 | #include "video_core/debug_utils/debug_utils.h" | 21 | #include "video_core/debug_utils/debug_utils.h" |
| 22 | #include "video_core/pica.h" | 22 | #include "video_core/pica.h" |
| 23 | #include "video_core/pica_state.h" | 23 | #include "video_core/pica_state.h" |
| 24 | #include "video_core/pica_types.h" | 24 | #include "video_core/pica_types.h" |
| 25 | #include "video_core/rasterizer.h" | 25 | #include "video_core/rasterizer.h" |
| 26 | #include "video_core/utils.h" | ||
| 27 | #include "video_core/shader/shader.h" | 26 | #include "video_core/shader/shader.h" |
| 27 | #include "video_core/utils.h" | ||
| 28 | 28 | ||
| 29 | namespace Pica { | 29 | namespace Pica { |
| 30 | 30 | ||
| @@ -39,8 +39,10 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | |||
| 39 | y = framebuffer.height - y; | 39 | y = framebuffer.height - y; |
| 40 | 40 | ||
| 41 | const u32 coarse_y = y & ~7; | 41 | const u32 coarse_y = y & ~7; |
| 42 | u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); | 42 | u32 bytes_per_pixel = |
| 43 | u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; | 43 | GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); |
| 44 | u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + | ||
| 45 | coarse_y * framebuffer.width * bytes_per_pixel; | ||
| 44 | u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; | 46 | u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; |
| 45 | 47 | ||
| 46 | switch (framebuffer.color_format) { | 48 | switch (framebuffer.color_format) { |
| @@ -65,7 +67,8 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | |||
| 65 | break; | 67 | break; |
| 66 | 68 | ||
| 67 | default: | 69 | default: |
| 68 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value()); | 70 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", |
| 71 | framebuffer.color_format.Value()); | ||
| 69 | UNIMPLEMENTED(); | 72 | UNIMPLEMENTED(); |
| 70 | } | 73 | } |
| 71 | } | 74 | } |
| @@ -77,8 +80,10 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { | |||
| 77 | y = framebuffer.height - y; | 80 | y = framebuffer.height - y; |
| 78 | 81 | ||
| 79 | const u32 coarse_y = y & ~7; | 82 | const u32 coarse_y = y & ~7; |
| 80 | u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); | 83 | u32 bytes_per_pixel = |
| 81 | u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; | 84 | GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); |
| 85 | u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + | ||
| 86 | coarse_y * framebuffer.width * bytes_per_pixel; | ||
| 82 | u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; | 87 | u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; |
| 83 | 88 | ||
| 84 | switch (framebuffer.color_format) { | 89 | switch (framebuffer.color_format) { |
| @@ -98,7 +103,8 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { | |||
| 98 | return Color::DecodeRGBA4(src_pixel); | 103 | return Color::DecodeRGBA4(src_pixel); |
| 99 | 104 | ||
| 100 | default: | 105 | default: |
| 101 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value()); | 106 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", |
| 107 | framebuffer.color_format.Value()); | ||
| 102 | UNIMPLEMENTED(); | 108 | UNIMPLEMENTED(); |
| 103 | } | 109 | } |
| 104 | 110 | ||
| @@ -120,16 +126,16 @@ static u32 GetDepth(int x, int y) { | |||
| 120 | u8* src_pixel = depth_buffer + src_offset; | 126 | u8* src_pixel = depth_buffer + src_offset; |
| 121 | 127 | ||
| 122 | switch (framebuffer.depth_format) { | 128 | switch (framebuffer.depth_format) { |
| 123 | case Regs::DepthFormat::D16: | 129 | case Regs::DepthFormat::D16: |
| 124 | return Color::DecodeD16(src_pixel); | 130 | return Color::DecodeD16(src_pixel); |
| 125 | case Regs::DepthFormat::D24: | 131 | case Regs::DepthFormat::D24: |
| 126 | return Color::DecodeD24(src_pixel); | 132 | return Color::DecodeD24(src_pixel); |
| 127 | case Regs::DepthFormat::D24S8: | 133 | case Regs::DepthFormat::D24S8: |
| 128 | return Color::DecodeD24S8(src_pixel).x; | 134 | return Color::DecodeD24S8(src_pixel).x; |
| 129 | default: | 135 | default: |
| 130 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | 136 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); |
| 131 | UNIMPLEMENTED(); | 137 | UNIMPLEMENTED(); |
| 132 | return 0; | 138 | return 0; |
| 133 | } | 139 | } |
| 134 | } | 140 | } |
| 135 | 141 | ||
| @@ -148,12 +154,15 @@ static u8 GetStencil(int x, int y) { | |||
| 148 | u8* src_pixel = depth_buffer + src_offset; | 154 | u8* src_pixel = depth_buffer + src_offset; |
| 149 | 155 | ||
| 150 | switch (framebuffer.depth_format) { | 156 | switch (framebuffer.depth_format) { |
| 151 | case Regs::DepthFormat::D24S8: | 157 | case Regs::DepthFormat::D24S8: |
| 152 | return Color::DecodeD24S8(src_pixel).y; | 158 | return Color::DecodeD24S8(src_pixel).y; |
| 153 | 159 | ||
| 154 | default: | 160 | default: |
| 155 | LOG_WARNING(HW_GPU, "GetStencil called for function which doesn't have a stencil component (format %u)", framebuffer.depth_format); | 161 | LOG_WARNING( |
| 156 | return 0; | 162 | HW_GPU, |
| 163 | "GetStencil called for function which doesn't have a stencil component (format %u)", | ||
| 164 | framebuffer.depth_format); | ||
| 165 | return 0; | ||
| 157 | } | 166 | } |
| 158 | } | 167 | } |
| 159 | 168 | ||
| @@ -172,22 +181,22 @@ static void SetDepth(int x, int y, u32 value) { | |||
| 172 | u8* dst_pixel = depth_buffer + dst_offset; | 181 | u8* dst_pixel = depth_buffer + dst_offset; |
| 173 | 182 | ||
| 174 | switch (framebuffer.depth_format) { | 183 | switch (framebuffer.depth_format) { |
| 175 | case Regs::DepthFormat::D16: | 184 | case Regs::DepthFormat::D16: |
| 176 | Color::EncodeD16(value, dst_pixel); | 185 | Color::EncodeD16(value, dst_pixel); |
| 177 | break; | 186 | break; |
| 178 | 187 | ||
| 179 | case Regs::DepthFormat::D24: | 188 | case Regs::DepthFormat::D24: |
| 180 | Color::EncodeD24(value, dst_pixel); | 189 | Color::EncodeD24(value, dst_pixel); |
| 181 | break; | 190 | break; |
| 182 | 191 | ||
| 183 | case Regs::DepthFormat::D24S8: | 192 | case Regs::DepthFormat::D24S8: |
| 184 | Color::EncodeD24X8(value, dst_pixel); | 193 | Color::EncodeD24X8(value, dst_pixel); |
| 185 | break; | 194 | break; |
| 186 | 195 | ||
| 187 | default: | 196 | default: |
| 188 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | 197 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); |
| 189 | UNIMPLEMENTED(); | 198 | UNIMPLEMENTED(); |
| 190 | break; | 199 | break; |
| 191 | } | 200 | } |
| 192 | } | 201 | } |
| 193 | 202 | ||
| @@ -206,19 +215,19 @@ static void SetStencil(int x, int y, u8 value) { | |||
| 206 | u8* dst_pixel = depth_buffer + dst_offset; | 215 | u8* dst_pixel = depth_buffer + dst_offset; |
| 207 | 216 | ||
| 208 | switch (framebuffer.depth_format) { | 217 | switch (framebuffer.depth_format) { |
| 209 | case Pica::Regs::DepthFormat::D16: | 218 | case Pica::Regs::DepthFormat::D16: |
| 210 | case Pica::Regs::DepthFormat::D24: | 219 | case Pica::Regs::DepthFormat::D24: |
| 211 | // Nothing to do | 220 | // Nothing to do |
| 212 | break; | 221 | break; |
| 213 | 222 | ||
| 214 | case Pica::Regs::DepthFormat::D24S8: | 223 | case Pica::Regs::DepthFormat::D24S8: |
| 215 | Color::EncodeX24S8(value, dst_pixel); | 224 | Color::EncodeX24S8(value, dst_pixel); |
| 216 | break; | 225 | break; |
| 217 | 226 | ||
| 218 | default: | 227 | default: |
| 219 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | 228 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); |
| 220 | UNIMPLEMENTED(); | 229 | UNIMPLEMENTED(); |
| 221 | break; | 230 | break; |
| 222 | } | 231 | } |
| 223 | } | 232 | } |
| 224 | 233 | ||
| @@ -259,18 +268,24 @@ static u8 PerformStencilAction(Regs::StencilAction action, u8 old_stencil, u8 re | |||
| 259 | 268 | ||
| 260 | // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values | 269 | // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values |
| 261 | struct Fix12P4 { | 270 | struct Fix12P4 { |
| 262 | Fix12P4() {} | 271 | Fix12P4() { |
| 263 | Fix12P4(u16 val) : val(val) {} | 272 | } |
| 273 | Fix12P4(u16 val) : val(val) { | ||
| 274 | } | ||
| 264 | 275 | ||
| 265 | static u16 FracMask() { return 0xF; } | 276 | static u16 FracMask() { |
| 266 | static u16 IntMask() { return (u16)~0xF; } | 277 | return 0xF; |
| 278 | } | ||
| 279 | static u16 IntMask() { | ||
| 280 | return (u16)~0xF; | ||
| 281 | } | ||
| 267 | 282 | ||
| 268 | operator u16() const { | 283 | operator u16() const { |
| 269 | return val; | 284 | return val; |
| 270 | } | 285 | } |
| 271 | 286 | ||
| 272 | bool operator < (const Fix12P4& oth) const { | 287 | bool operator<(const Fix12P4& oth) const { |
| 273 | return (u16)*this < (u16)oth; | 288 | return (u16) * this < (u16)oth; |
| 274 | } | 289 | } |
| 275 | 290 | ||
| 276 | private: | 291 | private: |
| @@ -283,9 +298,8 @@ private: | |||
| 283 | * | 298 | * |
| 284 | * @todo define orientation concretely. | 299 | * @todo define orientation concretely. |
| 285 | */ | 300 | */ |
| 286 | static int SignedArea (const Math::Vec2<Fix12P4>& vtx1, | 301 | static int SignedArea(const Math::Vec2<Fix12P4>& vtx1, const Math::Vec2<Fix12P4>& vtx2, |
| 287 | const Math::Vec2<Fix12P4>& vtx2, | 302 | const Math::Vec2<Fix12P4>& vtx3) { |
| 288 | const Math::Vec2<Fix12P4>& vtx3) { | ||
| 289 | const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); | 303 | const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); |
| 290 | const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); | 304 | const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); |
| 291 | // TODO: There is a very small chance this will overflow for sizeof(int) == 4 | 305 | // TODO: There is a very small chance this will overflow for sizeof(int) == 4 |
| @@ -298,11 +312,8 @@ MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 24 | |||
| 298 | * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing | 312 | * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing |
| 299 | * culling via recursion. | 313 | * culling via recursion. |
| 300 | */ | 314 | */ |
| 301 | static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | 315 | static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, |
| 302 | const Shader::OutputVertex& v1, | 316 | const Shader::OutputVertex& v2, bool reversed = false) { |
| 303 | const Shader::OutputVertex& v2, | ||
| 304 | bool reversed = false) | ||
| 305 | { | ||
| 306 | const auto& regs = g_state.regs; | 317 | const auto& regs = g_state.regs; |
| 307 | MICROPROFILE_SCOPE(GPU_Rasterization); | 318 | MICROPROFILE_SCOPE(GPU_Rasterization); |
| 308 | 319 | ||
| @@ -316,9 +327,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 316 | return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; | 327 | return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; |
| 317 | }; | 328 | }; |
| 318 | 329 | ||
| 319 | Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos), | 330 | Math::Vec3<Fix12P4> vtxpos[3]{ScreenToRasterizerCoordinates(v0.screenpos), |
| 320 | ScreenToRasterizerCoordinates(v1.screenpos), | 331 | ScreenToRasterizerCoordinates(v1.screenpos), |
| 321 | ScreenToRasterizerCoordinates(v2.screenpos) }; | 332 | ScreenToRasterizerCoordinates(v2.screenpos)}; |
| 322 | 333 | ||
| 323 | if (regs.cull_mode == Regs::CullMode::KeepAll) { | 334 | if (regs.cull_mode == Regs::CullMode::KeepAll) { |
| 324 | // Make sure we always end up with a triangle wound counter-clockwise | 335 | // Make sure we always end up with a triangle wound counter-clockwise |
| @@ -344,8 +355,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 344 | u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); | 355 | u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); |
| 345 | 356 | ||
| 346 | // Convert the scissor box coordinates to 12.4 fixed point | 357 | // Convert the scissor box coordinates to 12.4 fixed point |
| 347 | u16 scissor_x1 = (u16)( regs.scissor_test.x1 << 4); | 358 | u16 scissor_x1 = (u16)(regs.scissor_test.x1 << 4); |
| 348 | u16 scissor_y1 = (u16)( regs.scissor_test.y1 << 4); | 359 | u16 scissor_y1 = (u16)(regs.scissor_test.y1 << 4); |
| 349 | // x2,y2 have +1 added to cover the entire sub-pixel area | 360 | // x2,y2 have +1 added to cover the entire sub-pixel area |
| 350 | u16 scissor_x2 = (u16)((regs.scissor_test.x2 + 1) << 4); | 361 | u16 scissor_x2 = (u16)((regs.scissor_test.x2 + 1) << 4); |
| 351 | u16 scissor_y2 = (u16)((regs.scissor_test.y2 + 1) << 4); | 362 | u16 scissor_y2 = (u16)((regs.scissor_test.y2 + 1) << 4); |
| @@ -369,27 +380,32 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 369 | // NOTE: These are the PSP filling rules. Not sure if the 3DS uses the same ones... | 380 | // NOTE: These are the PSP filling rules. Not sure if the 3DS uses the same ones... |
| 370 | auto IsRightSideOrFlatBottomEdge = [](const Math::Vec2<Fix12P4>& vtx, | 381 | auto IsRightSideOrFlatBottomEdge = [](const Math::Vec2<Fix12P4>& vtx, |
| 371 | const Math::Vec2<Fix12P4>& line1, | 382 | const Math::Vec2<Fix12P4>& line1, |
| 372 | const Math::Vec2<Fix12P4>& line2) | 383 | const Math::Vec2<Fix12P4>& line2) { |
| 373 | { | ||
| 374 | if (line1.y == line2.y) { | 384 | if (line1.y == line2.y) { |
| 375 | // just check if vertex is above us => bottom line parallel to x-axis | 385 | // just check if vertex is above us => bottom line parallel to x-axis |
| 376 | return vtx.y < line1.y; | 386 | return vtx.y < line1.y; |
| 377 | } else { | 387 | } else { |
| 378 | // check if vertex is on our left => right side | 388 | // check if vertex is on our left => right side |
| 379 | // TODO: Not sure how likely this is to overflow | 389 | // TODO: Not sure how likely this is to overflow |
| 380 | return (int)vtx.x < (int)line1.x + ((int)line2.x - (int)line1.x) * ((int)vtx.y - (int)line1.y) / ((int)line2.y - (int)line1.y); | 390 | return (int)vtx.x < (int)line1.x + |
| 391 | ((int)line2.x - (int)line1.x) * ((int)vtx.y - (int)line1.y) / | ||
| 392 | ((int)line2.y - (int)line1.y); | ||
| 381 | } | 393 | } |
| 382 | }; | 394 | }; |
| 383 | int bias0 = IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0; | 395 | int bias0 = |
| 384 | int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; | 396 | IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0; |
| 385 | int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; | 397 | int bias1 = |
| 398 | IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; | ||
| 399 | int bias2 = | ||
| 400 | IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; | ||
| 386 | 401 | ||
| 387 | auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); | 402 | auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); |
| 388 | 403 | ||
| 389 | auto textures = regs.GetTextures(); | 404 | auto textures = regs.GetTextures(); |
| 390 | auto tev_stages = regs.GetTevStages(); | 405 | auto tev_stages = regs.GetTevStages(); |
| 391 | 406 | ||
| 392 | bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8; | 407 | bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && |
| 408 | g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8; | ||
| 393 | const auto stencil_test = g_state.regs.output_merger.stencil_test; | 409 | const auto stencil_test = g_state.regs.output_merger.stencil_test; |
| 394 | 410 | ||
| 395 | // Enter rasterization loop, starting at the center of the topleft bounding box corner. | 411 | // Enter rasterization loop, starting at the center of the topleft bounding box corner. |
| @@ -397,10 +413,10 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 397 | for (u16 y = min_y + 8; y < max_y; y += 0x10) { | 413 | for (u16 y = min_y + 8; y < max_y; y += 0x10) { |
| 398 | for (u16 x = min_x + 8; x < max_x; x += 0x10) { | 414 | for (u16 x = min_x + 8; x < max_x; x += 0x10) { |
| 399 | 415 | ||
| 400 | // Do not process the pixel if it's inside the scissor box and the scissor mode is set to Exclude | 416 | // Do not process the pixel if it's inside the scissor box and the scissor mode is set |
| 417 | // to Exclude | ||
| 401 | if (regs.scissor_test.mode == Regs::ScissorMode::Exclude) { | 418 | if (regs.scissor_test.mode == Regs::ScissorMode::Exclude) { |
| 402 | if (x >= scissor_x1 && x < scissor_x2 && | 419 | if (x >= scissor_x1 && x < scissor_x2 && y >= scissor_y1 && y < scissor_y2) |
| 403 | y >= scissor_y1 && y < scissor_y2) | ||
| 404 | continue; | 420 | continue; |
| 405 | } | 421 | } |
| 406 | 422 | ||
| @@ -414,15 +430,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 414 | if (w0 < 0 || w1 < 0 || w2 < 0) | 430 | if (w0 < 0 || w1 < 0 || w2 < 0) |
| 415 | continue; | 431 | continue; |
| 416 | 432 | ||
| 417 | auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)), | 433 | auto baricentric_coordinates = |
| 418 | float24::FromFloat32(static_cast<float>(w1)), | 434 | Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)), |
| 419 | float24::FromFloat32(static_cast<float>(w2))); | 435 | float24::FromFloat32(static_cast<float>(w1)), |
| 420 | float24 interpolated_w_inverse = float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates); | 436 | float24::FromFloat32(static_cast<float>(w2))); |
| 437 | float24 interpolated_w_inverse = | ||
| 438 | float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates); | ||
| 421 | 439 | ||
| 422 | // interpolated_z = z / w | 440 | // interpolated_z = z / w |
| 423 | float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 + | 441 | float interpolated_z_over_w = |
| 424 | v1.screenpos[2].ToFloat32() * w1 + | 442 | (v0.screenpos[2].ToFloat32() * w0 + v1.screenpos[2].ToFloat32() * w1 + |
| 425 | v2.screenpos[2].ToFloat32() * w2) / wsum; | 443 | v2.screenpos[2].ToFloat32() * w2) / |
| 444 | wsum; | ||
| 426 | 445 | ||
| 427 | // Not fully accurate. About 3 bits in precision are missing. | 446 | // Not fully accurate. About 3 bits in precision are missing. |
| 428 | // Z-Buffer (z / w * scale + offset) | 447 | // Z-Buffer (z / w * scale + offset) |
| @@ -461,11 +480,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 461 | }; | 480 | }; |
| 462 | 481 | ||
| 463 | Math::Vec4<u8> primary_color{ | 482 | Math::Vec4<u8> primary_color{ |
| 464 | (u8)(GetInterpolatedAttribute(v0.color.r(), v1.color.r(), v2.color.r()).ToFloat32() * 255), | 483 | (u8)( |
| 465 | (u8)(GetInterpolatedAttribute(v0.color.g(), v1.color.g(), v2.color.g()).ToFloat32() * 255), | 484 | GetInterpolatedAttribute(v0.color.r(), v1.color.r(), v2.color.r()).ToFloat32() * |
| 466 | (u8)(GetInterpolatedAttribute(v0.color.b(), v1.color.b(), v2.color.b()).ToFloat32() * 255), | 485 | 255), |
| 467 | (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) | 486 | (u8)( |
| 468 | }; | 487 | GetInterpolatedAttribute(v0.color.g(), v1.color.g(), v2.color.g()).ToFloat32() * |
| 488 | 255), | ||
| 489 | (u8)( | ||
| 490 | GetInterpolatedAttribute(v0.color.b(), v1.color.b(), v2.color.b()).ToFloat32() * | ||
| 491 | 255), | ||
| 492 | (u8)( | ||
| 493 | GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * | ||
| 494 | 255)}; | ||
| 469 | 495 | ||
| 470 | Math::Vec2<float24> uv[3]; | 496 | Math::Vec2<float24> uv[3]; |
| 471 | uv[0].u() = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); | 497 | uv[0].u() = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); |
| @@ -489,7 +515,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 489 | // Only unit 0 respects the texturing type (according to 3DBrew) | 515 | // Only unit 0 respects the texturing type (according to 3DBrew) |
| 490 | // TODO: Refactor so cubemaps and shadowmaps can be handled | 516 | // TODO: Refactor so cubemaps and shadowmaps can be handled |
| 491 | if (i == 0) { | 517 | if (i == 0) { |
| 492 | switch(texture.config.type) { | 518 | switch (texture.config.type) { |
| 493 | case Regs::TextureConfig::Texture2D: | 519 | case Regs::TextureConfig::Texture2D: |
| 494 | break; | 520 | break; |
| 495 | case Regs::TextureConfig::Projection2D: { | 521 | case Regs::TextureConfig::Projection2D: { |
| @@ -506,51 +532,58 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 506 | } | 532 | } |
| 507 | } | 533 | } |
| 508 | 534 | ||
| 509 | int s = (int)(u * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32(); | 535 | int s = (int)(u * float24::FromFloat32(static_cast<float>(texture.config.width))) |
| 510 | int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32(); | 536 | .ToFloat32(); |
| 511 | 537 | int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))) | |
| 538 | .ToFloat32(); | ||
| 512 | 539 | ||
| 513 | static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { | 540 | static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, |
| 541 | unsigned size) { | ||
| 514 | switch (mode) { | 542 | switch (mode) { |
| 515 | case Regs::TextureConfig::ClampToEdge: | 543 | case Regs::TextureConfig::ClampToEdge: |
| 516 | val = std::max(val, 0); | 544 | val = std::max(val, 0); |
| 517 | val = std::min(val, (int)size - 1); | 545 | val = std::min(val, (int)size - 1); |
| 518 | return val; | 546 | return val; |
| 519 | 547 | ||
| 520 | case Regs::TextureConfig::ClampToBorder: | 548 | case Regs::TextureConfig::ClampToBorder: |
| 521 | return val; | 549 | return val; |
| 522 | 550 | ||
| 523 | case Regs::TextureConfig::Repeat: | 551 | case Regs::TextureConfig::Repeat: |
| 524 | return (int)((unsigned)val % size); | 552 | return (int)((unsigned)val % size); |
| 525 | 553 | ||
| 526 | case Regs::TextureConfig::MirroredRepeat: | 554 | case Regs::TextureConfig::MirroredRepeat: { |
| 527 | { | 555 | unsigned int coord = ((unsigned)val % (2 * size)); |
| 528 | unsigned int coord = ((unsigned)val % (2 * size)); | 556 | if (coord >= size) |
| 529 | if (coord >= size) | 557 | coord = 2 * size - 1 - coord; |
| 530 | coord = 2 * size - 1 - coord; | 558 | return (int)coord; |
| 531 | return (int)coord; | 559 | } |
| 532 | } | 560 | |
| 533 | 561 | default: | |
| 534 | default: | 562 | LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); |
| 535 | LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); | 563 | UNIMPLEMENTED(); |
| 536 | UNIMPLEMENTED(); | 564 | return 0; |
| 537 | return 0; | ||
| 538 | } | 565 | } |
| 539 | }; | 566 | }; |
| 540 | 567 | ||
| 541 | if ((texture.config.wrap_s == Regs::TextureConfig::ClampToBorder && (s < 0 || s >= texture.config.width)) | 568 | if ((texture.config.wrap_s == Regs::TextureConfig::ClampToBorder && |
| 542 | || (texture.config.wrap_t == Regs::TextureConfig::ClampToBorder && (t < 0 || t >= texture.config.height))) { | 569 | (s < 0 || s >= texture.config.width)) || |
| 570 | (texture.config.wrap_t == Regs::TextureConfig::ClampToBorder && | ||
| 571 | (t < 0 || t >= texture.config.height))) { | ||
| 543 | auto border_color = texture.config.border_color; | 572 | auto border_color = texture.config.border_color; |
| 544 | texture_color[i] = { border_color.r, border_color.g, border_color.b, border_color.a }; | 573 | texture_color[i] = {border_color.r, border_color.g, border_color.b, |
| 574 | border_color.a}; | ||
| 545 | } else { | 575 | } else { |
| 546 | // Textures are laid out from bottom to top, hence we invert the t coordinate. | 576 | // Textures are laid out from bottom to top, hence we invert the t coordinate. |
| 547 | // NOTE: This may not be the right place for the inversion. | 577 | // NOTE: This may not be the right place for the inversion. |
| 548 | // TODO: Check if this applies to ETC textures, too. | 578 | // TODO: Check if this applies to ETC textures, too. |
| 549 | s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); | 579 | s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); |
| 550 | t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); | 580 | t = texture.config.height - 1 - |
| 581 | GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); | ||
| 551 | 582 | ||
| 552 | u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); | 583 | u8* texture_data = |
| 553 | auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); | 584 | Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); |
| 585 | auto info = | ||
| 586 | DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); | ||
| 554 | 587 | ||
| 555 | // TODO: Apply the min and mag filters to the texture | 588 | // TODO: Apply the min and mag filters to the texture |
| 556 | texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info); | 589 | texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info); |
| @@ -571,10 +604,10 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 571 | Math::Vec4<u8> combiner_buffer = {0, 0, 0, 0}; | 604 | Math::Vec4<u8> combiner_buffer = {0, 0, 0, 0}; |
| 572 | Math::Vec4<u8> next_combiner_buffer = { | 605 | Math::Vec4<u8> next_combiner_buffer = { |
| 573 | regs.tev_combiner_buffer_color.r, regs.tev_combiner_buffer_color.g, | 606 | regs.tev_combiner_buffer_color.r, regs.tev_combiner_buffer_color.g, |
| 574 | regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a | 607 | regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a}; |
| 575 | }; | ||
| 576 | 608 | ||
| 577 | for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { | 609 | for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); |
| 610 | ++tev_stage_index) { | ||
| 578 | const auto& tev_stage = tev_stages[tev_stage_index]; | 611 | const auto& tev_stage = tev_stages[tev_stage_index]; |
| 579 | using Source = Regs::TevStageConfig::Source; | 612 | using Source = Regs::TevStageConfig::Source; |
| 580 | using ColorModifier = Regs::TevStageConfig::ColorModifier; | 613 | using ColorModifier = Regs::TevStageConfig::ColorModifier; |
| @@ -606,7 +639,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 606 | return combiner_buffer; | 639 | return combiner_buffer; |
| 607 | 640 | ||
| 608 | case Source::Constant: | 641 | case Source::Constant: |
| 609 | return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a}; | 642 | return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, |
| 643 | tev_stage.const_a}; | ||
| 610 | 644 | ||
| 611 | case Source::Previous: | 645 | case Source::Previous: |
| 612 | return combiner_output; | 646 | return combiner_output; |
| @@ -618,7 +652,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 618 | } | 652 | } |
| 619 | }; | 653 | }; |
| 620 | 654 | ||
| 621 | static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> { | 655 | static auto GetColorModifier = [](ColorModifier factor, |
| 656 | const Math::Vec4<u8>& values) -> Math::Vec3<u8> { | ||
| 622 | switch (factor) { | 657 | switch (factor) { |
| 623 | case ColorModifier::SourceColor: | 658 | case ColorModifier::SourceColor: |
| 624 | return values.rgb(); | 659 | return values.rgb(); |
| @@ -652,7 +687,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 652 | } | 687 | } |
| 653 | }; | 688 | }; |
| 654 | 689 | ||
| 655 | static auto GetAlphaModifier = [](AlphaModifier factor, const Math::Vec4<u8>& values) -> u8 { | 690 | static auto GetAlphaModifier = [](AlphaModifier factor, |
| 691 | const Math::Vec4<u8>& values) -> u8 { | ||
| 656 | switch (factor) { | 692 | switch (factor) { |
| 657 | case AlphaModifier::SourceAlpha: | 693 | case AlphaModifier::SourceAlpha: |
| 658 | return values.a(); | 694 | return values.a(); |
| @@ -680,7 +716,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 680 | } | 716 | } |
| 681 | }; | 717 | }; |
| 682 | 718 | ||
| 683 | static auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { | 719 | static auto ColorCombine = [](Operation op, |
| 720 | const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { | ||
| 684 | switch (op) { | 721 | switch (op) { |
| 685 | case Operation::Replace: | 722 | case Operation::Replace: |
| 686 | return input[0]; | 723 | return input[0]; |
| @@ -688,8 +725,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 688 | case Operation::Modulate: | 725 | case Operation::Modulate: |
| 689 | return ((input[0] * input[1]) / 255).Cast<u8>(); | 726 | return ((input[0] * input[1]) / 255).Cast<u8>(); |
| 690 | 727 | ||
| 691 | case Operation::Add: | 728 | case Operation::Add: { |
| 692 | { | ||
| 693 | auto result = input[0] + input[1]; | 729 | auto result = input[0] + input[1]; |
| 694 | result.r() = std::min(255, result.r()); | 730 | result.r() = std::min(255, result.r()); |
| 695 | result.g() = std::min(255, result.g()); | 731 | result.g() = std::min(255, result.g()); |
| @@ -697,10 +733,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 697 | return result.Cast<u8>(); | 733 | return result.Cast<u8>(); |
| 698 | } | 734 | } |
| 699 | 735 | ||
| 700 | case Operation::AddSigned: | 736 | case Operation::AddSigned: { |
| 701 | { | 737 | // TODO(bunnei): Verify that the color conversion from (float) 0.5f to |
| 702 | // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct | 738 | // (byte) 128 is correct |
| 703 | auto result = input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128); | 739 | auto result = input[0].Cast<int>() + input[1].Cast<int>() - |
| 740 | Math::MakeVec<int>(128, 128, 128); | ||
| 704 | result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); | 741 | result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); |
| 705 | result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); | 742 | result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); |
| 706 | result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); | 743 | result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); |
| @@ -708,10 +745,13 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 708 | } | 745 | } |
| 709 | 746 | ||
| 710 | case Operation::Lerp: | 747 | case Operation::Lerp: |
| 711 | return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); | 748 | return ((input[0] * input[2] + |
| 749 | input[1] * | ||
| 750 | (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / | ||
| 751 | 255) | ||
| 752 | .Cast<u8>(); | ||
| 712 | 753 | ||
| 713 | case Operation::Subtract: | 754 | case Operation::Subtract: { |
| 714 | { | ||
| 715 | auto result = input[0].Cast<int>() - input[1].Cast<int>(); | 755 | auto result = input[0].Cast<int>() - input[1].Cast<int>(); |
| 716 | result.r() = std::max(0, result.r()); | 756 | result.r() = std::max(0, result.r()); |
| 717 | result.g() = std::max(0, result.g()); | 757 | result.g() = std::max(0, result.g()); |
| @@ -719,8 +759,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 719 | return result.Cast<u8>(); | 759 | return result.Cast<u8>(); |
| 720 | } | 760 | } |
| 721 | 761 | ||
| 722 | case Operation::MultiplyThenAdd: | 762 | case Operation::MultiplyThenAdd: { |
| 723 | { | ||
| 724 | auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255; | 763 | auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255; |
| 725 | result.r() = std::min(255, result.r()); | 764 | result.r() = std::min(255, result.r()); |
| 726 | result.g() = std::min(255, result.g()); | 765 | result.g() = std::min(255, result.g()); |
| @@ -728,8 +767,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 728 | return result.Cast<u8>(); | 767 | return result.Cast<u8>(); |
| 729 | } | 768 | } |
| 730 | 769 | ||
| 731 | case Operation::AddThenMultiply: | 770 | case Operation::AddThenMultiply: { |
| 732 | { | ||
| 733 | auto result = input[0] + input[1]; | 771 | auto result = input[0] + input[1]; |
| 734 | result.r() = std::min(255, result.r()); | 772 | result.r() = std::min(255, result.r()); |
| 735 | result.g() = std::min(255, result.g()); | 773 | result.g() = std::min(255, result.g()); |
| @@ -737,17 +775,19 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 737 | result = (result * input[2].Cast<int>()) / 255; | 775 | result = (result * input[2].Cast<int>()) / 255; |
| 738 | return result.Cast<u8>(); | 776 | return result.Cast<u8>(); |
| 739 | } | 777 | } |
| 740 | case Operation::Dot3_RGB: | 778 | case Operation::Dot3_RGB: { |
| 741 | { | ||
| 742 | // Not fully accurate. | 779 | // Not fully accurate. |
| 743 | // Worst case scenario seems to yield a +/-3 error | 780 | // Worst case scenario seems to yield a +/-3 error |
| 744 | // Some HW results indicate that the per-component computation can't have a higher precision than 1/256, | 781 | // Some HW results indicate that the per-component computation can't have a |
| 745 | // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb( (0x80,g0,b0),(0x80,g1,b1) ) give different results | 782 | // higher precision than 1/256, |
| 746 | int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + | 783 | // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb( |
| 747 | ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + | 784 | // (0x80,g0,b0),(0x80,g1,b1) ) give different results |
| 748 | ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; | 785 | int result = |
| 786 | ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + | ||
| 787 | ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + | ||
| 788 | ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; | ||
| 749 | result = std::max(0, std::min(255, result)); | 789 | result = std::max(0, std::min(255, result)); |
| 750 | return { (u8)result, (u8)result, (u8)result }; | 790 | return {(u8)result, (u8)result, (u8)result}; |
| 751 | } | 791 | } |
| 752 | default: | 792 | default: |
| 753 | LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op); | 793 | LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op); |
| @@ -756,7 +796,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 756 | } | 796 | } |
| 757 | }; | 797 | }; |
| 758 | 798 | ||
| 759 | static auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 { | 799 | static auto AlphaCombine = [](Operation op, const std::array<u8, 3>& input) -> u8 { |
| 760 | switch (op) { | 800 | switch (op) { |
| 761 | case Operation::Replace: | 801 | case Operation::Replace: |
| 762 | return input[0]; | 802 | return input[0]; |
| @@ -767,9 +807,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 767 | case Operation::Add: | 807 | case Operation::Add: |
| 768 | return std::min(255, input[0] + input[1]); | 808 | return std::min(255, input[0] + input[1]); |
| 769 | 809 | ||
| 770 | case Operation::AddSigned: | 810 | case Operation::AddSigned: { |
| 771 | { | 811 | // TODO(bunnei): Verify that the color conversion from (float) 0.5f to |
| 772 | // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct | 812 | // (byte) 128 is correct |
| 773 | auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128; | 813 | auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128; |
| 774 | return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255)); | 814 | return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255)); |
| 775 | } | 815 | } |
| @@ -801,32 +841,40 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 801 | Math::Vec3<u8> color_result[3] = { | 841 | Math::Vec3<u8> color_result[3] = { |
| 802 | GetColorModifier(tev_stage.color_modifier1, GetSource(tev_stage.color_source1)), | 842 | GetColorModifier(tev_stage.color_modifier1, GetSource(tev_stage.color_source1)), |
| 803 | GetColorModifier(tev_stage.color_modifier2, GetSource(tev_stage.color_source2)), | 843 | GetColorModifier(tev_stage.color_modifier2, GetSource(tev_stage.color_source2)), |
| 804 | GetColorModifier(tev_stage.color_modifier3, GetSource(tev_stage.color_source3)) | 844 | GetColorModifier(tev_stage.color_modifier3, |
| 805 | }; | 845 | GetSource(tev_stage.color_source3))}; |
| 806 | auto color_output = ColorCombine(tev_stage.color_op, color_result); | 846 | auto color_output = ColorCombine(tev_stage.color_op, color_result); |
| 807 | 847 | ||
| 808 | // alpha combiner | 848 | // alpha combiner |
| 809 | std::array<u8,3> alpha_result = {{ | 849 | std::array<u8, 3> alpha_result = { |
| 810 | GetAlphaModifier(tev_stage.alpha_modifier1, GetSource(tev_stage.alpha_source1)), | 850 | {GetAlphaModifier(tev_stage.alpha_modifier1, |
| 811 | GetAlphaModifier(tev_stage.alpha_modifier2, GetSource(tev_stage.alpha_source2)), | 851 | GetSource(tev_stage.alpha_source1)), |
| 812 | GetAlphaModifier(tev_stage.alpha_modifier3, GetSource(tev_stage.alpha_source3)) | 852 | GetAlphaModifier(tev_stage.alpha_modifier2, |
| 813 | }}; | 853 | GetSource(tev_stage.alpha_source2)), |
| 854 | GetAlphaModifier(tev_stage.alpha_modifier3, | ||
| 855 | GetSource(tev_stage.alpha_source3))}}; | ||
| 814 | auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); | 856 | auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); |
| 815 | 857 | ||
| 816 | combiner_output[0] = std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier()); | 858 | combiner_output[0] = |
| 817 | combiner_output[1] = std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier()); | 859 | std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier()); |
| 818 | combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); | 860 | combiner_output[1] = |
| 819 | combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); | 861 | std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier()); |
| 862 | combiner_output[2] = | ||
| 863 | std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); | ||
| 864 | combiner_output[3] = | ||
| 865 | std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); | ||
| 820 | 866 | ||
| 821 | combiner_buffer = next_combiner_buffer; | 867 | combiner_buffer = next_combiner_buffer; |
| 822 | 868 | ||
| 823 | if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) { | 869 | if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor( |
| 870 | tev_stage_index)) { | ||
| 824 | next_combiner_buffer.r() = combiner_output.r(); | 871 | next_combiner_buffer.r() = combiner_output.r(); |
| 825 | next_combiner_buffer.g() = combiner_output.g(); | 872 | next_combiner_buffer.g() = combiner_output.g(); |
| 826 | next_combiner_buffer.b() = combiner_output.b(); | 873 | next_combiner_buffer.b() = combiner_output.b(); |
| 827 | } | 874 | } |
| 828 | 875 | ||
| 829 | if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) { | 876 | if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha( |
| 877 | tev_stage_index)) { | ||
| 830 | next_combiner_buffer.a() = combiner_output.a(); | 878 | next_combiner_buffer.a() = combiner_output.a(); |
| 831 | } | 879 | } |
| 832 | } | 880 | } |
| @@ -897,21 +945,26 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 897 | float fog_i = MathUtil::Clamp(floorf(fog_index), 0.0f, 127.0f); | 945 | float fog_i = MathUtil::Clamp(floorf(fog_index), 0.0f, 127.0f); |
| 898 | float fog_f = fog_index - fog_i; | 946 | float fog_f = fog_index - fog_i; |
| 899 | const auto& fog_lut_entry = g_state.fog.lut[static_cast<unsigned int>(fog_i)]; | 947 | const auto& fog_lut_entry = g_state.fog.lut[static_cast<unsigned int>(fog_i)]; |
| 900 | float fog_factor = (fog_lut_entry.value + fog_lut_entry.difference * fog_f) / 2047.0f; // This is signed fixed point 1.11 | 948 | float fog_factor = (fog_lut_entry.value + fog_lut_entry.difference * fog_f) / |
| 949 | 2047.0f; // This is signed fixed point 1.11 | ||
| 901 | fog_factor = MathUtil::Clamp(fog_factor, 0.0f, 1.0f); | 950 | fog_factor = MathUtil::Clamp(fog_factor, 0.0f, 1.0f); |
| 902 | 951 | ||
| 903 | // Blend the fog | 952 | // Blend the fog |
| 904 | for (unsigned i = 0; i < 3; i++) { | 953 | for (unsigned i = 0; i < 3; i++) { |
| 905 | combiner_output[i] = fog_factor * combiner_output[i] + (1.0f - fog_factor) * fog_color[i]; | 954 | combiner_output[i] = |
| 955 | fog_factor * combiner_output[i] + (1.0f - fog_factor) * fog_color[i]; | ||
| 906 | } | 956 | } |
| 907 | } | 957 | } |
| 908 | 958 | ||
| 909 | u8 old_stencil = 0; | 959 | u8 old_stencil = 0; |
| 910 | 960 | ||
| 911 | auto UpdateStencil = [stencil_test, x, y, &old_stencil](Pica::Regs::StencilAction action) { | 961 | auto UpdateStencil = [stencil_test, x, y, |
| 912 | u8 new_stencil = PerformStencilAction(action, old_stencil, stencil_test.reference_value); | 962 | &old_stencil](Pica::Regs::StencilAction action) { |
| 963 | u8 new_stencil = | ||
| 964 | PerformStencilAction(action, old_stencil, stencil_test.reference_value); | ||
| 913 | if (g_state.regs.framebuffer.allow_depth_stencil_write != 0) | 965 | if (g_state.regs.framebuffer.allow_depth_stencil_write != 0) |
| 914 | SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask)); | 966 | SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | |
| 967 | (old_stencil & ~stencil_test.write_mask)); | ||
| 915 | }; | 968 | }; |
| 916 | 969 | ||
| 917 | if (stencil_action_enable) { | 970 | if (stencil_action_enable) { |
| @@ -1030,8 +1083,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 1030 | static_cast<u8>(output_merger.blend_const.r), | 1083 | static_cast<u8>(output_merger.blend_const.r), |
| 1031 | static_cast<u8>(output_merger.blend_const.g), | 1084 | static_cast<u8>(output_merger.blend_const.g), |
| 1032 | static_cast<u8>(output_merger.blend_const.b), | 1085 | static_cast<u8>(output_merger.blend_const.b), |
| 1033 | static_cast<u8>(output_merger.blend_const.a) | 1086 | static_cast<u8>(output_merger.blend_const.a)}; |
| 1034 | }; | ||
| 1035 | 1087 | ||
| 1036 | switch (factor) { | 1088 | switch (factor) { |
| 1037 | case Regs::BlendFactor::Zero: | 1089 | case Regs::BlendFactor::Zero: |
| @@ -1091,12 +1143,13 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 1091 | return combiner_output[channel]; | 1143 | return combiner_output[channel]; |
| 1092 | }; | 1144 | }; |
| 1093 | 1145 | ||
| 1094 | static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, | 1146 | static auto EvaluateBlendEquation = []( |
| 1095 | const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, | 1147 | const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, |
| 1096 | Regs::BlendEquation equation) { | 1148 | const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, |
| 1149 | Regs::BlendEquation equation) { | ||
| 1097 | Math::Vec4<int> result; | 1150 | Math::Vec4<int> result; |
| 1098 | 1151 | ||
| 1099 | auto src_result = (src * srcfactor).Cast<int>(); | 1152 | auto src_result = (src * srcfactor).Cast<int>(); |
| 1100 | auto dst_result = (dest * destfactor).Cast<int>(); | 1153 | auto dst_result = (dest * destfactor).Cast<int>(); |
| 1101 | 1154 | ||
| 1102 | switch (equation) { | 1155 | switch (equation) { |
| @@ -1134,10 +1187,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 1134 | UNIMPLEMENTED(); | 1187 | UNIMPLEMENTED(); |
| 1135 | } | 1188 | } |
| 1136 | 1189 | ||
| 1137 | return Math::Vec4<u8>(MathUtil::Clamp(result.r(), 0, 255), | 1190 | return Math::Vec4<u8>( |
| 1138 | MathUtil::Clamp(result.g(), 0, 255), | 1191 | MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255), |
| 1139 | MathUtil::Clamp(result.b(), 0, 255), | 1192 | MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255)); |
| 1140 | MathUtil::Clamp(result.a(), 0, 255)); | ||
| 1141 | }; | 1193 | }; |
| 1142 | 1194 | ||
| 1143 | auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb), | 1195 | auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb), |
| @@ -1150,8 +1202,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 1150 | LookupFactor(2, params.factor_dest_rgb), | 1202 | LookupFactor(2, params.factor_dest_rgb), |
| 1151 | LookupFactor(3, params.factor_dest_a)); | 1203 | LookupFactor(3, params.factor_dest_a)); |
| 1152 | 1204 | ||
| 1153 | blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); | 1205 | blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, |
| 1154 | blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); | 1206 | params.blend_equation_rgb); |
| 1207 | blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, | ||
| 1208 | dstfactor, params.blend_equation_a) | ||
| 1209 | .a(); | ||
| 1155 | } else { | 1210 | } else { |
| 1156 | static auto LogicOp = [](u8 src, u8 dest, Regs::LogicOp op) -> u8 { | 1211 | static auto LogicOp = [](u8 src, u8 dest, Regs::LogicOp op) -> u8 { |
| 1157 | switch (op) { | 1212 | switch (op) { |
| @@ -1205,19 +1260,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 1205 | } | 1260 | } |
| 1206 | }; | 1261 | }; |
| 1207 | 1262 | ||
| 1208 | blend_output = Math::MakeVec( | 1263 | blend_output = |
| 1209 | LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), | 1264 | Math::MakeVec(LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), |
| 1210 | LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), | 1265 | LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), |
| 1211 | LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op), | 1266 | LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op), |
| 1212 | LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op)); | 1267 | LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op)); |
| 1213 | } | 1268 | } |
| 1214 | 1269 | ||
| 1215 | const Math::Vec4<u8> result = { | 1270 | const Math::Vec4<u8> result = {output_merger.red_enable ? blend_output.r() : dest.r(), |
| 1216 | output_merger.red_enable ? blend_output.r() : dest.r(), | 1271 | output_merger.green_enable ? blend_output.g() : dest.g(), |
| 1217 | output_merger.green_enable ? blend_output.g() : dest.g(), | 1272 | output_merger.blue_enable ? blend_output.b() : dest.b(), |
| 1218 | output_merger.blue_enable ? blend_output.b() : dest.b(), | 1273 | output_merger.alpha_enable ? blend_output.a() |
| 1219 | output_merger.alpha_enable ? blend_output.a() : dest.a() | 1274 | : dest.a()}; |
| 1220 | }; | ||
| 1221 | 1275 | ||
| 1222 | if (regs.framebuffer.allow_color_write != 0) | 1276 | if (regs.framebuffer.allow_color_write != 0) |
| 1223 | DrawPixel(x >> 4, y >> 4, result); | 1277 | DrawPixel(x >> 4, y >> 4, result); |
| @@ -1225,8 +1279,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 1225 | } | 1279 | } |
| 1226 | } | 1280 | } |
| 1227 | 1281 | ||
| 1228 | void ProcessTriangle(const Shader::OutputVertex& v0, | 1282 | void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, |
| 1229 | const Shader::OutputVertex& v1, | ||
| 1230 | const Shader::OutputVertex& v2) { | 1283 | const Shader::OutputVertex& v2) { |
| 1231 | ProcessTriangleInternal(v0, v1, v2); | 1284 | ProcessTriangleInternal(v0, v1, v2); |
| 1232 | } | 1285 | } |