diff options
Diffstat (limited to 'src/video_core/rasterizer.cpp')
| -rw-r--r-- | src/video_core/rasterizer.cpp | 436 |
1 files changed, 243 insertions, 193 deletions
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 6f369a00e..6c4bbed33 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <cmath> | 7 | #include <cmath> |
| 8 | |||
| 9 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 10 | #include "common/bit_field.h" | 9 | #include "common/bit_field.h" |
| 11 | #include "common/color.h" | 10 | #include "common/color.h" |
| @@ -14,17 +13,15 @@ | |||
| 14 | #include "common/math_util.h" | 13 | #include "common/math_util.h" |
| 15 | #include "common/microprofile.h" | 14 | #include "common/microprofile.h" |
| 16 | #include "common/vector_math.h" | 15 | #include "common/vector_math.h" |
| 17 | |||
| 18 | #include "core/memory.h" | ||
| 19 | #include "core/hw/gpu.h" | 16 | #include "core/hw/gpu.h" |
| 20 | 17 | #include "core/memory.h" | |
| 21 | #include "video_core/debug_utils/debug_utils.h" | 18 | #include "video_core/debug_utils/debug_utils.h" |
| 22 | #include "video_core/pica.h" | 19 | #include "video_core/pica.h" |
| 23 | #include "video_core/pica_state.h" | 20 | #include "video_core/pica_state.h" |
| 24 | #include "video_core/pica_types.h" | 21 | #include "video_core/pica_types.h" |
| 25 | #include "video_core/rasterizer.h" | 22 | #include "video_core/rasterizer.h" |
| 26 | #include "video_core/utils.h" | ||
| 27 | #include "video_core/shader/shader.h" | 23 | #include "video_core/shader/shader.h" |
| 24 | #include "video_core/utils.h" | ||
| 28 | 25 | ||
| 29 | namespace Pica { | 26 | namespace Pica { |
| 30 | 27 | ||
| @@ -39,8 +36,10 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | |||
| 39 | y = framebuffer.height - y; | 36 | y = framebuffer.height - y; |
| 40 | 37 | ||
| 41 | const u32 coarse_y = y & ~7; | 38 | const u32 coarse_y = y & ~7; |
| 42 | u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); | 39 | u32 bytes_per_pixel = |
| 43 | u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; | 40 | GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); |
| 41 | u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + | ||
| 42 | coarse_y * framebuffer.width * bytes_per_pixel; | ||
| 44 | u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; | 43 | u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; |
| 45 | 44 | ||
| 46 | switch (framebuffer.color_format) { | 45 | switch (framebuffer.color_format) { |
| @@ -65,7 +64,8 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { | |||
| 65 | break; | 64 | break; |
| 66 | 65 | ||
| 67 | default: | 66 | default: |
| 68 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value()); | 67 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", |
| 68 | framebuffer.color_format.Value()); | ||
| 69 | UNIMPLEMENTED(); | 69 | UNIMPLEMENTED(); |
| 70 | } | 70 | } |
| 71 | } | 71 | } |
| @@ -77,8 +77,10 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { | |||
| 77 | y = framebuffer.height - y; | 77 | y = framebuffer.height - y; |
| 78 | 78 | ||
| 79 | const u32 coarse_y = y & ~7; | 79 | const u32 coarse_y = y & ~7; |
| 80 | u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); | 80 | u32 bytes_per_pixel = |
| 81 | u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; | 81 | GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); |
| 82 | u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + | ||
| 83 | coarse_y * framebuffer.width * bytes_per_pixel; | ||
| 82 | u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; | 84 | u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; |
| 83 | 85 | ||
| 84 | switch (framebuffer.color_format) { | 86 | switch (framebuffer.color_format) { |
| @@ -98,7 +100,8 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { | |||
| 98 | return Color::DecodeRGBA4(src_pixel); | 100 | return Color::DecodeRGBA4(src_pixel); |
| 99 | 101 | ||
| 100 | default: | 102 | default: |
| 101 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value()); | 103 | LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", |
| 104 | framebuffer.color_format.Value()); | ||
| 102 | UNIMPLEMENTED(); | 105 | UNIMPLEMENTED(); |
| 103 | } | 106 | } |
| 104 | 107 | ||
| @@ -120,16 +123,16 @@ static u32 GetDepth(int x, int y) { | |||
| 120 | u8* src_pixel = depth_buffer + src_offset; | 123 | u8* src_pixel = depth_buffer + src_offset; |
| 121 | 124 | ||
| 122 | switch (framebuffer.depth_format) { | 125 | switch (framebuffer.depth_format) { |
| 123 | case Regs::DepthFormat::D16: | 126 | case Regs::DepthFormat::D16: |
| 124 | return Color::DecodeD16(src_pixel); | 127 | return Color::DecodeD16(src_pixel); |
| 125 | case Regs::DepthFormat::D24: | 128 | case Regs::DepthFormat::D24: |
| 126 | return Color::DecodeD24(src_pixel); | 129 | return Color::DecodeD24(src_pixel); |
| 127 | case Regs::DepthFormat::D24S8: | 130 | case Regs::DepthFormat::D24S8: |
| 128 | return Color::DecodeD24S8(src_pixel).x; | 131 | return Color::DecodeD24S8(src_pixel).x; |
| 129 | default: | 132 | default: |
| 130 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | 133 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); |
| 131 | UNIMPLEMENTED(); | 134 | UNIMPLEMENTED(); |
| 132 | return 0; | 135 | return 0; |
| 133 | } | 136 | } |
| 134 | } | 137 | } |
| 135 | 138 | ||
| @@ -148,12 +151,15 @@ static u8 GetStencil(int x, int y) { | |||
| 148 | u8* src_pixel = depth_buffer + src_offset; | 151 | u8* src_pixel = depth_buffer + src_offset; |
| 149 | 152 | ||
| 150 | switch (framebuffer.depth_format) { | 153 | switch (framebuffer.depth_format) { |
| 151 | case Regs::DepthFormat::D24S8: | 154 | case Regs::DepthFormat::D24S8: |
| 152 | return Color::DecodeD24S8(src_pixel).y; | 155 | return Color::DecodeD24S8(src_pixel).y; |
| 153 | 156 | ||
| 154 | default: | 157 | default: |
| 155 | LOG_WARNING(HW_GPU, "GetStencil called for function which doesn't have a stencil component (format %u)", framebuffer.depth_format); | 158 | LOG_WARNING( |
| 156 | return 0; | 159 | HW_GPU, |
| 160 | "GetStencil called for function which doesn't have a stencil component (format %u)", | ||
| 161 | framebuffer.depth_format); | ||
| 162 | return 0; | ||
| 157 | } | 163 | } |
| 158 | } | 164 | } |
| 159 | 165 | ||
| @@ -172,22 +178,22 @@ static void SetDepth(int x, int y, u32 value) { | |||
| 172 | u8* dst_pixel = depth_buffer + dst_offset; | 178 | u8* dst_pixel = depth_buffer + dst_offset; |
| 173 | 179 | ||
| 174 | switch (framebuffer.depth_format) { | 180 | switch (framebuffer.depth_format) { |
| 175 | case Regs::DepthFormat::D16: | 181 | case Regs::DepthFormat::D16: |
| 176 | Color::EncodeD16(value, dst_pixel); | 182 | Color::EncodeD16(value, dst_pixel); |
| 177 | break; | 183 | break; |
| 178 | 184 | ||
| 179 | case Regs::DepthFormat::D24: | 185 | case Regs::DepthFormat::D24: |
| 180 | Color::EncodeD24(value, dst_pixel); | 186 | Color::EncodeD24(value, dst_pixel); |
| 181 | break; | 187 | break; |
| 182 | 188 | ||
| 183 | case Regs::DepthFormat::D24S8: | 189 | case Regs::DepthFormat::D24S8: |
| 184 | Color::EncodeD24X8(value, dst_pixel); | 190 | Color::EncodeD24X8(value, dst_pixel); |
| 185 | break; | 191 | break; |
| 186 | 192 | ||
| 187 | default: | 193 | default: |
| 188 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | 194 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); |
| 189 | UNIMPLEMENTED(); | 195 | UNIMPLEMENTED(); |
| 190 | break; | 196 | break; |
| 191 | } | 197 | } |
| 192 | } | 198 | } |
| 193 | 199 | ||
| @@ -206,19 +212,19 @@ static void SetStencil(int x, int y, u8 value) { | |||
| 206 | u8* dst_pixel = depth_buffer + dst_offset; | 212 | u8* dst_pixel = depth_buffer + dst_offset; |
| 207 | 213 | ||
| 208 | switch (framebuffer.depth_format) { | 214 | switch (framebuffer.depth_format) { |
| 209 | case Pica::Regs::DepthFormat::D16: | 215 | case Pica::Regs::DepthFormat::D16: |
| 210 | case Pica::Regs::DepthFormat::D24: | 216 | case Pica::Regs::DepthFormat::D24: |
| 211 | // Nothing to do | 217 | // Nothing to do |
| 212 | break; | 218 | break; |
| 213 | 219 | ||
| 214 | case Pica::Regs::DepthFormat::D24S8: | 220 | case Pica::Regs::DepthFormat::D24S8: |
| 215 | Color::EncodeX24S8(value, dst_pixel); | 221 | Color::EncodeX24S8(value, dst_pixel); |
| 216 | break; | 222 | break; |
| 217 | 223 | ||
| 218 | default: | 224 | default: |
| 219 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); | 225 | LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); |
| 220 | UNIMPLEMENTED(); | 226 | UNIMPLEMENTED(); |
| 221 | break; | 227 | break; |
| 222 | } | 228 | } |
| 223 | } | 229 | } |
| 224 | 230 | ||
| @@ -262,15 +268,19 @@ struct Fix12P4 { | |||
| 262 | Fix12P4() {} | 268 | Fix12P4() {} |
| 263 | Fix12P4(u16 val) : val(val) {} | 269 | Fix12P4(u16 val) : val(val) {} |
| 264 | 270 | ||
| 265 | static u16 FracMask() { return 0xF; } | 271 | static u16 FracMask() { |
| 266 | static u16 IntMask() { return (u16)~0xF; } | 272 | return 0xF; |
| 273 | } | ||
| 274 | static u16 IntMask() { | ||
| 275 | return (u16)~0xF; | ||
| 276 | } | ||
| 267 | 277 | ||
| 268 | operator u16() const { | 278 | operator u16() const { |
| 269 | return val; | 279 | return val; |
| 270 | } | 280 | } |
| 271 | 281 | ||
| 272 | bool operator < (const Fix12P4& oth) const { | 282 | bool operator<(const Fix12P4& oth) const { |
| 273 | return (u16)*this < (u16)oth; | 283 | return (u16) * this < (u16)oth; |
| 274 | } | 284 | } |
| 275 | 285 | ||
| 276 | private: | 286 | private: |
| @@ -283,9 +293,8 @@ private: | |||
| 283 | * | 293 | * |
| 284 | * @todo define orientation concretely. | 294 | * @todo define orientation concretely. |
| 285 | */ | 295 | */ |
| 286 | static int SignedArea (const Math::Vec2<Fix12P4>& vtx1, | 296 | static int SignedArea(const Math::Vec2<Fix12P4>& vtx1, const Math::Vec2<Fix12P4>& vtx2, |
| 287 | const Math::Vec2<Fix12P4>& vtx2, | 297 | const Math::Vec2<Fix12P4>& vtx3) { |
| 288 | const Math::Vec2<Fix12P4>& vtx3) { | ||
| 289 | const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); | 298 | const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); |
| 290 | const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); | 299 | const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); |
| 291 | // TODO: There is a very small chance this will overflow for sizeof(int) == 4 | 300 | // TODO: There is a very small chance this will overflow for sizeof(int) == 4 |
| @@ -298,11 +307,8 @@ MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 24 | |||
| 298 | * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing | 307 | * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing |
| 299 | * culling via recursion. | 308 | * culling via recursion. |
| 300 | */ | 309 | */ |
| 301 | static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | 310 | static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, |
| 302 | const Shader::OutputVertex& v1, | 311 | const Shader::OutputVertex& v2, bool reversed = false) { |
| 303 | const Shader::OutputVertex& v2, | ||
| 304 | bool reversed = false) | ||
| 305 | { | ||
| 306 | const auto& regs = g_state.regs; | 312 | const auto& regs = g_state.regs; |
| 307 | MICROPROFILE_SCOPE(GPU_Rasterization); | 313 | MICROPROFILE_SCOPE(GPU_Rasterization); |
| 308 | 314 | ||
| @@ -316,9 +322,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 316 | return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; | 322 | return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; |
| 317 | }; | 323 | }; |
| 318 | 324 | ||
| 319 | Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos), | 325 | Math::Vec3<Fix12P4> vtxpos[3]{ScreenToRasterizerCoordinates(v0.screenpos), |
| 320 | ScreenToRasterizerCoordinates(v1.screenpos), | 326 | ScreenToRasterizerCoordinates(v1.screenpos), |
| 321 | ScreenToRasterizerCoordinates(v2.screenpos) }; | 327 | ScreenToRasterizerCoordinates(v2.screenpos)}; |
| 322 | 328 | ||
| 323 | if (regs.cull_mode == Regs::CullMode::KeepAll) { | 329 | if (regs.cull_mode == Regs::CullMode::KeepAll) { |
| 324 | // Make sure we always end up with a triangle wound counter-clockwise | 330 | // Make sure we always end up with a triangle wound counter-clockwise |
| @@ -344,8 +350,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 344 | u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); | 350 | u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); |
| 345 | 351 | ||
| 346 | // Convert the scissor box coordinates to 12.4 fixed point | 352 | // Convert the scissor box coordinates to 12.4 fixed point |
| 347 | u16 scissor_x1 = (u16)( regs.scissor_test.x1 << 4); | 353 | u16 scissor_x1 = (u16)(regs.scissor_test.x1 << 4); |
| 348 | u16 scissor_y1 = (u16)( regs.scissor_test.y1 << 4); | 354 | u16 scissor_y1 = (u16)(regs.scissor_test.y1 << 4); |
| 349 | // x2,y2 have +1 added to cover the entire sub-pixel area | 355 | // x2,y2 have +1 added to cover the entire sub-pixel area |
| 350 | u16 scissor_x2 = (u16)((regs.scissor_test.x2 + 1) << 4); | 356 | u16 scissor_x2 = (u16)((regs.scissor_test.x2 + 1) << 4); |
| 351 | u16 scissor_y2 = (u16)((regs.scissor_test.y2 + 1) << 4); | 357 | u16 scissor_y2 = (u16)((regs.scissor_test.y2 + 1) << 4); |
| @@ -369,27 +375,32 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 369 | // NOTE: These are the PSP filling rules. Not sure if the 3DS uses the same ones... | 375 | // NOTE: These are the PSP filling rules. Not sure if the 3DS uses the same ones... |
| 370 | auto IsRightSideOrFlatBottomEdge = [](const Math::Vec2<Fix12P4>& vtx, | 376 | auto IsRightSideOrFlatBottomEdge = [](const Math::Vec2<Fix12P4>& vtx, |
| 371 | const Math::Vec2<Fix12P4>& line1, | 377 | const Math::Vec2<Fix12P4>& line1, |
| 372 | const Math::Vec2<Fix12P4>& line2) | 378 | const Math::Vec2<Fix12P4>& line2) { |
| 373 | { | ||
| 374 | if (line1.y == line2.y) { | 379 | if (line1.y == line2.y) { |
| 375 | // just check if vertex is above us => bottom line parallel to x-axis | 380 | // just check if vertex is above us => bottom line parallel to x-axis |
| 376 | return vtx.y < line1.y; | 381 | return vtx.y < line1.y; |
| 377 | } else { | 382 | } else { |
| 378 | // check if vertex is on our left => right side | 383 | // check if vertex is on our left => right side |
| 379 | // TODO: Not sure how likely this is to overflow | 384 | // TODO: Not sure how likely this is to overflow |
| 380 | return (int)vtx.x < (int)line1.x + ((int)line2.x - (int)line1.x) * ((int)vtx.y - (int)line1.y) / ((int)line2.y - (int)line1.y); | 385 | return (int)vtx.x < (int)line1.x + |
| 386 | ((int)line2.x - (int)line1.x) * ((int)vtx.y - (int)line1.y) / | ||
| 387 | ((int)line2.y - (int)line1.y); | ||
| 381 | } | 388 | } |
| 382 | }; | 389 | }; |
| 383 | int bias0 = IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0; | 390 | int bias0 = |
| 384 | int bias1 = IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; | 391 | IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0; |
| 385 | int bias2 = IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; | 392 | int bias1 = |
| 393 | IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; | ||
| 394 | int bias2 = | ||
| 395 | IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; | ||
| 386 | 396 | ||
| 387 | auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); | 397 | auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); |
| 388 | 398 | ||
| 389 | auto textures = regs.GetTextures(); | 399 | auto textures = regs.GetTextures(); |
| 390 | auto tev_stages = regs.GetTevStages(); | 400 | auto tev_stages = regs.GetTevStages(); |
| 391 | 401 | ||
| 392 | bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8; | 402 | bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && |
| 403 | g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8; | ||
| 393 | const auto stencil_test = g_state.regs.output_merger.stencil_test; | 404 | const auto stencil_test = g_state.regs.output_merger.stencil_test; |
| 394 | 405 | ||
| 395 | // Enter rasterization loop, starting at the center of the topleft bounding box corner. | 406 | // Enter rasterization loop, starting at the center of the topleft bounding box corner. |
| @@ -397,10 +408,10 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 397 | for (u16 y = min_y + 8; y < max_y; y += 0x10) { | 408 | for (u16 y = min_y + 8; y < max_y; y += 0x10) { |
| 398 | for (u16 x = min_x + 8; x < max_x; x += 0x10) { | 409 | for (u16 x = min_x + 8; x < max_x; x += 0x10) { |
| 399 | 410 | ||
| 400 | // Do not process the pixel if it's inside the scissor box and the scissor mode is set to Exclude | 411 | // Do not process the pixel if it's inside the scissor box and the scissor mode is set |
| 412 | // to Exclude | ||
| 401 | if (regs.scissor_test.mode == Regs::ScissorMode::Exclude) { | 413 | if (regs.scissor_test.mode == Regs::ScissorMode::Exclude) { |
| 402 | if (x >= scissor_x1 && x < scissor_x2 && | 414 | if (x >= scissor_x1 && x < scissor_x2 && y >= scissor_y1 && y < scissor_y2) |
| 403 | y >= scissor_y1 && y < scissor_y2) | ||
| 404 | continue; | 415 | continue; |
| 405 | } | 416 | } |
| 406 | 417 | ||
| @@ -414,15 +425,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 414 | if (w0 < 0 || w1 < 0 || w2 < 0) | 425 | if (w0 < 0 || w1 < 0 || w2 < 0) |
| 415 | continue; | 426 | continue; |
| 416 | 427 | ||
| 417 | auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)), | 428 | auto baricentric_coordinates = |
| 418 | float24::FromFloat32(static_cast<float>(w1)), | 429 | Math::MakeVec(float24::FromFloat32(static_cast<float>(w0)), |
| 419 | float24::FromFloat32(static_cast<float>(w2))); | 430 | float24::FromFloat32(static_cast<float>(w1)), |
| 420 | float24 interpolated_w_inverse = float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates); | 431 | float24::FromFloat32(static_cast<float>(w2))); |
| 432 | float24 interpolated_w_inverse = | ||
| 433 | float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates); | ||
| 421 | 434 | ||
| 422 | // interpolated_z = z / w | 435 | // interpolated_z = z / w |
| 423 | float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 + | 436 | float interpolated_z_over_w = |
| 424 | v1.screenpos[2].ToFloat32() * w1 + | 437 | (v0.screenpos[2].ToFloat32() * w0 + v1.screenpos[2].ToFloat32() * w1 + |
| 425 | v2.screenpos[2].ToFloat32() * w2) / wsum; | 438 | v2.screenpos[2].ToFloat32() * w2) / |
| 439 | wsum; | ||
| 426 | 440 | ||
| 427 | // Not fully accurate. About 3 bits in precision are missing. | 441 | // Not fully accurate. About 3 bits in precision are missing. |
| 428 | // Z-Buffer (z / w * scale + offset) | 442 | // Z-Buffer (z / w * scale + offset) |
| @@ -461,10 +475,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 461 | }; | 475 | }; |
| 462 | 476 | ||
| 463 | Math::Vec4<u8> primary_color{ | 477 | Math::Vec4<u8> primary_color{ |
| 464 | (u8)(GetInterpolatedAttribute(v0.color.r(), v1.color.r(), v2.color.r()).ToFloat32() * 255), | 478 | (u8)( |
| 465 | (u8)(GetInterpolatedAttribute(v0.color.g(), v1.color.g(), v2.color.g()).ToFloat32() * 255), | 479 | GetInterpolatedAttribute(v0.color.r(), v1.color.r(), v2.color.r()).ToFloat32() * |
| 466 | (u8)(GetInterpolatedAttribute(v0.color.b(), v1.color.b(), v2.color.b()).ToFloat32() * 255), | 480 | 255), |
| 467 | (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) | 481 | (u8)( |
| 482 | GetInterpolatedAttribute(v0.color.g(), v1.color.g(), v2.color.g()).ToFloat32() * | ||
| 483 | 255), | ||
| 484 | (u8)( | ||
| 485 | GetInterpolatedAttribute(v0.color.b(), v1.color.b(), v2.color.b()).ToFloat32() * | ||
| 486 | 255), | ||
| 487 | (u8)( | ||
| 488 | GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * | ||
| 489 | 255), | ||
| 468 | }; | 490 | }; |
| 469 | 491 | ||
| 470 | Math::Vec2<float24> uv[3]; | 492 | Math::Vec2<float24> uv[3]; |
| @@ -489,7 +511,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 489 | // Only unit 0 respects the texturing type (according to 3DBrew) | 511 | // Only unit 0 respects the texturing type (according to 3DBrew) |
| 490 | // TODO: Refactor so cubemaps and shadowmaps can be handled | 512 | // TODO: Refactor so cubemaps and shadowmaps can be handled |
| 491 | if (i == 0) { | 513 | if (i == 0) { |
| 492 | switch(texture.config.type) { | 514 | switch (texture.config.type) { |
| 493 | case Regs::TextureConfig::Texture2D: | 515 | case Regs::TextureConfig::Texture2D: |
| 494 | break; | 516 | break; |
| 495 | case Regs::TextureConfig::Projection2D: { | 517 | case Regs::TextureConfig::Projection2D: { |
| @@ -506,51 +528,58 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 506 | } | 528 | } |
| 507 | } | 529 | } |
| 508 | 530 | ||
| 509 | int s = (int)(u * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32(); | 531 | int s = (int)(u * float24::FromFloat32(static_cast<float>(texture.config.width))) |
| 510 | int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32(); | 532 | .ToFloat32(); |
| 533 | int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))) | ||
| 534 | .ToFloat32(); | ||
| 511 | 535 | ||
| 512 | 536 | static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, | |
| 513 | static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { | 537 | unsigned size) { |
| 514 | switch (mode) { | 538 | switch (mode) { |
| 515 | case Regs::TextureConfig::ClampToEdge: | 539 | case Regs::TextureConfig::ClampToEdge: |
| 516 | val = std::max(val, 0); | 540 | val = std::max(val, 0); |
| 517 | val = std::min(val, (int)size - 1); | 541 | val = std::min(val, (int)size - 1); |
| 518 | return val; | 542 | return val; |
| 519 | 543 | ||
| 520 | case Regs::TextureConfig::ClampToBorder: | 544 | case Regs::TextureConfig::ClampToBorder: |
| 521 | return val; | 545 | return val; |
| 522 | 546 | ||
| 523 | case Regs::TextureConfig::Repeat: | 547 | case Regs::TextureConfig::Repeat: |
| 524 | return (int)((unsigned)val % size); | 548 | return (int)((unsigned)val % size); |
| 525 | 549 | ||
| 526 | case Regs::TextureConfig::MirroredRepeat: | 550 | case Regs::TextureConfig::MirroredRepeat: { |
| 527 | { | 551 | unsigned int coord = ((unsigned)val % (2 * size)); |
| 528 | unsigned int coord = ((unsigned)val % (2 * size)); | 552 | if (coord >= size) |
| 529 | if (coord >= size) | 553 | coord = 2 * size - 1 - coord; |
| 530 | coord = 2 * size - 1 - coord; | 554 | return (int)coord; |
| 531 | return (int)coord; | 555 | } |
| 532 | } | 556 | |
| 533 | 557 | default: | |
| 534 | default: | 558 | LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); |
| 535 | LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); | 559 | UNIMPLEMENTED(); |
| 536 | UNIMPLEMENTED(); | 560 | return 0; |
| 537 | return 0; | ||
| 538 | } | 561 | } |
| 539 | }; | 562 | }; |
| 540 | 563 | ||
| 541 | if ((texture.config.wrap_s == Regs::TextureConfig::ClampToBorder && (s < 0 || s >= texture.config.width)) | 564 | if ((texture.config.wrap_s == Regs::TextureConfig::ClampToBorder && |
| 542 | || (texture.config.wrap_t == Regs::TextureConfig::ClampToBorder && (t < 0 || t >= texture.config.height))) { | 565 | (s < 0 || s >= texture.config.width)) || |
| 566 | (texture.config.wrap_t == Regs::TextureConfig::ClampToBorder && | ||
| 567 | (t < 0 || t >= texture.config.height))) { | ||
| 543 | auto border_color = texture.config.border_color; | 568 | auto border_color = texture.config.border_color; |
| 544 | texture_color[i] = { border_color.r, border_color.g, border_color.b, border_color.a }; | 569 | texture_color[i] = {border_color.r, border_color.g, border_color.b, |
| 570 | border_color.a}; | ||
| 545 | } else { | 571 | } else { |
| 546 | // Textures are laid out from bottom to top, hence we invert the t coordinate. | 572 | // Textures are laid out from bottom to top, hence we invert the t coordinate. |
| 547 | // NOTE: This may not be the right place for the inversion. | 573 | // NOTE: This may not be the right place for the inversion. |
| 548 | // TODO: Check if this applies to ETC textures, too. | 574 | // TODO: Check if this applies to ETC textures, too. |
| 549 | s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); | 575 | s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); |
| 550 | t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); | 576 | t = texture.config.height - 1 - |
| 577 | GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); | ||
| 551 | 578 | ||
| 552 | u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); | 579 | u8* texture_data = |
| 553 | auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); | 580 | Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); |
| 581 | auto info = | ||
| 582 | DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); | ||
| 554 | 583 | ||
| 555 | // TODO: Apply the min and mag filters to the texture | 584 | // TODO: Apply the min and mag filters to the texture |
| 556 | texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info); | 585 | texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info); |
| @@ -571,10 +600,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 571 | Math::Vec4<u8> combiner_buffer = {0, 0, 0, 0}; | 600 | Math::Vec4<u8> combiner_buffer = {0, 0, 0, 0}; |
| 572 | Math::Vec4<u8> next_combiner_buffer = { | 601 | Math::Vec4<u8> next_combiner_buffer = { |
| 573 | regs.tev_combiner_buffer_color.r, regs.tev_combiner_buffer_color.g, | 602 | regs.tev_combiner_buffer_color.r, regs.tev_combiner_buffer_color.g, |
| 574 | regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a | 603 | regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a, |
| 575 | }; | 604 | }; |
| 576 | 605 | ||
| 577 | for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { | 606 | for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); |
| 607 | ++tev_stage_index) { | ||
| 578 | const auto& tev_stage = tev_stages[tev_stage_index]; | 608 | const auto& tev_stage = tev_stages[tev_stage_index]; |
| 579 | using Source = Regs::TevStageConfig::Source; | 609 | using Source = Regs::TevStageConfig::Source; |
| 580 | using ColorModifier = Regs::TevStageConfig::ColorModifier; | 610 | using ColorModifier = Regs::TevStageConfig::ColorModifier; |
| @@ -606,7 +636,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 606 | return combiner_buffer; | 636 | return combiner_buffer; |
| 607 | 637 | ||
| 608 | case Source::Constant: | 638 | case Source::Constant: |
| 609 | return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a}; | 639 | return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, |
| 640 | tev_stage.const_a}; | ||
| 610 | 641 | ||
| 611 | case Source::Previous: | 642 | case Source::Previous: |
| 612 | return combiner_output; | 643 | return combiner_output; |
| @@ -618,7 +649,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 618 | } | 649 | } |
| 619 | }; | 650 | }; |
| 620 | 651 | ||
| 621 | static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> { | 652 | static auto GetColorModifier = [](ColorModifier factor, |
| 653 | const Math::Vec4<u8>& values) -> Math::Vec3<u8> { | ||
| 622 | switch (factor) { | 654 | switch (factor) { |
| 623 | case ColorModifier::SourceColor: | 655 | case ColorModifier::SourceColor: |
| 624 | return values.rgb(); | 656 | return values.rgb(); |
| @@ -652,7 +684,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 652 | } | 684 | } |
| 653 | }; | 685 | }; |
| 654 | 686 | ||
| 655 | static auto GetAlphaModifier = [](AlphaModifier factor, const Math::Vec4<u8>& values) -> u8 { | 687 | static auto GetAlphaModifier = [](AlphaModifier factor, |
| 688 | const Math::Vec4<u8>& values) -> u8 { | ||
| 656 | switch (factor) { | 689 | switch (factor) { |
| 657 | case AlphaModifier::SourceAlpha: | 690 | case AlphaModifier::SourceAlpha: |
| 658 | return values.a(); | 691 | return values.a(); |
| @@ -680,7 +713,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 680 | } | 713 | } |
| 681 | }; | 714 | }; |
| 682 | 715 | ||
| 683 | static auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { | 716 | static auto ColorCombine = [](Operation op, |
| 717 | const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { | ||
| 684 | switch (op) { | 718 | switch (op) { |
| 685 | case Operation::Replace: | 719 | case Operation::Replace: |
| 686 | return input[0]; | 720 | return input[0]; |
| @@ -688,8 +722,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 688 | case Operation::Modulate: | 722 | case Operation::Modulate: |
| 689 | return ((input[0] * input[1]) / 255).Cast<u8>(); | 723 | return ((input[0] * input[1]) / 255).Cast<u8>(); |
| 690 | 724 | ||
| 691 | case Operation::Add: | 725 | case Operation::Add: { |
| 692 | { | ||
| 693 | auto result = input[0] + input[1]; | 726 | auto result = input[0] + input[1]; |
| 694 | result.r() = std::min(255, result.r()); | 727 | result.r() = std::min(255, result.r()); |
| 695 | result.g() = std::min(255, result.g()); | 728 | result.g() = std::min(255, result.g()); |
| @@ -697,10 +730,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 697 | return result.Cast<u8>(); | 730 | return result.Cast<u8>(); |
| 698 | } | 731 | } |
| 699 | 732 | ||
| 700 | case Operation::AddSigned: | 733 | case Operation::AddSigned: { |
| 701 | { | 734 | // TODO(bunnei): Verify that the color conversion from (float) 0.5f to |
| 702 | // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct | 735 | // (byte) 128 is correct |
| 703 | auto result = input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128); | 736 | auto result = input[0].Cast<int>() + input[1].Cast<int>() - |
| 737 | Math::MakeVec<int>(128, 128, 128); | ||
| 704 | result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); | 738 | result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); |
| 705 | result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); | 739 | result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); |
| 706 | result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); | 740 | result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); |
| @@ -708,10 +742,13 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 708 | } | 742 | } |
| 709 | 743 | ||
| 710 | case Operation::Lerp: | 744 | case Operation::Lerp: |
| 711 | return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); | 745 | return ((input[0] * input[2] + |
| 746 | input[1] * | ||
| 747 | (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / | ||
| 748 | 255) | ||
| 749 | .Cast<u8>(); | ||
| 712 | 750 | ||
| 713 | case Operation::Subtract: | 751 | case Operation::Subtract: { |
| 714 | { | ||
| 715 | auto result = input[0].Cast<int>() - input[1].Cast<int>(); | 752 | auto result = input[0].Cast<int>() - input[1].Cast<int>(); |
| 716 | result.r() = std::max(0, result.r()); | 753 | result.r() = std::max(0, result.r()); |
| 717 | result.g() = std::max(0, result.g()); | 754 | result.g() = std::max(0, result.g()); |
| @@ -719,8 +756,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 719 | return result.Cast<u8>(); | 756 | return result.Cast<u8>(); |
| 720 | } | 757 | } |
| 721 | 758 | ||
| 722 | case Operation::MultiplyThenAdd: | 759 | case Operation::MultiplyThenAdd: { |
| 723 | { | ||
| 724 | auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255; | 760 | auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255; |
| 725 | result.r() = std::min(255, result.r()); | 761 | result.r() = std::min(255, result.r()); |
| 726 | result.g() = std::min(255, result.g()); | 762 | result.g() = std::min(255, result.g()); |
| @@ -728,8 +764,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 728 | return result.Cast<u8>(); | 764 | return result.Cast<u8>(); |
| 729 | } | 765 | } |
| 730 | 766 | ||
| 731 | case Operation::AddThenMultiply: | 767 | case Operation::AddThenMultiply: { |
| 732 | { | ||
| 733 | auto result = input[0] + input[1]; | 768 | auto result = input[0] + input[1]; |
| 734 | result.r() = std::min(255, result.r()); | 769 | result.r() = std::min(255, result.r()); |
| 735 | result.g() = std::min(255, result.g()); | 770 | result.g() = std::min(255, result.g()); |
| @@ -737,17 +772,19 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 737 | result = (result * input[2].Cast<int>()) / 255; | 772 | result = (result * input[2].Cast<int>()) / 255; |
| 738 | return result.Cast<u8>(); | 773 | return result.Cast<u8>(); |
| 739 | } | 774 | } |
| 740 | case Operation::Dot3_RGB: | 775 | case Operation::Dot3_RGB: { |
| 741 | { | ||
| 742 | // Not fully accurate. | 776 | // Not fully accurate. |
| 743 | // Worst case scenario seems to yield a +/-3 error | 777 | // Worst case scenario seems to yield a +/-3 error |
| 744 | // Some HW results indicate that the per-component computation can't have a higher precision than 1/256, | 778 | // Some HW results indicate that the per-component computation can't have a |
| 745 | // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb( (0x80,g0,b0),(0x80,g1,b1) ) give different results | 779 | // higher precision than 1/256, |
| 746 | int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + | 780 | // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb( |
| 747 | ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + | 781 | // (0x80,g0,b0),(0x80,g1,b1) ) give different results |
| 748 | ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; | 782 | int result = |
| 783 | ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + | ||
| 784 | ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + | ||
| 785 | ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; | ||
| 749 | result = std::max(0, std::min(255, result)); | 786 | result = std::max(0, std::min(255, result)); |
| 750 | return { (u8)result, (u8)result, (u8)result }; | 787 | return {(u8)result, (u8)result, (u8)result}; |
| 751 | } | 788 | } |
| 752 | default: | 789 | default: |
| 753 | LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op); | 790 | LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op); |
| @@ -756,7 +793,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 756 | } | 793 | } |
| 757 | }; | 794 | }; |
| 758 | 795 | ||
| 759 | static auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 { | 796 | static auto AlphaCombine = [](Operation op, const std::array<u8, 3>& input) -> u8 { |
| 760 | switch (op) { | 797 | switch (op) { |
| 761 | case Operation::Replace: | 798 | case Operation::Replace: |
| 762 | return input[0]; | 799 | return input[0]; |
| @@ -767,9 +804,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 767 | case Operation::Add: | 804 | case Operation::Add: |
| 768 | return std::min(255, input[0] + input[1]); | 805 | return std::min(255, input[0] + input[1]); |
| 769 | 806 | ||
| 770 | case Operation::AddSigned: | 807 | case Operation::AddSigned: { |
| 771 | { | 808 | // TODO(bunnei): Verify that the color conversion from (float) 0.5f to |
| 772 | // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct | 809 | // (byte) 128 is correct |
| 773 | auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128; | 810 | auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128; |
| 774 | return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255)); | 811 | return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255)); |
| 775 | } | 812 | } |
| @@ -801,32 +838,38 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 801 | Math::Vec3<u8> color_result[3] = { | 838 | Math::Vec3<u8> color_result[3] = { |
| 802 | GetColorModifier(tev_stage.color_modifier1, GetSource(tev_stage.color_source1)), | 839 | GetColorModifier(tev_stage.color_modifier1, GetSource(tev_stage.color_source1)), |
| 803 | GetColorModifier(tev_stage.color_modifier2, GetSource(tev_stage.color_source2)), | 840 | GetColorModifier(tev_stage.color_modifier2, GetSource(tev_stage.color_source2)), |
| 804 | GetColorModifier(tev_stage.color_modifier3, GetSource(tev_stage.color_source3)) | 841 | GetColorModifier(tev_stage.color_modifier3, GetSource(tev_stage.color_source3)), |
| 805 | }; | 842 | }; |
| 806 | auto color_output = ColorCombine(tev_stage.color_op, color_result); | 843 | auto color_output = ColorCombine(tev_stage.color_op, color_result); |
| 807 | 844 | ||
| 808 | // alpha combiner | 845 | // alpha combiner |
| 809 | std::array<u8,3> alpha_result = {{ | 846 | std::array<u8, 3> alpha_result = {{ |
| 810 | GetAlphaModifier(tev_stage.alpha_modifier1, GetSource(tev_stage.alpha_source1)), | 847 | GetAlphaModifier(tev_stage.alpha_modifier1, GetSource(tev_stage.alpha_source1)), |
| 811 | GetAlphaModifier(tev_stage.alpha_modifier2, GetSource(tev_stage.alpha_source2)), | 848 | GetAlphaModifier(tev_stage.alpha_modifier2, GetSource(tev_stage.alpha_source2)), |
| 812 | GetAlphaModifier(tev_stage.alpha_modifier3, GetSource(tev_stage.alpha_source3)) | 849 | GetAlphaModifier(tev_stage.alpha_modifier3, GetSource(tev_stage.alpha_source3)), |
| 813 | }}; | 850 | }}; |
| 814 | auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); | 851 | auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); |
| 815 | 852 | ||
| 816 | combiner_output[0] = std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier()); | 853 | combiner_output[0] = |
| 817 | combiner_output[1] = std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier()); | 854 | std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier()); |
| 818 | combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); | 855 | combiner_output[1] = |
| 819 | combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); | 856 | std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier()); |
| 857 | combiner_output[2] = | ||
| 858 | std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); | ||
| 859 | combiner_output[3] = | ||
| 860 | std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); | ||
| 820 | 861 | ||
| 821 | combiner_buffer = next_combiner_buffer; | 862 | combiner_buffer = next_combiner_buffer; |
| 822 | 863 | ||
| 823 | if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) { | 864 | if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor( |
| 865 | tev_stage_index)) { | ||
| 824 | next_combiner_buffer.r() = combiner_output.r(); | 866 | next_combiner_buffer.r() = combiner_output.r(); |
| 825 | next_combiner_buffer.g() = combiner_output.g(); | 867 | next_combiner_buffer.g() = combiner_output.g(); |
| 826 | next_combiner_buffer.b() = combiner_output.b(); | 868 | next_combiner_buffer.b() = combiner_output.b(); |
| 827 | } | 869 | } |
| 828 | 870 | ||
| 829 | if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) { | 871 | if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha( |
| 872 | tev_stage_index)) { | ||
| 830 | next_combiner_buffer.a() = combiner_output.a(); | 873 | next_combiner_buffer.a() = combiner_output.a(); |
| 831 | } | 874 | } |
| 832 | } | 875 | } |
| @@ -897,21 +940,26 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 897 | float fog_i = MathUtil::Clamp(floorf(fog_index), 0.0f, 127.0f); | 940 | float fog_i = MathUtil::Clamp(floorf(fog_index), 0.0f, 127.0f); |
| 898 | float fog_f = fog_index - fog_i; | 941 | float fog_f = fog_index - fog_i; |
| 899 | const auto& fog_lut_entry = g_state.fog.lut[static_cast<unsigned int>(fog_i)]; | 942 | const auto& fog_lut_entry = g_state.fog.lut[static_cast<unsigned int>(fog_i)]; |
| 900 | float fog_factor = (fog_lut_entry.value + fog_lut_entry.difference * fog_f) / 2047.0f; // This is signed fixed point 1.11 | 943 | float fog_factor = (fog_lut_entry.value + fog_lut_entry.difference * fog_f) / |
| 944 | 2047.0f; // This is signed fixed point 1.11 | ||
| 901 | fog_factor = MathUtil::Clamp(fog_factor, 0.0f, 1.0f); | 945 | fog_factor = MathUtil::Clamp(fog_factor, 0.0f, 1.0f); |
| 902 | 946 | ||
| 903 | // Blend the fog | 947 | // Blend the fog |
| 904 | for (unsigned i = 0; i < 3; i++) { | 948 | for (unsigned i = 0; i < 3; i++) { |
| 905 | combiner_output[i] = fog_factor * combiner_output[i] + (1.0f - fog_factor) * fog_color[i]; | 949 | combiner_output[i] = |
| 950 | fog_factor * combiner_output[i] + (1.0f - fog_factor) * fog_color[i]; | ||
| 906 | } | 951 | } |
| 907 | } | 952 | } |
| 908 | 953 | ||
| 909 | u8 old_stencil = 0; | 954 | u8 old_stencil = 0; |
| 910 | 955 | ||
| 911 | auto UpdateStencil = [stencil_test, x, y, &old_stencil](Pica::Regs::StencilAction action) { | 956 | auto UpdateStencil = [stencil_test, x, y, |
| 912 | u8 new_stencil = PerformStencilAction(action, old_stencil, stencil_test.reference_value); | 957 | &old_stencil](Pica::Regs::StencilAction action) { |
| 958 | u8 new_stencil = | ||
| 959 | PerformStencilAction(action, old_stencil, stencil_test.reference_value); | ||
| 913 | if (g_state.regs.framebuffer.allow_depth_stencil_write != 0) | 960 | if (g_state.regs.framebuffer.allow_depth_stencil_write != 0) |
| 914 | SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask)); | 961 | SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | |
| 962 | (old_stencil & ~stencil_test.write_mask)); | ||
| 915 | }; | 963 | }; |
| 916 | 964 | ||
| 917 | if (stencil_action_enable) { | 965 | if (stencil_action_enable) { |
| @@ -1030,7 +1078,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 1030 | static_cast<u8>(output_merger.blend_const.r), | 1078 | static_cast<u8>(output_merger.blend_const.r), |
| 1031 | static_cast<u8>(output_merger.blend_const.g), | 1079 | static_cast<u8>(output_merger.blend_const.g), |
| 1032 | static_cast<u8>(output_merger.blend_const.b), | 1080 | static_cast<u8>(output_merger.blend_const.b), |
| 1033 | static_cast<u8>(output_merger.blend_const.a) | 1081 | static_cast<u8>(output_merger.blend_const.a), |
| 1034 | }; | 1082 | }; |
| 1035 | 1083 | ||
| 1036 | switch (factor) { | 1084 | switch (factor) { |
| @@ -1091,12 +1139,13 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 1091 | return combiner_output[channel]; | 1139 | return combiner_output[channel]; |
| 1092 | }; | 1140 | }; |
| 1093 | 1141 | ||
| 1094 | static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, | 1142 | static auto EvaluateBlendEquation = []( |
| 1095 | const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, | 1143 | const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, |
| 1096 | Regs::BlendEquation equation) { | 1144 | const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, |
| 1145 | Regs::BlendEquation equation) { | ||
| 1097 | Math::Vec4<int> result; | 1146 | Math::Vec4<int> result; |
| 1098 | 1147 | ||
| 1099 | auto src_result = (src * srcfactor).Cast<int>(); | 1148 | auto src_result = (src * srcfactor).Cast<int>(); |
| 1100 | auto dst_result = (dest * destfactor).Cast<int>(); | 1149 | auto dst_result = (dest * destfactor).Cast<int>(); |
| 1101 | 1150 | ||
| 1102 | switch (equation) { | 1151 | switch (equation) { |
| @@ -1134,10 +1183,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 1134 | UNIMPLEMENTED(); | 1183 | UNIMPLEMENTED(); |
| 1135 | } | 1184 | } |
| 1136 | 1185 | ||
| 1137 | return Math::Vec4<u8>(MathUtil::Clamp(result.r(), 0, 255), | 1186 | return Math::Vec4<u8>( |
| 1138 | MathUtil::Clamp(result.g(), 0, 255), | 1187 | MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255), |
| 1139 | MathUtil::Clamp(result.b(), 0, 255), | 1188 | MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255)); |
| 1140 | MathUtil::Clamp(result.a(), 0, 255)); | ||
| 1141 | }; | 1189 | }; |
| 1142 | 1190 | ||
| 1143 | auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb), | 1191 | auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb), |
| @@ -1150,8 +1198,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 1150 | LookupFactor(2, params.factor_dest_rgb), | 1198 | LookupFactor(2, params.factor_dest_rgb), |
| 1151 | LookupFactor(3, params.factor_dest_a)); | 1199 | LookupFactor(3, params.factor_dest_a)); |
| 1152 | 1200 | ||
| 1153 | blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); | 1201 | blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, |
| 1154 | blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); | 1202 | params.blend_equation_rgb); |
| 1203 | blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, | ||
| 1204 | dstfactor, params.blend_equation_a) | ||
| 1205 | .a(); | ||
| 1155 | } else { | 1206 | } else { |
| 1156 | static auto LogicOp = [](u8 src, u8 dest, Regs::LogicOp op) -> u8 { | 1207 | static auto LogicOp = [](u8 src, u8 dest, Regs::LogicOp op) -> u8 { |
| 1157 | switch (op) { | 1208 | switch (op) { |
| @@ -1205,18 +1256,18 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 1205 | } | 1256 | } |
| 1206 | }; | 1257 | }; |
| 1207 | 1258 | ||
| 1208 | blend_output = Math::MakeVec( | 1259 | blend_output = |
| 1209 | LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), | 1260 | Math::MakeVec(LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), |
| 1210 | LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), | 1261 | LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), |
| 1211 | LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op), | 1262 | LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op), |
| 1212 | LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op)); | 1263 | LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op)); |
| 1213 | } | 1264 | } |
| 1214 | 1265 | ||
| 1215 | const Math::Vec4<u8> result = { | 1266 | const Math::Vec4<u8> result = { |
| 1216 | output_merger.red_enable ? blend_output.r() : dest.r(), | 1267 | output_merger.red_enable ? blend_output.r() : dest.r(), |
| 1217 | output_merger.green_enable ? blend_output.g() : dest.g(), | 1268 | output_merger.green_enable ? blend_output.g() : dest.g(), |
| 1218 | output_merger.blue_enable ? blend_output.b() : dest.b(), | 1269 | output_merger.blue_enable ? blend_output.b() : dest.b(), |
| 1219 | output_merger.alpha_enable ? blend_output.a() : dest.a() | 1270 | output_merger.alpha_enable ? blend_output.a() : dest.a(), |
| 1220 | }; | 1271 | }; |
| 1221 | 1272 | ||
| 1222 | if (regs.framebuffer.allow_color_write != 0) | 1273 | if (regs.framebuffer.allow_color_write != 0) |
| @@ -1225,8 +1276,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, | |||
| 1225 | } | 1276 | } |
| 1226 | } | 1277 | } |
| 1227 | 1278 | ||
| 1228 | void ProcessTriangle(const Shader::OutputVertex& v0, | 1279 | void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, |
| 1229 | const Shader::OutputVertex& v1, | ||
| 1230 | const Shader::OutputVertex& v2) { | 1280 | const Shader::OutputVertex& v2) { |
| 1231 | ProcessTriangleInternal(v0, v1, v2); | 1281 | ProcessTriangleInternal(v0, v1, v2); |
| 1232 | } | 1282 | } |