diff options
| author | 2017-01-29 18:10:26 -0800 | |
|---|---|---|
| committer | 2017-02-12 18:11:05 -0800 | |
| commit | f9026e8a7a53073340f7188f433f81fe84a16976 (patch) | |
| tree | b5af88d2b17b8305227661e6f9c798101477f4fb /src | |
| parent | SWRasterizer: Move framebuffer operation functions to their own file (diff) | |
| download | yuzu-f9026e8a7a53073340f7188f433f81fe84a16976.tar.gz yuzu-f9026e8a7a53073340f7188f433f81fe84a16976.tar.xz yuzu-f9026e8a7a53073340f7188f433f81fe84a16976.zip | |
SWRasterizer: Convert large no-capture lambdas to standalone functions
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/swrasterizer/rasterizer.cpp | 625 |
1 file changed, 310 insertions, 315 deletions
diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index cb11338b7..7044a6136 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp | |||
| @@ -28,9 +28,318 @@ | |||
| 28 | #include "video_core/utils.h" | 28 | #include "video_core/utils.h" |
| 29 | 29 | ||
| 30 | namespace Pica { | 30 | namespace Pica { |
| 31 | |||
| 32 | namespace Rasterizer { | 31 | namespace Rasterizer { |
| 33 | 32 | ||
| 33 | using TevStageConfig = TexturingRegs::TevStageConfig; | ||
| 34 | |||
| 35 | static int GetWrappedTexCoord(TexturingRegs::TextureConfig::WrapMode mode, int val, unsigned size) { | ||
| 36 | switch (mode) { | ||
| 37 | case TexturingRegs::TextureConfig::ClampToEdge: | ||
| 38 | val = std::max(val, 0); | ||
| 39 | val = std::min(val, (int)size - 1); | ||
| 40 | return val; | ||
| 41 | |||
| 42 | case TexturingRegs::TextureConfig::ClampToBorder: | ||
| 43 | return val; | ||
| 44 | |||
| 45 | case TexturingRegs::TextureConfig::Repeat: | ||
| 46 | return (int)((unsigned)val % size); | ||
| 47 | |||
| 48 | case TexturingRegs::TextureConfig::MirroredRepeat: { | ||
| 49 | unsigned int coord = ((unsigned)val % (2 * size)); | ||
| 50 | if (coord >= size) | ||
| 51 | coord = 2 * size - 1 - coord; | ||
| 52 | return (int)coord; | ||
| 53 | } | ||
| 54 | |||
| 55 | default: | ||
| 56 | LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); | ||
| 57 | UNIMPLEMENTED(); | ||
| 58 | return 0; | ||
| 59 | } | ||
| 60 | }; | ||
| 61 | |||
| 62 | static Math::Vec3<u8> GetColorModifier(TevStageConfig::ColorModifier factor, | ||
| 63 | const Math::Vec4<u8>& values) { | ||
| 64 | using ColorModifier = TevStageConfig::ColorModifier; | ||
| 65 | |||
| 66 | switch (factor) { | ||
| 67 | case ColorModifier::SourceColor: | ||
| 68 | return values.rgb(); | ||
| 69 | |||
| 70 | case ColorModifier::OneMinusSourceColor: | ||
| 71 | return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>(); | ||
| 72 | |||
| 73 | case ColorModifier::SourceAlpha: | ||
| 74 | return values.aaa(); | ||
| 75 | |||
| 76 | case ColorModifier::OneMinusSourceAlpha: | ||
| 77 | return (Math::Vec3<u8>(255, 255, 255) - values.aaa()).Cast<u8>(); | ||
| 78 | |||
| 79 | case ColorModifier::SourceRed: | ||
| 80 | return values.rrr(); | ||
| 81 | |||
| 82 | case ColorModifier::OneMinusSourceRed: | ||
| 83 | return (Math::Vec3<u8>(255, 255, 255) - values.rrr()).Cast<u8>(); | ||
| 84 | |||
| 85 | case ColorModifier::SourceGreen: | ||
| 86 | return values.ggg(); | ||
| 87 | |||
| 88 | case ColorModifier::OneMinusSourceGreen: | ||
| 89 | return (Math::Vec3<u8>(255, 255, 255) - values.ggg()).Cast<u8>(); | ||
| 90 | |||
| 91 | case ColorModifier::SourceBlue: | ||
| 92 | return values.bbb(); | ||
| 93 | |||
| 94 | case ColorModifier::OneMinusSourceBlue: | ||
| 95 | return (Math::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>(); | ||
| 96 | } | ||
| 97 | }; | ||
| 98 | |||
| 99 | static u8 GetAlphaModifier(TevStageConfig::AlphaModifier factor, const Math::Vec4<u8>& values) { | ||
| 100 | using AlphaModifier = TevStageConfig::AlphaModifier; | ||
| 101 | |||
| 102 | switch (factor) { | ||
| 103 | case AlphaModifier::SourceAlpha: | ||
| 104 | return values.a(); | ||
| 105 | |||
| 106 | case AlphaModifier::OneMinusSourceAlpha: | ||
| 107 | return 255 - values.a(); | ||
| 108 | |||
| 109 | case AlphaModifier::SourceRed: | ||
| 110 | return values.r(); | ||
| 111 | |||
| 112 | case AlphaModifier::OneMinusSourceRed: | ||
| 113 | return 255 - values.r(); | ||
| 114 | |||
| 115 | case AlphaModifier::SourceGreen: | ||
| 116 | return values.g(); | ||
| 117 | |||
| 118 | case AlphaModifier::OneMinusSourceGreen: | ||
| 119 | return 255 - values.g(); | ||
| 120 | |||
| 121 | case AlphaModifier::SourceBlue: | ||
| 122 | return values.b(); | ||
| 123 | |||
| 124 | case AlphaModifier::OneMinusSourceBlue: | ||
| 125 | return 255 - values.b(); | ||
| 126 | } | ||
| 127 | }; | ||
| 128 | |||
| 129 | static Math::Vec3<u8> ColorCombine(TevStageConfig::Operation op, const Math::Vec3<u8> input[3]) { | ||
| 130 | using Operation = TevStageConfig::Operation; | ||
| 131 | |||
| 132 | switch (op) { | ||
| 133 | case Operation::Replace: | ||
| 134 | return input[0]; | ||
| 135 | |||
| 136 | case Operation::Modulate: | ||
| 137 | return ((input[0] * input[1]) / 255).Cast<u8>(); | ||
| 138 | |||
| 139 | case Operation::Add: { | ||
| 140 | auto result = input[0] + input[1]; | ||
| 141 | result.r() = std::min(255, result.r()); | ||
| 142 | result.g() = std::min(255, result.g()); | ||
| 143 | result.b() = std::min(255, result.b()); | ||
| 144 | return result.Cast<u8>(); | ||
| 145 | } | ||
| 146 | |||
| 147 | case Operation::AddSigned: { | ||
| 148 | // TODO(bunnei): Verify that the color conversion from (float) 0.5f to | ||
| 149 | // (byte) 128 is correct | ||
| 150 | auto result = | ||
| 151 | input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128); | ||
| 152 | result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); | ||
| 153 | result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); | ||
| 154 | result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); | ||
| 155 | return result.Cast<u8>(); | ||
| 156 | } | ||
| 157 | |||
| 158 | case Operation::Lerp: | ||
| 159 | return ((input[0] * input[2] + | ||
| 160 | input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / | ||
| 161 | 255) | ||
| 162 | .Cast<u8>(); | ||
| 163 | |||
| 164 | case Operation::Subtract: { | ||
| 165 | auto result = input[0].Cast<int>() - input[1].Cast<int>(); | ||
| 166 | result.r() = std::max(0, result.r()); | ||
| 167 | result.g() = std::max(0, result.g()); | ||
| 168 | result.b() = std::max(0, result.b()); | ||
| 169 | return result.Cast<u8>(); | ||
| 170 | } | ||
| 171 | |||
| 172 | case Operation::MultiplyThenAdd: { | ||
| 173 | auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255; | ||
| 174 | result.r() = std::min(255, result.r()); | ||
| 175 | result.g() = std::min(255, result.g()); | ||
| 176 | result.b() = std::min(255, result.b()); | ||
| 177 | return result.Cast<u8>(); | ||
| 178 | } | ||
| 179 | |||
| 180 | case Operation::AddThenMultiply: { | ||
| 181 | auto result = input[0] + input[1]; | ||
| 182 | result.r() = std::min(255, result.r()); | ||
| 183 | result.g() = std::min(255, result.g()); | ||
| 184 | result.b() = std::min(255, result.b()); | ||
| 185 | result = (result * input[2].Cast<int>()) / 255; | ||
| 186 | return result.Cast<u8>(); | ||
| 187 | } | ||
| 188 | case Operation::Dot3_RGB: { | ||
| 189 | // Not fully accurate. Worst case scenario seems to yield a +/-3 error. Some HW results | ||
| 190 | // indicate that the per-component computation can't have a higher precision than 1/256, | ||
| 191 | // while dot3_rgb((0x80,g0,b0), (0x7F,g1,b1)) and dot3_rgb((0x80,g0,b0), (0x80,g1,b1)) give | ||
| 192 | // different results. | ||
| 193 | int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + | ||
| 194 | ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + | ||
| 195 | ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; | ||
| 196 | result = std::max(0, std::min(255, result)); | ||
| 197 | return {(u8)result, (u8)result, (u8)result}; | ||
| 198 | } | ||
| 199 | default: | ||
| 200 | LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op); | ||
| 201 | UNIMPLEMENTED(); | ||
| 202 | return {0, 0, 0}; | ||
| 203 | } | ||
| 204 | }; | ||
| 205 | |||
| 206 | static u8 AlphaCombine(TevStageConfig::Operation op, const std::array<u8, 3>& input) { | ||
| 207 | switch (op) { | ||
| 208 | using Operation = TevStageConfig::Operation; | ||
| 209 | case Operation::Replace: | ||
| 210 | return input[0]; | ||
| 211 | |||
| 212 | case Operation::Modulate: | ||
| 213 | return input[0] * input[1] / 255; | ||
| 214 | |||
| 215 | case Operation::Add: | ||
| 216 | return std::min(255, input[0] + input[1]); | ||
| 217 | |||
| 218 | case Operation::AddSigned: { | ||
| 219 | // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct | ||
| 220 | auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128; | ||
| 221 | return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255)); | ||
| 222 | } | ||
| 223 | |||
| 224 | case Operation::Lerp: | ||
| 225 | return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; | ||
| 226 | |||
| 227 | case Operation::Subtract: | ||
| 228 | return std::max(0, (int)input[0] - (int)input[1]); | ||
| 229 | |||
| 230 | case Operation::MultiplyThenAdd: | ||
| 231 | return std::min(255, (input[0] * input[1] + 255 * input[2]) / 255); | ||
| 232 | |||
| 233 | case Operation::AddThenMultiply: | ||
| 234 | return (std::min(255, (input[0] + input[1])) * input[2]) / 255; | ||
| 235 | |||
| 236 | default: | ||
| 237 | LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d", (int)op); | ||
| 238 | UNIMPLEMENTED(); | ||
| 239 | return 0; | ||
| 240 | } | ||
| 241 | }; | ||
| 242 | |||
| 243 | static Math::Vec4<u8> EvaluateBlendEquation(const Math::Vec4<u8>& src, | ||
| 244 | const Math::Vec4<u8>& srcfactor, | ||
| 245 | const Math::Vec4<u8>& dest, | ||
| 246 | const Math::Vec4<u8>& destfactor, | ||
| 247 | FramebufferRegs::BlendEquation equation) { | ||
| 248 | Math::Vec4<int> result; | ||
| 249 | |||
| 250 | auto src_result = (src * srcfactor).Cast<int>(); | ||
| 251 | auto dst_result = (dest * destfactor).Cast<int>(); | ||
| 252 | |||
| 253 | switch (equation) { | ||
| 254 | case FramebufferRegs::BlendEquation::Add: | ||
| 255 | result = (src_result + dst_result) / 255; | ||
| 256 | break; | ||
| 257 | |||
| 258 | case FramebufferRegs::BlendEquation::Subtract: | ||
| 259 | result = (src_result - dst_result) / 255; | ||
| 260 | break; | ||
| 261 | |||
| 262 | case FramebufferRegs::BlendEquation::ReverseSubtract: | ||
| 263 | result = (dst_result - src_result) / 255; | ||
| 264 | break; | ||
| 265 | |||
| 266 | // TODO: How do these two actually work? OpenGL doesn't include the blend factors in the | ||
| 267 | // min/max computations, but is this what the 3DS actually does? | ||
| 268 | case FramebufferRegs::BlendEquation::Min: | ||
| 269 | result.r() = std::min(src.r(), dest.r()); | ||
| 270 | result.g() = std::min(src.g(), dest.g()); | ||
| 271 | result.b() = std::min(src.b(), dest.b()); | ||
| 272 | result.a() = std::min(src.a(), dest.a()); | ||
| 273 | break; | ||
| 274 | |||
| 275 | case FramebufferRegs::BlendEquation::Max: | ||
| 276 | result.r() = std::max(src.r(), dest.r()); | ||
| 277 | result.g() = std::max(src.g(), dest.g()); | ||
| 278 | result.b() = std::max(src.b(), dest.b()); | ||
| 279 | result.a() = std::max(src.a(), dest.a()); | ||
| 280 | break; | ||
| 281 | |||
| 282 | default: | ||
| 283 | LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", equation); | ||
| 284 | UNIMPLEMENTED(); | ||
| 285 | } | ||
| 286 | |||
| 287 | return Math::Vec4<u8>(MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255), | ||
| 288 | MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255)); | ||
| 289 | }; | ||
| 290 | |||
| 291 | static u8 LogicOp(u8 src, u8 dest, FramebufferRegs::LogicOp op) { | ||
| 292 | switch (op) { | ||
| 293 | case FramebufferRegs::LogicOp::Clear: | ||
| 294 | return 0; | ||
| 295 | |||
| 296 | case FramebufferRegs::LogicOp::And: | ||
| 297 | return src & dest; | ||
| 298 | |||
| 299 | case FramebufferRegs::LogicOp::AndReverse: | ||
| 300 | return src & ~dest; | ||
| 301 | |||
| 302 | case FramebufferRegs::LogicOp::Copy: | ||
| 303 | return src; | ||
| 304 | |||
| 305 | case FramebufferRegs::LogicOp::Set: | ||
| 306 | return 255; | ||
| 307 | |||
| 308 | case FramebufferRegs::LogicOp::CopyInverted: | ||
| 309 | return ~src; | ||
| 310 | |||
| 311 | case FramebufferRegs::LogicOp::NoOp: | ||
| 312 | return dest; | ||
| 313 | |||
| 314 | case FramebufferRegs::LogicOp::Invert: | ||
| 315 | return ~dest; | ||
| 316 | |||
| 317 | case FramebufferRegs::LogicOp::Nand: | ||
| 318 | return ~(src & dest); | ||
| 319 | |||
| 320 | case FramebufferRegs::LogicOp::Or: | ||
| 321 | return src | dest; | ||
| 322 | |||
| 323 | case FramebufferRegs::LogicOp::Nor: | ||
| 324 | return ~(src | dest); | ||
| 325 | |||
| 326 | case FramebufferRegs::LogicOp::Xor: | ||
| 327 | return src ^ dest; | ||
| 328 | |||
| 329 | case FramebufferRegs::LogicOp::Equiv: | ||
| 330 | return ~(src ^ dest); | ||
| 331 | |||
| 332 | case FramebufferRegs::LogicOp::AndInverted: | ||
| 333 | return ~src & dest; | ||
| 334 | |||
| 335 | case FramebufferRegs::LogicOp::OrReverse: | ||
| 336 | return src | ~dest; | ||
| 337 | |||
| 338 | case FramebufferRegs::LogicOp::OrInverted: | ||
| 339 | return ~src | dest; | ||
| 340 | } | ||
| 341 | }; | ||
| 342 | |||
| 34 | // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values | 343 | // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values |
| 35 | struct Fix12P4 { | 344 | struct Fix12P4 { |
| 36 | Fix12P4() {} | 345 | Fix12P4() {} |
| @@ -304,34 +613,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve | |||
| 304 | int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))) | 613 | int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))) |
| 305 | .ToFloat32(); | 614 | .ToFloat32(); |
| 306 | 615 | ||
| 307 | static auto GetWrappedTexCoord = [](TexturingRegs::TextureConfig::WrapMode mode, | ||
| 308 | int val, unsigned size) { | ||
| 309 | switch (mode) { | ||
| 310 | case TexturingRegs::TextureConfig::ClampToEdge: | ||
| 311 | val = std::max(val, 0); | ||
| 312 | val = std::min(val, (int)size - 1); | ||
| 313 | return val; | ||
| 314 | |||
| 315 | case TexturingRegs::TextureConfig::ClampToBorder: | ||
| 316 | return val; | ||
| 317 | |||
| 318 | case TexturingRegs::TextureConfig::Repeat: | ||
| 319 | return (int)((unsigned)val % size); | ||
| 320 | |||
| 321 | case TexturingRegs::TextureConfig::MirroredRepeat: { | ||
| 322 | unsigned int coord = ((unsigned)val % (2 * size)); | ||
| 323 | if (coord >= size) | ||
| 324 | coord = 2 * size - 1 - coord; | ||
| 325 | return (int)coord; | ||
| 326 | } | ||
| 327 | |||
| 328 | default: | ||
| 329 | LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode); | ||
| 330 | UNIMPLEMENTED(); | ||
| 331 | return 0; | ||
| 332 | } | ||
| 333 | }; | ||
| 334 | |||
| 335 | if ((texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder && | 616 | if ((texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder && |
| 336 | (s < 0 || static_cast<u32>(s) >= texture.config.width)) || | 617 | (s < 0 || static_cast<u32>(s) >= texture.config.width)) || |
| 337 | (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder && | 618 | (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder && |
| @@ -380,9 +661,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve | |||
| 380 | ++tev_stage_index) { | 661 | ++tev_stage_index) { |
| 381 | const auto& tev_stage = tev_stages[tev_stage_index]; | 662 | const auto& tev_stage = tev_stages[tev_stage_index]; |
| 382 | using Source = TexturingRegs::TevStageConfig::Source; | 663 | using Source = TexturingRegs::TevStageConfig::Source; |
| 383 | using ColorModifier = TexturingRegs::TevStageConfig::ColorModifier; | ||
| 384 | using AlphaModifier = TexturingRegs::TevStageConfig::AlphaModifier; | ||
| 385 | using Operation = TexturingRegs::TevStageConfig::Operation; | ||
| 386 | 664 | ||
| 387 | auto GetSource = [&](Source source) -> Math::Vec4<u8> { | 665 | auto GetSource = [&](Source source) -> Math::Vec4<u8> { |
| 388 | switch (source) { | 666 | switch (source) { |
| @@ -422,187 +700,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve | |||
| 422 | } | 700 | } |
| 423 | }; | 701 | }; |
| 424 | 702 | ||
| 425 | static auto GetColorModifier = [](ColorModifier factor, | ||
| 426 | const Math::Vec4<u8>& values) -> Math::Vec3<u8> { | ||
| 427 | switch (factor) { | ||
| 428 | case ColorModifier::SourceColor: | ||
| 429 | return values.rgb(); | ||
| 430 | |||
| 431 | case ColorModifier::OneMinusSourceColor: | ||
| 432 | return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>(); | ||
| 433 | |||
| 434 | case ColorModifier::SourceAlpha: | ||
| 435 | return values.aaa(); | ||
| 436 | |||
| 437 | case ColorModifier::OneMinusSourceAlpha: | ||
| 438 | return (Math::Vec3<u8>(255, 255, 255) - values.aaa()).Cast<u8>(); | ||
| 439 | |||
| 440 | case ColorModifier::SourceRed: | ||
| 441 | return values.rrr(); | ||
| 442 | |||
| 443 | case ColorModifier::OneMinusSourceRed: | ||
| 444 | return (Math::Vec3<u8>(255, 255, 255) - values.rrr()).Cast<u8>(); | ||
| 445 | |||
| 446 | case ColorModifier::SourceGreen: | ||
| 447 | return values.ggg(); | ||
| 448 | |||
| 449 | case ColorModifier::OneMinusSourceGreen: | ||
| 450 | return (Math::Vec3<u8>(255, 255, 255) - values.ggg()).Cast<u8>(); | ||
| 451 | |||
| 452 | case ColorModifier::SourceBlue: | ||
| 453 | return values.bbb(); | ||
| 454 | |||
| 455 | case ColorModifier::OneMinusSourceBlue: | ||
| 456 | return (Math::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>(); | ||
| 457 | } | ||
| 458 | }; | ||
| 459 | |||
| 460 | static auto GetAlphaModifier = [](AlphaModifier factor, | ||
| 461 | const Math::Vec4<u8>& values) -> u8 { | ||
| 462 | switch (factor) { | ||
| 463 | case AlphaModifier::SourceAlpha: | ||
| 464 | return values.a(); | ||
| 465 | |||
| 466 | case AlphaModifier::OneMinusSourceAlpha: | ||
| 467 | return 255 - values.a(); | ||
| 468 | |||
| 469 | case AlphaModifier::SourceRed: | ||
| 470 | return values.r(); | ||
| 471 | |||
| 472 | case AlphaModifier::OneMinusSourceRed: | ||
| 473 | return 255 - values.r(); | ||
| 474 | |||
| 475 | case AlphaModifier::SourceGreen: | ||
| 476 | return values.g(); | ||
| 477 | |||
| 478 | case AlphaModifier::OneMinusSourceGreen: | ||
| 479 | return 255 - values.g(); | ||
| 480 | |||
| 481 | case AlphaModifier::SourceBlue: | ||
| 482 | return values.b(); | ||
| 483 | |||
| 484 | case AlphaModifier::OneMinusSourceBlue: | ||
| 485 | return 255 - values.b(); | ||
| 486 | } | ||
| 487 | }; | ||
| 488 | |||
| 489 | static auto ColorCombine = [](Operation op, | ||
| 490 | const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { | ||
| 491 | switch (op) { | ||
| 492 | case Operation::Replace: | ||
| 493 | return input[0]; | ||
| 494 | |||
| 495 | case Operation::Modulate: | ||
| 496 | return ((input[0] * input[1]) / 255).Cast<u8>(); | ||
| 497 | |||
| 498 | case Operation::Add: { | ||
| 499 | auto result = input[0] + input[1]; | ||
| 500 | result.r() = std::min(255, result.r()); | ||
| 501 | result.g() = std::min(255, result.g()); | ||
| 502 | result.b() = std::min(255, result.b()); | ||
| 503 | return result.Cast<u8>(); | ||
| 504 | } | ||
| 505 | |||
| 506 | case Operation::AddSigned: { | ||
| 507 | // TODO(bunnei): Verify that the color conversion from (float) 0.5f to | ||
| 508 | // (byte) 128 is correct | ||
| 509 | auto result = input[0].Cast<int>() + input[1].Cast<int>() - | ||
| 510 | Math::MakeVec<int>(128, 128, 128); | ||
| 511 | result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); | ||
| 512 | result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); | ||
| 513 | result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); | ||
| 514 | return result.Cast<u8>(); | ||
| 515 | } | ||
| 516 | |||
| 517 | case Operation::Lerp: | ||
| 518 | return ((input[0] * input[2] + | ||
| 519 | input[1] * | ||
| 520 | (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / | ||
| 521 | 255) | ||
| 522 | .Cast<u8>(); | ||
| 523 | |||
| 524 | case Operation::Subtract: { | ||
| 525 | auto result = input[0].Cast<int>() - input[1].Cast<int>(); | ||
| 526 | result.r() = std::max(0, result.r()); | ||
| 527 | result.g() = std::max(0, result.g()); | ||
| 528 | result.b() = std::max(0, result.b()); | ||
| 529 | return result.Cast<u8>(); | ||
| 530 | } | ||
| 531 | |||
| 532 | case Operation::MultiplyThenAdd: { | ||
| 533 | auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255; | ||
| 534 | result.r() = std::min(255, result.r()); | ||
| 535 | result.g() = std::min(255, result.g()); | ||
| 536 | result.b() = std::min(255, result.b()); | ||
| 537 | return result.Cast<u8>(); | ||
| 538 | } | ||
| 539 | |||
| 540 | case Operation::AddThenMultiply: { | ||
| 541 | auto result = input[0] + input[1]; | ||
| 542 | result.r() = std::min(255, result.r()); | ||
| 543 | result.g() = std::min(255, result.g()); | ||
| 544 | result.b() = std::min(255, result.b()); | ||
| 545 | result = (result * input[2].Cast<int>()) / 255; | ||
| 546 | return result.Cast<u8>(); | ||
| 547 | } | ||
| 548 | case Operation::Dot3_RGB: { | ||
| 549 | // Not fully accurate. | ||
| 550 | // Worst case scenario seems to yield a +/-3 error | ||
| 551 | // Some HW results indicate that the per-component computation can't have a | ||
| 552 | // higher precision than 1/256, | ||
| 553 | // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb( | ||
| 554 | // (0x80,g0,b0),(0x80,g1,b1) ) give different results | ||
| 555 | int result = | ||
| 556 | ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + | ||
| 557 | ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + | ||
| 558 | ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; | ||
| 559 | result = std::max(0, std::min(255, result)); | ||
| 560 | return {(u8)result, (u8)result, (u8)result}; | ||
| 561 | } | ||
| 562 | default: | ||
| 563 | LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op); | ||
| 564 | UNIMPLEMENTED(); | ||
| 565 | return {0, 0, 0}; | ||
| 566 | } | ||
| 567 | }; | ||
| 568 | |||
| 569 | static auto AlphaCombine = [](Operation op, const std::array<u8, 3>& input) -> u8 { | ||
| 570 | switch (op) { | ||
| 571 | case Operation::Replace: | ||
| 572 | return input[0]; | ||
| 573 | |||
| 574 | case Operation::Modulate: | ||
| 575 | return input[0] * input[1] / 255; | ||
| 576 | |||
| 577 | case Operation::Add: | ||
| 578 | return std::min(255, input[0] + input[1]); | ||
| 579 | |||
| 580 | case Operation::AddSigned: { | ||
| 581 | // TODO(bunnei): Verify that the color conversion from (float) 0.5f to | ||
| 582 | // (byte) 128 is correct | ||
| 583 | auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128; | ||
| 584 | return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255)); | ||
| 585 | } | ||
| 586 | |||
| 587 | case Operation::Lerp: | ||
| 588 | return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; | ||
| 589 | |||
| 590 | case Operation::Subtract: | ||
| 591 | return std::max(0, (int)input[0] - (int)input[1]); | ||
| 592 | |||
| 593 | case Operation::MultiplyThenAdd: | ||
| 594 | return std::min(255, (input[0] * input[1] + 255 * input[2]) / 255); | ||
| 595 | |||
| 596 | case Operation::AddThenMultiply: | ||
| 597 | return (std::min(255, (input[0] + input[1])) * input[2]) / 255; | ||
| 598 | |||
| 599 | default: | ||
| 600 | LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d", (int)op); | ||
| 601 | UNIMPLEMENTED(); | ||
| 602 | return 0; | ||
| 603 | } | ||
| 604 | }; | ||
| 605 | |||
| 606 | // color combiner | 703 | // color combiner |
| 607 | // NOTE: Not sure if the alpha combiner might use the color output of the previous | 704 | // NOTE: Not sure if the alpha combiner might use the color output of the previous |
| 608 | // stage as input. Hence, we currently don't directly write the result to | 705 | // stage as input. Hence, we currently don't directly write the result to |
| @@ -917,56 +1014,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve | |||
| 917 | return combiner_output[channel]; | 1014 | return combiner_output[channel]; |
| 918 | }; | 1015 | }; |
| 919 | 1016 | ||
| 920 | static auto EvaluateBlendEquation = []( | ||
| 921 | const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, | ||
| 922 | const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, | ||
| 923 | FramebufferRegs::BlendEquation equation) { | ||
| 924 | |||
| 925 | Math::Vec4<int> result; | ||
| 926 | |||
| 927 | auto src_result = (src * srcfactor).Cast<int>(); | ||
| 928 | auto dst_result = (dest * destfactor).Cast<int>(); | ||
| 929 | |||
| 930 | switch (equation) { | ||
| 931 | case FramebufferRegs::BlendEquation::Add: | ||
| 932 | result = (src_result + dst_result) / 255; | ||
| 933 | break; | ||
| 934 | |||
| 935 | case FramebufferRegs::BlendEquation::Subtract: | ||
| 936 | result = (src_result - dst_result) / 255; | ||
| 937 | break; | ||
| 938 | |||
| 939 | case FramebufferRegs::BlendEquation::ReverseSubtract: | ||
| 940 | result = (dst_result - src_result) / 255; | ||
| 941 | break; | ||
| 942 | |||
| 943 | // TODO: How do these two actually work? | ||
| 944 | // OpenGL doesn't include the blend factors in the min/max computations, | ||
| 945 | // but is this what the 3DS actually does? | ||
| 946 | case FramebufferRegs::BlendEquation::Min: | ||
| 947 | result.r() = std::min(src.r(), dest.r()); | ||
| 948 | result.g() = std::min(src.g(), dest.g()); | ||
| 949 | result.b() = std::min(src.b(), dest.b()); | ||
| 950 | result.a() = std::min(src.a(), dest.a()); | ||
| 951 | break; | ||
| 952 | |||
| 953 | case FramebufferRegs::BlendEquation::Max: | ||
| 954 | result.r() = std::max(src.r(), dest.r()); | ||
| 955 | result.g() = std::max(src.g(), dest.g()); | ||
| 956 | result.b() = std::max(src.b(), dest.b()); | ||
| 957 | result.a() = std::max(src.a(), dest.a()); | ||
| 958 | break; | ||
| 959 | |||
| 960 | default: | ||
| 961 | LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", equation); | ||
| 962 | UNIMPLEMENTED(); | ||
| 963 | } | ||
| 964 | |||
| 965 | return Math::Vec4<u8>( | ||
| 966 | MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255), | ||
| 967 | MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255)); | ||
| 968 | }; | ||
| 969 | |||
| 970 | auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb), | 1017 | auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb), |
| 971 | LookupFactor(1, params.factor_source_rgb), | 1018 | LookupFactor(1, params.factor_source_rgb), |
| 972 | LookupFactor(2, params.factor_source_rgb), | 1019 | LookupFactor(2, params.factor_source_rgb), |
| @@ -983,58 +1030,6 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve | |||
| 983 | dstfactor, params.blend_equation_a) | 1030 | dstfactor, params.blend_equation_a) |
| 984 | .a(); | 1031 | .a(); |
| 985 | } else { | 1032 | } else { |
| 986 | static auto LogicOp = [](u8 src, u8 dest, FramebufferRegs::LogicOp op) -> u8 { | ||
| 987 | switch (op) { | ||
| 988 | case FramebufferRegs::LogicOp::Clear: | ||
| 989 | return 0; | ||
| 990 | |||
| 991 | case FramebufferRegs::LogicOp::And: | ||
| 992 | return src & dest; | ||
| 993 | |||
| 994 | case FramebufferRegs::LogicOp::AndReverse: | ||
| 995 | return src & ~dest; | ||
| 996 | |||
| 997 | case FramebufferRegs::LogicOp::Copy: | ||
| 998 | return src; | ||
| 999 | |||
| 1000 | case FramebufferRegs::LogicOp::Set: | ||
| 1001 | return 255; | ||
| 1002 | |||
| 1003 | case FramebufferRegs::LogicOp::CopyInverted: | ||
| 1004 | return ~src; | ||
| 1005 | |||
| 1006 | case FramebufferRegs::LogicOp::NoOp: | ||
| 1007 | return dest; | ||
| 1008 | |||
| 1009 | case FramebufferRegs::LogicOp::Invert: | ||
| 1010 | return ~dest; | ||
| 1011 | |||
| 1012 | case FramebufferRegs::LogicOp::Nand: | ||
| 1013 | return ~(src & dest); | ||
| 1014 | |||
| 1015 | case FramebufferRegs::LogicOp::Or: | ||
| 1016 | return src | dest; | ||
| 1017 | |||
| 1018 | case FramebufferRegs::LogicOp::Nor: | ||
| 1019 | return ~(src | dest); | ||
| 1020 | |||
| 1021 | case FramebufferRegs::LogicOp::Xor: | ||
| 1022 | return src ^ dest; | ||
| 1023 | |||
| 1024 | case FramebufferRegs::LogicOp::Equiv: | ||
| 1025 | return ~(src ^ dest); | ||
| 1026 | |||
| 1027 | case FramebufferRegs::LogicOp::AndInverted: | ||
| 1028 | return ~src & dest; | ||
| 1029 | |||
| 1030 | case FramebufferRegs::LogicOp::OrReverse: | ||
| 1031 | return src | ~dest; | ||
| 1032 | |||
| 1033 | case FramebufferRegs::LogicOp::OrInverted: | ||
| 1034 | return ~src | dest; | ||
| 1035 | } | ||
| 1036 | }; | ||
| 1037 | |||
| 1038 | blend_output = | 1033 | blend_output = |
| 1039 | Math::MakeVec(LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), | 1034 | Math::MakeVec(LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), |
| 1040 | LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), | 1035 | LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), |