diff options
| author | 2015-08-23 21:46:36 -0300 | |
|---|---|---|
| committer | 2015-08-24 01:29:40 -0300 | |
| commit | 455147ee95e8de7af237fc66aba9bbd52474ec3b (patch) | |
| tree | cf5d4dc786e25d96cf794030998f080216f76cf9 /src | |
| parent | Merge pull request #1062 from aroulin/shader-rcp-rsq (diff) | |
| download | yuzu-455147ee95e8de7af237fc66aba9bbd52474ec3b.tar.gz yuzu-455147ee95e8de7af237fc66aba9bbd52474ec3b.tar.xz yuzu-455147ee95e8de7af237fc66aba9bbd52474ec3b.zip | |
Shader JIT: Fix CMP NaN behavior to match hardware
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 31 |
1 file changed, 23 insertions, 8 deletions
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 6865c64e3..2a1e51013 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp | |||
| @@ -578,27 +578,42 @@ void JitCompiler::Compile_CALLU(Instruction instr) { | |||
| 578 | } | 578 | } |
| 579 | 579 | ||
| 580 | void JitCompiler::Compile_CMP(Instruction instr) { | 580 | void JitCompiler::Compile_CMP(Instruction instr) { |
| 581 | using Op = Instruction::Common::CompareOpType::Op; | ||
| 582 | Op op_x = instr.common.compare_op.x; | ||
| 583 | Op op_y = instr.common.compare_op.y; | ||
| 584 | |||
| 581 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 585 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 582 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | 586 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |
| 583 | 587 | ||
| 584 | static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_NLE, CMP_NLT }; | 588 | // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to |
| 589 | // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here | ||
| 590 | // because they don't match when used with NaNs. | ||
| 591 | static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE }; | ||
| 592 | |||
| 593 | bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual); | ||
| 594 | Gen::X64Reg lhs_x = invert_op_x ? SRC2 : SRC1; | ||
| 595 | Gen::X64Reg rhs_x = invert_op_x ? SRC1 : SRC2; | ||
| 585 | 596 | ||
| 586 | if (instr.common.compare_op.x == instr.common.compare_op.y) { | 597 | if (op_x == op_y) { |
| 587 | // Compare X-component and Y-component together | 598 | // Compare X-component and Y-component together |
| 588 | CMPPS(SRC1, R(SRC2), cmp[instr.common.compare_op.x]); | 599 | CMPPS(lhs_x, R(rhs_x), cmp[op_x]); |
| 600 | MOVQ_xmm(R(COND0), lhs_x); | ||
| 589 | 601 | ||
| 590 | MOVQ_xmm(R(COND0), SRC1); | ||
| 591 | MOV(64, R(COND1), R(COND0)); | 602 | MOV(64, R(COND1), R(COND0)); |
| 592 | } else { | 603 | } else { |
| 604 | bool invert_op_y = (op_y == Op::GreaterThan || op_y == Op::GreaterEqual); | ||
| 605 | Gen::X64Reg lhs_y = invert_op_y ? SRC2 : SRC1; | ||
| 606 | Gen::X64Reg rhs_y = invert_op_y ? SRC1 : SRC2; | ||
| 607 | |||
| 593 | // Compare X-component | 608 | // Compare X-component |
| 594 | MOVAPS(SCRATCH, R(SRC1)); | 609 | MOVAPS(SCRATCH, R(lhs_x)); |
| 595 | CMPSS(SCRATCH, R(SRC2), cmp[instr.common.compare_op.x]); | 610 | CMPSS(SCRATCH, R(rhs_x), cmp[op_x]); |
| 596 | 611 | ||
| 597 | // Compare Y-component | 612 | // Compare Y-component |
| 598 | CMPPS(SRC1, R(SRC2), cmp[instr.common.compare_op.y]); | 613 | CMPPS(lhs_y, R(rhs_y), cmp[op_y]); |
| 599 | 614 | ||
| 600 | MOVQ_xmm(R(COND0), SCRATCH); | 615 | MOVQ_xmm(R(COND0), SCRATCH); |
| 601 | MOVQ_xmm(R(COND1), SRC1); | 616 | MOVQ_xmm(R(COND1), lhs_y); |
| 602 | } | 617 | } |
| 603 | 618 | ||
| 604 | SHR(32, R(COND0), Imm8(31)); | 619 | SHR(32, R(COND0), Imm8(31)); |