diff options
| author | 2021-03-21 00:42:56 -0300 | |
|---|---|---|
| committer | 2021-07-22 21:51:24 -0400 | |
| commit | a77e764726938a26803fa90a9c69ccdd32ab09cd (patch) | |
| tree | dbc22cd8ba43dbb8f6458dca40ad078e317eb755 /src/shader_recompiler/frontend | |
| parent | shader: Fix floating point comparison for FP16 (diff) | |
| download | yuzu-a77e764726938a26803fa90a9c69ccdd32ab09cd.tar.gz yuzu-a77e764726938a26803fa90a9c69ccdd32ab09cd.tar.xz yuzu-a77e764726938a26803fa90a9c69ccdd32ab09cd.zip | |
shader: Add support for fp16 comparisons and misc fixes
Diffstat (limited to 'src/shader_recompiler/frontend')
7 files changed, 28 insertions, 14 deletions
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 652f6949e..1eda95071 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -895,15 +895,30 @@ U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpC | |||
| 895 | } | 895 | } |
| 896 | } | 896 | } |
| 897 | 897 | ||
| 898 | U1 IREmitter::FPIsNan(const F32& value) { | 898 | U1 IREmitter::FPIsNan(const F16F32F64& value) { |
| 899 | return Inst<U1>(Opcode::FPIsNan32, value); | 899 | switch (value.Type()) { |
| 900 | case Type::F16: | ||
| 901 | return Inst<U1>(Opcode::FPIsNan16, value); | ||
| 902 | case Type::F32: | ||
| 903 | return Inst<U1>(Opcode::FPIsNan32, value); | ||
| 904 | case Type::F64: | ||
| 905 | return Inst<U1>(Opcode::FPIsNan64, value); | ||
| 906 | default: | ||
| 907 | ThrowInvalidType(value.Type()); | ||
| 908 | } | ||
| 900 | } | 909 | } |
| 901 | 910 | ||
| 902 | U1 IREmitter::FPOrdered(const F32& lhs, const F32& rhs) { | 911 | U1 IREmitter::FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs) { |
| 912 | if (lhs.Type() != rhs.Type()) { | ||
| 913 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 914 | } | ||
| 903 | return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs))); | 915 | return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs))); |
| 904 | } | 916 | } |
| 905 | 917 | ||
| 906 | U1 IREmitter::FPUnordered(const F32& lhs, const F32& rhs) { | 918 | U1 IREmitter::FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs) { |
| 919 | if (lhs.Type() != rhs.Type()) { | ||
| 920 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 921 | } | ||
| 907 | return LogicalOr(FPIsNan(lhs), FPIsNan(rhs)); | 922 | return LogicalOr(FPIsNan(lhs), FPIsNan(rhs)); |
| 908 | } | 923 | } |
| 909 | 924 | ||
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 8edb11154..ab4537d88 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -161,9 +161,9 @@ public: | |||
| 161 | FpControl control = {}, bool ordered = true); | 161 | FpControl control = {}, bool ordered = true); |
| 162 | [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, | 162 | [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, |
| 163 | FpControl control = {}, bool ordered = true); | 163 | FpControl control = {}, bool ordered = true); |
| 164 | [[nodiscard]] U1 FPIsNan(const F32& value); | 164 | [[nodiscard]] U1 FPIsNan(const F16F32F64& value); |
| 165 | [[nodiscard]] U1 FPOrdered(const F32& lhs, const F32& rhs); | 165 | [[nodiscard]] U1 FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs); |
| 166 | [[nodiscard]] U1 FPUnordered(const F32& lhs, const F32& rhs); | 166 | [[nodiscard]] U1 FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs); |
| 167 | [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); | 167 | [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); |
| 168 | [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); | 168 | [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); |
| 169 | 169 | ||
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8471db7b9..884eea7a8 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc | |||
| @@ -236,7 +236,9 @@ OPCODE(FPOrdGreaterThanEqual64, U1, F64, | |||
| 236 | OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) | 236 | OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) |
| 237 | OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) | 237 | OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) |
| 238 | OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) | 238 | OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) |
| 239 | OPCODE(FPIsNan16, U1, F16, ) | ||
| 239 | OPCODE(FPIsNan32, U1, F32, ) | 240 | OPCODE(FPIsNan32, U1, F32, ) |
| 241 | OPCODE(FPIsNan64, U1, F64, ) | ||
| 240 | 242 | ||
| 241 | // Integer operations | 243 | // Integer operations |
| 242 | OPCODE(IAdd32, U32, U32, U32, ) | 244 | OPCODE(IAdd32, U32, U32, U32, ) |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp index 19e3401ca..03e7bf047 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | 6 | ||
| 7 | namespace Shader::Maxwell { | 7 | namespace Shader::Maxwell { |
| 8 | namespace { | 8 | namespace { |
| 9 | |||
| 10 | void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, | 9 | void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, |
| 11 | Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { | 10 | Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { |
| 12 | union { | 11 | union { |
| @@ -66,7 +65,7 @@ void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swi | |||
| 66 | HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0, | 65 | HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0, |
| 67 | hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b); | 66 | hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b); |
| 68 | } | 67 | } |
| 69 | } // namespace | 68 | } // Anonymous namespace |
| 70 | 69 | ||
| 71 | void TranslatorVisitor::HADD2_reg(u64 insn) { | 70 | void TranslatorVisitor::HADD2_reg(u64 insn) { |
| 72 | union { | 71 | union { |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp index 2f3996274..8b234bd6a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | 6 | ||
| 7 | namespace Shader::Maxwell { | 7 | namespace Shader::Maxwell { |
| 8 | namespace { | 8 | namespace { |
| 9 | |||
| 10 | void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c, | 9 | void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c, |
| 11 | Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, | 10 | Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, |
| 12 | bool sat, HalfPrecision precision) { | 11 | bool sat, HalfPrecision precision) { |
| @@ -85,8 +84,7 @@ void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizz | |||
| 85 | HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c, | 84 | HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c, |
| 86 | sat, precision); | 85 | sat, precision); |
| 87 | } | 86 | } |
| 88 | 87 | } // Anonymous namespace | |
| 89 | } // namespace | ||
| 90 | 88 | ||
| 91 | void TranslatorVisitor::HFMA2_reg(u64 insn) { | 89 | void TranslatorVisitor::HFMA2_reg(u64 insn) { |
| 92 | union { | 90 | union { |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp index ff34a8c8f..2451a6ef6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | 6 | ||
| 7 | namespace Shader::Maxwell { | 7 | namespace Shader::Maxwell { |
| 8 | namespace { | 8 | namespace { |
| 9 | |||
| 10 | void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a, | 9 | void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a, |
| 11 | Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b, | 10 | Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b, |
| 12 | HalfPrecision precision) { | 11 | HalfPrecision precision) { |
| @@ -79,7 +78,7 @@ void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, boo | |||
| 79 | HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b, | 78 | HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b, |
| 80 | hmul2.precision); | 79 | hmul2.precision); |
| 81 | } | 80 | } |
| 82 | } // namespace | 81 | } // Anonymous namespace |
| 83 | 82 | ||
| 84 | void TranslatorVisitor::HMUL2_reg(u64 insn) { | 83 | void TranslatorVisitor::HMUL2_reg(u64 insn) { |
| 85 | union { | 84 | union { |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp index 1d28c0531..7f1f4b88c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp | |||
| @@ -76,6 +76,7 @@ void TranslatorVisitor::HSET2_reg(u64 insn) { | |||
| 76 | BitField<35, 4, FPCompareOp> compare_op; | 76 | BitField<35, 4, FPCompareOp> compare_op; |
| 77 | BitField<28, 2, Swizzle> swizzle_b; | 77 | BitField<28, 2, Swizzle> swizzle_b; |
| 78 | } const hset2{insn}; | 78 | } const hset2{insn}; |
| 79 | |||
| 79 | HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, | 80 | HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, |
| 80 | hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b); | 81 | hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b); |
| 81 | } | 82 | } |