shader: Add support for fp16 comparisons and misc fixes

author: ReinUsesLisp 2021-03-21 00:42:56 -0300
committer: ameerj 2021-07-22 21:51:24 -0400
commit: a77e764726938a26803fa90a9c69ccdd32ab09cd (patch)
tree: dbc22cd8ba43dbb8f6458dca40ad078e317eb755 /src/shader_recompiler/frontend
parent: shader: Fix floating point comparison for FP16 (diff)
download: yuzu-a77e764726938a26803fa90a9c69ccdd32ab09cd.tar.gz yuzu-a77e764726938a26803fa90a9c69ccdd32ab09cd.tar.xz yuzu-a77e764726938a26803fa90a9c69ccdd32ab09cd.zip
7 files changed, 28 insertions, 14 deletions
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 652f6949e..1eda95071 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -895,15 +895,30 @@ U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpC
    }
 }
-U1 IREmitter::FPIsNan(const F32& value) {
+U1 IREmitter::FPIsNan(const F16F32F64& value) {
-    return Inst<U1>(Opcode::FPIsNan32, value);
+    switch (value.Type()) {
+    case Type::F16:
+        return Inst<U1>(Opcode::FPIsNan16, value);
+    case Type::F32:
+        return Inst<U1>(Opcode::FPIsNan32, value);
+    case Type::F64:
+        return Inst<U1>(Opcode::FPIsNan64, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
 }
-U1 IREmitter::FPOrdered(const F32& lhs, const F32& rhs) {
+U1 IREmitter::FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs) {
+    if (lhs.Type() != rhs.Type()) {
+        throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
    return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs)));
 }
-U1 IREmitter::FPUnordered(const F32& lhs, const F32& rhs) {
+U1 IREmitter::FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs) {
+    if (lhs.Type() != rhs.Type()) {
+        throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
    return LogicalOr(FPIsNan(lhs), FPIsNan(rhs));
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 8edb11154..ab4537d88 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -161,9 +161,9 @@ public:
                                     FpControl control = {}, bool ordered = true);
    [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
                                        FpControl control = {}, bool ordered = true);
-    [[nodiscard]] U1 FPIsNan(const F32& value);
+    [[nodiscard]] U1 FPIsNan(const F16F32F64& value);
-    [[nodiscard]] U1 FPOrdered(const F32& lhs, const F32& rhs);
+    [[nodiscard]] U1 FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs);
-    [[nodiscard]] U1 FPUnordered(const F32& lhs, const F32& rhs);
+    [[nodiscard]] U1 FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs);
    [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {});
    [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {});
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 8471db7b9..884eea7a8 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -236,7 +236,9 @@ OPCODE(FPOrdGreaterThanEqual64,                             U1,             F64,
 OPCODE(FPUnordGreaterThanEqual16,                           U1,             F16,            F16,                                                            )
 OPCODE(FPUnordGreaterThanEqual32,                           U1,             F32,            F32,                                                            )
 OPCODE(FPUnordGreaterThanEqual64,                           U1,             F64,            F64,                                                            )
+OPCODE(FPIsNan16,                                           U1,             F16,                                                                            )
 OPCODE(FPIsNan32,                                           U1,             F32,                                                                            )
+OPCODE(FPIsNan64,                                           U1,             F64,                                                                            )
 // Integer operations
 OPCODE(IAdd32,                                              U32,            U32,            U32,                                                            )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
index 19e3401ca..03e7bf047 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
@@ -6,7 +6,6 @@
 namespace Shader::Maxwell {
 namespace {
 void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,
           Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {
    union {
@@ -66,7 +65,7 @@ void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swi
    HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0,
          hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b);
 }
-} // namespace
+} // Anonymous namespace
 void TranslatorVisitor::HADD2_reg(u64 insn) {
    union {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
index 2f3996274..8b234bd6a 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
@@ -6,7 +6,6 @@
 namespace Shader::Maxwell {
 namespace {
 void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c,
           Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c,
           bool sat, HalfPrecision precision) {
@@ -85,8 +84,7 @@ void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizz
    HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c,
          sat, precision);
 }
+} // Anonymous namespace
-} // namespace
 void TranslatorVisitor::HFMA2_reg(u64 insn) {
    union {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
index ff34a8c8f..2451a6ef6 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
@@ -6,7 +6,6 @@
 namespace Shader::Maxwell {
 namespace {
 void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a,
           Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b,
           HalfPrecision precision) {
@@ -79,7 +78,7 @@ void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, boo
    HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b,
          hmul2.precision);
 }
-} // namespace
+} // Anonymous namespace
 void TranslatorVisitor::HMUL2_reg(u64 insn) {
    union {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
index 1d28c0531..7f1f4b88c 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
@@ -76,6 +76,7 @@ void TranslatorVisitor::HSET2_reg(u64 insn) {
        BitField<35, 4, FPCompareOp> compare_op;
        BitField<28, 2, Swizzle> swizzle_b;
    } const hset2{insn};
    HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0,
          hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b);
 }
author	ReinUsesLisp	2021-03-21 00:42:56 -0300
committer	ameerj	2021-07-22 21:51:24 -0400
commit	a77e764726938a26803fa90a9c69ccdd32ab09cd (patch)
tree	dbc22cd8ba43dbb8f6458dca40ad078e317eb755 /src/shader_recompiler/frontend
parent	shader: Fix floating point comparison for FP16 (diff)
download	yuzu-a77e764726938a26803fa90a9c69ccdd32ab09cd.tar.gz yuzu-a77e764726938a26803fa90a9c69ccdd32ab09cd.tar.xz yuzu-a77e764726938a26803fa90a9c69ccdd32ab09cd.zip

diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 652f6949e..1eda95071 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -895,15 +895,30 @@ U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpC
895	}	895	}
896	}	896	}
897		897
898	U1 IREmitter::FPIsNan(const F32& value) {	898	U1 IREmitter::FPIsNan(const F16F32F64& value) {
899	return Inst<U1>(Opcode::FPIsNan32, value);	899	switch (value.Type()) {
		900	case Type::F16:
		901	return Inst<U1>(Opcode::FPIsNan16, value);
		902	case Type::F32:
		903	return Inst<U1>(Opcode::FPIsNan32, value);
		904	case Type::F64:
		905	return Inst<U1>(Opcode::FPIsNan64, value);
		906	default:
		907	ThrowInvalidType(value.Type());
		908	}
900	}	909	}
901		910
902	U1 IREmitter::FPOrdered(const F32& lhs, const F32& rhs) {	911	U1 IREmitter::FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs) {
		912	if (lhs.Type() != rhs.Type()) {
		913	throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
		914	}
903	return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs)));	915	return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs)));
904	}	916	}
905		917
906	U1 IREmitter::FPUnordered(const F32& lhs, const F32& rhs) {	918	U1 IREmitter::FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs) {
		919	if (lhs.Type() != rhs.Type()) {
		920	throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
		921	}
907	return LogicalOr(FPIsNan(lhs), FPIsNan(rhs));	922	return LogicalOr(FPIsNan(lhs), FPIsNan(rhs));
908	}	923	}
909		924


diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 8edb11154..ab4537d88 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -161,9 +161,9 @@ public:
161	FpControl control = {}, bool ordered = true);	161	FpControl control = {}, bool ordered = true);
162	[[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,	162	[[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
163	FpControl control = {}, bool ordered = true);	163	FpControl control = {}, bool ordered = true);
164	[[nodiscard]] U1 FPIsNan(const F32& value);	164	[[nodiscard]] U1 FPIsNan(const F16F32F64& value);
165	[[nodiscard]] U1 FPOrdered(const F32& lhs, const F32& rhs);	165	[[nodiscard]] U1 FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs);
166	[[nodiscard]] U1 FPUnordered(const F32& lhs, const F32& rhs);	166	[[nodiscard]] U1 FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs);
167	[[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {});	167	[[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {});
168	[[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {});	168	[[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {});
169		169


diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8471db7b9..884eea7a8 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -236,7 +236,9 @@ OPCODE(FPOrdGreaterThanEqual64, U1, F64,
236	OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, )	236	OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, )
237	OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, )	237	OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, )
238	OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, )	238	OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, )
		239	OPCODE(FPIsNan16, U1, F16, )
239	OPCODE(FPIsNan32, U1, F32, )	240	OPCODE(FPIsNan32, U1, F32, )
		241	OPCODE(FPIsNan64, U1, F64, )
240		242
241	// Integer operations	243	// Integer operations
242	OPCODE(IAdd32, U32, U32, U32, )	244	OPCODE(IAdd32, U32, U32, U32, )


diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp index 19e3401ca..03e7bf047 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
@@ -6,7 +6,6 @@
6		6
7	namespace Shader::Maxwell {	7	namespace Shader::Maxwell {
8	namespace {	8	namespace {
9
10	void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,	9	void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,
11	Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {	10	Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {
12	union {	11	union {
@@ -66,7 +65,7 @@ void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swi
66	HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0,	65	HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0,
67	hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b);	66	hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b);
68	}	67	}
69	} // namespace	68	} // Anonymous namespace
70		69
71	void TranslatorVisitor::HADD2_reg(u64 insn) {	70	void TranslatorVisitor::HADD2_reg(u64 insn) {
72	union {	71	union {


diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp index 2f3996274..8b234bd6a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
@@ -6,7 +6,6 @@
6		6
7	namespace Shader::Maxwell {	7	namespace Shader::Maxwell {
8	namespace {	8	namespace {
9
10	void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c,	9	void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c,
11	Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c,	10	Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c,
12	bool sat, HalfPrecision precision) {	11	bool sat, HalfPrecision precision) {
@@ -85,8 +84,7 @@ void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizz
85	HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c,	84	HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c,
86	sat, precision);	85	sat, precision);
87	}	86	}
88		87	} // Anonymous namespace
89	} // namespace
90		88
91	void TranslatorVisitor::HFMA2_reg(u64 insn) {	89	void TranslatorVisitor::HFMA2_reg(u64 insn) {
92	union {	90	union {


diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp index ff34a8c8f..2451a6ef6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
@@ -6,7 +6,6 @@
6		6
7	namespace Shader::Maxwell {	7	namespace Shader::Maxwell {
8	namespace {	8	namespace {
9
10	void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a,	9	void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a,
11	Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b,	10	Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b,
12	HalfPrecision precision) {	11	HalfPrecision precision) {
@@ -79,7 +78,7 @@ void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, boo
79	HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b,	78	HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b,
80	hmul2.precision);	79	hmul2.precision);
81	}	80	}
82	} // namespace	81	} // Anonymous namespace
83		82
84	void TranslatorVisitor::HMUL2_reg(u64 insn) {	83	void TranslatorVisitor::HMUL2_reg(u64 insn) {
85	union {	84	union {


diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp index 1d28c0531..7f1f4b88c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
@@ -76,6 +76,7 @@ void TranslatorVisitor::HSET2_reg(u64 insn) {
76	BitField<35, 4, FPCompareOp> compare_op;	76	BitField<35, 4, FPCompareOp> compare_op;
77	BitField<28, 2, Swizzle> swizzle_b;	77	BitField<28, 2, Swizzle> swizzle_b;
78	} const hset2{insn};	78	} const hset2{insn};
		79
79	HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0,	80	HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0,
80	hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b);	81	hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b);
81	}	82	}