diff options
Diffstat (limited to 'src')
11 files changed, 56 insertions, 14 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index e297a0e20..486ef10a7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h | |||
| @@ -234,7 +234,9 @@ Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); | |||
| 234 | Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); | 234 | Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); |
| 235 | Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); | 235 | Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); |
| 236 | Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); | 236 | Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); |
| 237 | Id EmitFPIsNan16(EmitContext& ctx, Id value); | ||
| 237 | Id EmitFPIsNan32(EmitContext& ctx, Id value); | 238 | Id EmitFPIsNan32(EmitContext& ctx, Id value); |
| 239 | Id EmitFPIsNan64(EmitContext& ctx, Id value); | ||
| 238 | Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | 240 | Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); |
| 239 | void EmitIAdd64(EmitContext& ctx); | 241 | void EmitIAdd64(EmitContext& ctx); |
| 240 | Id EmitISub32(EmitContext& ctx, Id a, Id b); | 242 | Id EmitISub32(EmitContext& ctx, Id a, Id b); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index e635b1ffb..1fdf66cb6 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp | |||
| @@ -346,8 +346,16 @@ Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { | |||
| 346 | return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs); | 346 | return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs); |
| 347 | } | 347 | } |
| 348 | 348 | ||
| 349 | Id EmitFPIsNan16(EmitContext& ctx, Id value) { | ||
| 350 | return ctx.OpIsNan(ctx.U1, value); | ||
| 351 | } | ||
| 352 | |||
| 349 | Id EmitFPIsNan32(EmitContext& ctx, Id value) { | 353 | Id EmitFPIsNan32(EmitContext& ctx, Id value) { |
| 350 | return ctx.OpIsNan(ctx.U1, value); | 354 | return ctx.OpIsNan(ctx.U1, value); |
| 351 | } | 355 | } |
| 352 | 356 | ||
| 357 | Id EmitFPIsNan64(EmitContext& ctx, Id value) { | ||
| 358 | return ctx.OpIsNan(ctx.U1, value); | ||
| 359 | } | ||
| 360 | |||
| 353 | } // namespace Shader::Backend::SPIRV | 361 | } // namespace Shader::Backend::SPIRV |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 652f6949e..1eda95071 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -895,15 +895,30 @@ U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpC | |||
| 895 | } | 895 | } |
| 896 | } | 896 | } |
| 897 | 897 | ||
| 898 | U1 IREmitter::FPIsNan(const F32& value) { | 898 | U1 IREmitter::FPIsNan(const F16F32F64& value) { |
| 899 | return Inst<U1>(Opcode::FPIsNan32, value); | 899 | switch (value.Type()) { |
| 900 | case Type::F16: | ||
| 901 | return Inst<U1>(Opcode::FPIsNan16, value); | ||
| 902 | case Type::F32: | ||
| 903 | return Inst<U1>(Opcode::FPIsNan32, value); | ||
| 904 | case Type::F64: | ||
| 905 | return Inst<U1>(Opcode::FPIsNan64, value); | ||
| 906 | default: | ||
| 907 | ThrowInvalidType(value.Type()); | ||
| 908 | } | ||
| 900 | } | 909 | } |
| 901 | 910 | ||
| 902 | U1 IREmitter::FPOrdered(const F32& lhs, const F32& rhs) { | 911 | U1 IREmitter::FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs) { |
| 912 | if (lhs.Type() != rhs.Type()) { | ||
| 913 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 914 | } | ||
| 903 | return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs))); | 915 | return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs))); |
| 904 | } | 916 | } |
| 905 | 917 | ||
| 906 | U1 IREmitter::FPUnordered(const F32& lhs, const F32& rhs) { | 918 | U1 IREmitter::FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs) { |
| 919 | if (lhs.Type() != rhs.Type()) { | ||
| 920 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 921 | } | ||
| 907 | return LogicalOr(FPIsNan(lhs), FPIsNan(rhs)); | 922 | return LogicalOr(FPIsNan(lhs), FPIsNan(rhs)); |
| 908 | } | 923 | } |
| 909 | 924 | ||
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 8edb11154..ab4537d88 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -161,9 +161,9 @@ public: | |||
| 161 | FpControl control = {}, bool ordered = true); | 161 | FpControl control = {}, bool ordered = true); |
| 162 | [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, | 162 | [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, |
| 163 | FpControl control = {}, bool ordered = true); | 163 | FpControl control = {}, bool ordered = true); |
| 164 | [[nodiscard]] U1 FPIsNan(const F32& value); | 164 | [[nodiscard]] U1 FPIsNan(const F16F32F64& value); |
| 165 | [[nodiscard]] U1 FPOrdered(const F32& lhs, const F32& rhs); | 165 | [[nodiscard]] U1 FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs); |
| 166 | [[nodiscard]] U1 FPUnordered(const F32& lhs, const F32& rhs); | 166 | [[nodiscard]] U1 FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs); |
| 167 | [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); | 167 | [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); |
| 168 | [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); | 168 | [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); |
| 169 | 169 | ||
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8471db7b9..884eea7a8 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc | |||
| @@ -236,7 +236,9 @@ OPCODE(FPOrdGreaterThanEqual64, U1, F64, | |||
| 236 | OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) | 236 | OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) |
| 237 | OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) | 237 | OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) |
| 238 | OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) | 238 | OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) |
| 239 | OPCODE(FPIsNan16, U1, F16, ) | ||
| 239 | OPCODE(FPIsNan32, U1, F32, ) | 240 | OPCODE(FPIsNan32, U1, F32, ) |
| 241 | OPCODE(FPIsNan64, U1, F64, ) | ||
| 240 | 242 | ||
| 241 | // Integer operations | 243 | // Integer operations |
| 242 | OPCODE(IAdd32, U32, U32, U32, ) | 244 | OPCODE(IAdd32, U32, U32, U32, ) |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp index 19e3401ca..03e7bf047 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | 6 | ||
| 7 | namespace Shader::Maxwell { | 7 | namespace Shader::Maxwell { |
| 8 | namespace { | 8 | namespace { |
| 9 | |||
| 10 | void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, | 9 | void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, |
| 11 | Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { | 10 | Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { |
| 12 | union { | 11 | union { |
| @@ -66,7 +65,7 @@ void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swi | |||
| 66 | HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0, | 65 | HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0, |
| 67 | hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b); | 66 | hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b); |
| 68 | } | 67 | } |
| 69 | } // namespace | 68 | } // Anonymous namespace |
| 70 | 69 | ||
| 71 | void TranslatorVisitor::HADD2_reg(u64 insn) { | 70 | void TranslatorVisitor::HADD2_reg(u64 insn) { |
| 72 | union { | 71 | union { |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp index 2f3996274..8b234bd6a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | 6 | ||
| 7 | namespace Shader::Maxwell { | 7 | namespace Shader::Maxwell { |
| 8 | namespace { | 8 | namespace { |
| 9 | |||
| 10 | void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c, | 9 | void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c, |
| 11 | Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, | 10 | Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, |
| 12 | bool sat, HalfPrecision precision) { | 11 | bool sat, HalfPrecision precision) { |
| @@ -85,8 +84,7 @@ void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizz | |||
| 85 | HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c, | 84 | HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c, |
| 86 | sat, precision); | 85 | sat, precision); |
| 87 | } | 86 | } |
| 88 | 87 | } // Anonymous namespace | |
| 89 | } // namespace | ||
| 90 | 88 | ||
| 91 | void TranslatorVisitor::HFMA2_reg(u64 insn) { | 89 | void TranslatorVisitor::HFMA2_reg(u64 insn) { |
| 92 | union { | 90 | union { |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp index ff34a8c8f..2451a6ef6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | 6 | ||
| 7 | namespace Shader::Maxwell { | 7 | namespace Shader::Maxwell { |
| 8 | namespace { | 8 | namespace { |
| 9 | |||
| 10 | void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a, | 9 | void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a, |
| 11 | Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b, | 10 | Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b, |
| 12 | HalfPrecision precision) { | 11 | HalfPrecision precision) { |
| @@ -79,7 +78,7 @@ void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, boo | |||
| 79 | HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b, | 78 | HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b, |
| 80 | hmul2.precision); | 79 | hmul2.precision); |
| 81 | } | 80 | } |
| 82 | } // namespace | 81 | } // Anonymous namespace |
| 83 | 82 | ||
| 84 | void TranslatorVisitor::HMUL2_reg(u64 insn) { | 83 | void TranslatorVisitor::HMUL2_reg(u64 insn) { |
| 85 | union { | 84 | union { |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp index 1d28c0531..7f1f4b88c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp | |||
| @@ -76,6 +76,7 @@ void TranslatorVisitor::HSET2_reg(u64 insn) { | |||
| 76 | BitField<35, 4, FPCompareOp> compare_op; | 76 | BitField<35, 4, FPCompareOp> compare_op; |
| 77 | BitField<28, 2, Swizzle> swizzle_b; | 77 | BitField<28, 2, Swizzle> swizzle_b; |
| 78 | } const hset2{insn}; | 78 | } const hset2{insn}; |
| 79 | |||
| 79 | HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, | 80 | HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, |
| 80 | hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b); | 81 | hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b); |
| 81 | } | 82 | } |
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index e7fa3fce0..fd6069c65 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | |||
| @@ -74,6 +74,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 74 | case IR::Opcode::CompositeExtractF16x2: | 74 | case IR::Opcode::CompositeExtractF16x2: |
| 75 | case IR::Opcode::CompositeExtractF16x3: | 75 | case IR::Opcode::CompositeExtractF16x3: |
| 76 | case IR::Opcode::CompositeExtractF16x4: | 76 | case IR::Opcode::CompositeExtractF16x4: |
| 77 | case IR::Opcode::CompositeInsertF16x2: | ||
| 78 | case IR::Opcode::CompositeInsertF16x3: | ||
| 79 | case IR::Opcode::CompositeInsertF16x4: | ||
| 77 | case IR::Opcode::SelectF16: | 80 | case IR::Opcode::SelectF16: |
| 78 | case IR::Opcode::BitCastU16F16: | 81 | case IR::Opcode::BitCastU16F16: |
| 79 | case IR::Opcode::BitCastF16U16: | 82 | case IR::Opcode::BitCastF16U16: |
| @@ -103,6 +106,19 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 103 | case IR::Opcode::FPRoundEven16: | 106 | case IR::Opcode::FPRoundEven16: |
| 104 | case IR::Opcode::FPSaturate16: | 107 | case IR::Opcode::FPSaturate16: |
| 105 | case IR::Opcode::FPTrunc16: | 108 | case IR::Opcode::FPTrunc16: |
| 109 | case IR::Opcode::FPOrdEqual16: | ||
| 110 | case IR::Opcode::FPUnordEqual16: | ||
| 111 | case IR::Opcode::FPOrdNotEqual16: | ||
| 112 | case IR::Opcode::FPUnordNotEqual16: | ||
| 113 | case IR::Opcode::FPOrdLessThan16: | ||
| 114 | case IR::Opcode::FPUnordLessThan16: | ||
| 115 | case IR::Opcode::FPOrdGreaterThan16: | ||
| 116 | case IR::Opcode::FPUnordGreaterThan16: | ||
| 117 | case IR::Opcode::FPOrdLessThanEqual16: | ||
| 118 | case IR::Opcode::FPUnordLessThanEqual16: | ||
| 119 | case IR::Opcode::FPOrdGreaterThanEqual16: | ||
| 120 | case IR::Opcode::FPUnordGreaterThanEqual16: | ||
| 121 | case IR::Opcode::FPIsNan16: | ||
| 106 | info.uses_fp16 = true; | 122 | info.uses_fp16 = true; |
| 107 | break; | 123 | break; |
| 108 | case IR::Opcode::FPAbs64: | 124 | case IR::Opcode::FPAbs64: |
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 7723c9a57..0e8862f45 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp | |||
| @@ -74,6 +74,8 @@ IR::Opcode Replace(IR::Opcode op) { | |||
| 74 | return IR::Opcode::FPOrdGreaterThanEqual32; | 74 | return IR::Opcode::FPOrdGreaterThanEqual32; |
| 75 | case IR::Opcode::FPUnordGreaterThanEqual16: | 75 | case IR::Opcode::FPUnordGreaterThanEqual16: |
| 76 | return IR::Opcode::FPUnordGreaterThanEqual32; | 76 | return IR::Opcode::FPUnordGreaterThanEqual32; |
| 77 | case IR::Opcode::FPIsNan16: | ||
| 78 | return IR::Opcode::FPIsNan32; | ||
| 77 | case IR::Opcode::ConvertS16F16: | 79 | case IR::Opcode::ConvertS16F16: |
| 78 | return IR::Opcode::ConvertS16F32; | 80 | return IR::Opcode::ConvertS16F32; |
| 79 | case IR::Opcode::ConvertS32F16: | 81 | case IR::Opcode::ConvertS32F16: |