diff options
| author | 2021-03-24 00:02:30 +0100 | |
|---|---|---|
| committer | 2021-07-22 21:51:24 -0400 | |
| commit | 8cb9443cb99c4510e6ef26a91d09a31a8fa6281f (patch) | |
| tree | 2337f294c7179e1e2e98cafedde5c2eb254965cb /src/shader_recompiler/frontend | |
| parent | shader: Implement NDC [-1, 1], attribute types and default varying initialization (diff) | |
| download | yuzu-8cb9443cb99c4510e6ef26a91d09a31a8fa6281f.tar.gz yuzu-8cb9443cb99c4510e6ef26a91d09a31a8fa6281f.tar.xz yuzu-8cb9443cb99c4510e6ef26a91d09a31a8fa6281f.zip | |
shader: Fix F2I
Diffstat (limited to '')
6 files changed, 124 insertions, 5 deletions
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ce610799a..6280c08f6 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -731,6 +731,24 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { | |||
| 731 | } | 731 | } |
| 732 | } | 732 | } |
| 733 | 733 | ||
| 734 | F16F32F64 IREmitter::FPClamp(const F16F32F64& value, const F16F32F64& min_value, | ||
| 735 | const F16F32F64& max_value) { | ||
| 736 | if (value.Type() != min_value.Type() || value.Type() != max_value.Type()) { | ||
| 737 | throw InvalidArgument("Mismatching types {}, {}, and {}", value.Type(), min_value.Type(), | ||
| 738 | max_value.Type()); | ||
| 739 | } | ||
| 740 | switch (value.Type()) { | ||
| 741 | case Type::F16: | ||
| 742 | return Inst<F16>(Opcode::FPClamp16, value, min_value, max_value); | ||
| 743 | case Type::F32: | ||
| 744 | return Inst<F32>(Opcode::FPClamp32, value, min_value, max_value); | ||
| 745 | case Type::F64: | ||
| 746 | return Inst<F64>(Opcode::FPClamp64, value, min_value, max_value); | ||
| 747 | default: | ||
| 748 | ThrowInvalidType(value.Type()); | ||
| 749 | } | ||
| 750 | } | ||
| 751 | |||
| 734 | F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) { | 752 | F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) { |
| 735 | switch (value.Type()) { | 753 | switch (value.Type()) { |
| 736 | case Type::F16: | 754 | case Type::F16: |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 39109b0de..ebbda78a9 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -147,6 +147,7 @@ public: | |||
| 147 | [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); | 147 | [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); |
| 148 | [[nodiscard]] F32 FPSqrt(const F32& value); | 148 | [[nodiscard]] F32 FPSqrt(const F32& value); |
| 149 | [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); | 149 | [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); |
| 150 | [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, const F16F32F64& max_value); | ||
| 150 | [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); | 151 | [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); |
| 151 | [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); | 152 | [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); |
| 152 | [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); | 153 | [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); |
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8945c7b04..dd17212a1 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc | |||
| @@ -192,6 +192,9 @@ OPCODE(FPLog2, F32, F32, | |||
| 192 | OPCODE(FPSaturate16, F16, F16, ) | 192 | OPCODE(FPSaturate16, F16, F16, ) |
| 193 | OPCODE(FPSaturate32, F32, F32, ) | 193 | OPCODE(FPSaturate32, F32, F32, ) |
| 194 | OPCODE(FPSaturate64, F64, F64, ) | 194 | OPCODE(FPSaturate64, F64, F64, ) |
| 195 | OPCODE(FPClamp16, F16, F16, F16, F16, ) | ||
| 196 | OPCODE(FPClamp32, F32, F32, F32, F32, ) | ||
| 197 | OPCODE(FPClamp64, F64, F64, F64, F64, ) | ||
| 195 | OPCODE(FPRoundEven16, F16, F16, ) | 198 | OPCODE(FPRoundEven16, F16, F16, ) |
| 196 | OPCODE(FPRoundEven32, F32, F32, ) | 199 | OPCODE(FPRoundEven32, F32, F32, ) |
| 197 | OPCODE(FPRoundEven64, F64, F64, ) | 200 | OPCODE(FPRoundEven64, F64, F64, ) |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 81175627f..7c5a72800 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp | |||
| @@ -2,6 +2,8 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <limits> | ||
| 6 | |||
| 5 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 6 | #include "shader_recompiler/exception.h" | 8 | #include "shader_recompiler/exception.h" |
| 7 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | 9 | #include "shader_recompiler/frontend/maxwell/opcodes.h" |
| @@ -55,6 +57,37 @@ size_t BitSize(DestFormat dest_format) { | |||
| 55 | } | 57 | } |
| 56 | } | 58 | } |
| 57 | 59 | ||
| 60 | std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) { | ||
| 61 | if (is_signed) { | ||
| 62 | switch (format) { | ||
| 63 | case DestFormat::I16: | ||
| 64 | return {static_cast<f64>(std::numeric_limits<s16>::max()), | ||
| 65 | static_cast<f64>(std::numeric_limits<s16>::min())}; | ||
| 66 | case DestFormat::I32: | ||
| 67 | return {static_cast<f64>(std::numeric_limits<s32>::max()), | ||
| 68 | static_cast<f64>(std::numeric_limits<s32>::min())}; | ||
| 69 | case DestFormat::I64: | ||
| 70 | return {static_cast<f64>(std::numeric_limits<s64>::max()), | ||
| 71 | static_cast<f64>(std::numeric_limits<s64>::min())}; | ||
| 72 | default: {} | ||
| 73 | } | ||
| 74 | } else { | ||
| 75 | switch (format) { | ||
| 76 | case DestFormat::I16: | ||
| 77 | return {static_cast<f64>(std::numeric_limits<u16>::max()), | ||
| 78 | static_cast<f64>(std::numeric_limits<u16>::min())}; | ||
| 79 | case DestFormat::I32: | ||
| 80 | return {static_cast<f64>(std::numeric_limits<u32>::max()), | ||
| 81 | static_cast<f64>(std::numeric_limits<u32>::min())}; | ||
| 82 | case DestFormat::I64: | ||
| 83 | return {static_cast<f64>(std::numeric_limits<u64>::max()), | ||
| 84 | static_cast<f64>(std::numeric_limits<u64>::min())}; | ||
| 85 | default: {} | ||
| 86 | } | ||
| 87 | } | ||
| 88 | throw NotImplementedException("Invalid destination format {}", format); | ||
| 89 | } | ||
| 90 | |||
| 58 | IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) { | 91 | IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) { |
| 59 | union { | 92 | union { |
| 60 | u64 raw; | 93 | u64 raw; |
| @@ -112,13 +145,58 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { | |||
| 112 | // For example converting F32 65537.0 to U16, the expected value is 0xffff, | 145 | // For example converting F32 65537.0 to U16, the expected value is 0xffff, |
| 113 | 146 | ||
| 114 | const bool is_signed{f2i.is_signed != 0}; | 147 | const bool is_signed{f2i.is_signed != 0}; |
| 115 | const size_t bitsize{BitSize(f2i.dest_format)}; | 148 | const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed); |
| 116 | const IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, rounded_value)}; | 149 | |
| 150 | IR::F16F32F64 intermediate; | ||
| 151 | switch (f2i.src_format) { | ||
| 152 | case SrcFormat::F16: { | ||
| 153 | const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))}; | ||
| 154 | const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))}; | ||
| 155 | intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); | ||
| 156 | break; | ||
| 157 | } | ||
| 158 | case SrcFormat::F32: { | ||
| 159 | const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))}; | ||
| 160 | const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))}; | ||
| 161 | intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); | ||
| 162 | break; | ||
| 163 | } | ||
| 164 | case SrcFormat::F64: { | ||
| 165 | const IR::F64 max_val{v.ir.Imm64(max_bound)}; | ||
| 166 | const IR::F64 min_val{v.ir.Imm64(min_bound)}; | ||
| 167 | intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); | ||
| 168 | break; | ||
| 169 | } | ||
| 170 | default: | ||
| 171 | throw NotImplementedException("Invalid destination format {}", f2i.dest_format.Value()); | ||
| 172 | } | ||
| 173 | |||
| 174 | const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))}; | ||
| 175 | IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)}; | ||
| 176 | |||
| 177 | bool handled_special_case = false; | ||
| 178 | const bool special_nan_cases = | ||
| 179 | (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64); | ||
| 180 | if (special_nan_cases) { | ||
| 181 | if (f2i.dest_format == DestFormat::I32) { | ||
| 182 | handled_special_case = true; | ||
| 183 | result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)}; | ||
| 184 | } else if (f2i.dest_format == DestFormat::I64) { | ||
| 185 | handled_special_case = true; | ||
| 186 | result = IR::U64{ | ||
| 187 | v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000ULL), result)}; | ||
| 188 | } | ||
| 189 | } | ||
| 190 | if (!handled_special_case && is_signed) { | ||
| 191 | if (bitsize != 64) { | ||
| 192 | result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; | ||
| 193 | } else { | ||
| 194 | result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0ULL), result)}; | ||
| 195 | } | ||
| 196 | } | ||
| 117 | 197 | ||
| 118 | if (bitsize == 64) { | 198 | if (bitsize == 64) { |
| 119 | const IR::Value vector{v.ir.UnpackUint2x32(result)}; | 199 | v.L(f2i.dest_reg, result); |
| 120 | v.X(f2i.dest_reg + 0, IR::U32{v.ir.CompositeExtract(vector, 0)}); | ||
| 121 | v.X(f2i.dest_reg + 1, IR::U32{v.ir.CompositeExtract(vector, 1)}); | ||
| 122 | } else { | 200 | } else { |
| 123 | v.X(f2i.dest_reg, result); | 201 | v.X(f2i.dest_reg, result); |
| 124 | } | 202 | } |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 758a0230a..9bae89c10 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp | |||
| @@ -21,6 +21,13 @@ IR::U32 TranslatorVisitor::X(IR::Reg reg) { | |||
| 21 | return ir.GetReg(reg); | 21 | return ir.GetReg(reg); |
| 22 | } | 22 | } |
| 23 | 23 | ||
| 24 | IR::U64 TranslatorVisitor::L(IR::Reg reg) { | ||
| 25 | if (!IR::IsAligned(reg, 2)) { | ||
| 26 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 27 | } | ||
| 28 | return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; | ||
| 29 | } | ||
| 30 | |||
| 24 | IR::F32 TranslatorVisitor::F(IR::Reg reg) { | 31 | IR::F32 TranslatorVisitor::F(IR::Reg reg) { |
| 25 | return ir.BitCast<IR::F32>(X(reg)); | 32 | return ir.BitCast<IR::F32>(X(reg)); |
| 26 | } | 33 | } |
| @@ -36,6 +43,16 @@ void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { | |||
| 36 | ir.SetReg(dest_reg, value); | 43 | ir.SetReg(dest_reg, value); |
| 37 | } | 44 | } |
| 38 | 45 | ||
| 46 | void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) { | ||
| 47 | if (!IR::IsAligned(dest_reg, 2)) { | ||
| 48 | throw NotImplementedException("Unaligned destination register {}", dest_reg); | ||
| 49 | } | ||
| 50 | const IR::Value result{ir.UnpackUint2x32(value)}; | ||
| 51 | for (int i = 0; i < 2; i++) { | ||
| 52 | X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 39 | void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { | 56 | void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { |
| 40 | X(dest_reg, ir.BitCast<IR::U32>(value)); | 57 | X(dest_reg, ir.BitCast<IR::U32>(value)); |
| 41 | } | 58 | } |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index c994fe803..54c31deb4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h | |||
| @@ -341,10 +341,12 @@ public: | |||
| 341 | void XMAD_imm(u64 insn); | 341 | void XMAD_imm(u64 insn); |
| 342 | 342 | ||
| 343 | [[nodiscard]] IR::U32 X(IR::Reg reg); | 343 | [[nodiscard]] IR::U32 X(IR::Reg reg); |
| 344 | [[nodiscard]] IR::U64 L(IR::Reg reg); | ||
| 344 | [[nodiscard]] IR::F32 F(IR::Reg reg); | 345 | [[nodiscard]] IR::F32 F(IR::Reg reg); |
| 345 | [[nodiscard]] IR::F64 D(IR::Reg reg); | 346 | [[nodiscard]] IR::F64 D(IR::Reg reg); |
| 346 | 347 | ||
| 347 | void X(IR::Reg dest_reg, const IR::U32& value); | 348 | void X(IR::Reg dest_reg, const IR::U32& value); |
| 349 | void L(IR::Reg dest_reg, const IR::U64& value); | ||
| 348 | void F(IR::Reg dest_reg, const IR::F32& value); | 350 | void F(IR::Reg dest_reg, const IR::F32& value); |
| 349 | void D(IR::Reg dest_reg, const IR::F64& value); | 351 | void D(IR::Reg dest_reg, const IR::F64& value); |
| 350 | 352 | ||