diff options
| author | 2021-03-21 09:32:16 +0100 | |
|---|---|---|
| committer | 2021-07-22 21:51:24 -0400 | |
| commit | a62f04efab4331eeabd4441962f86a5e87db3f2d (patch) | |
| tree | 679974e509fac5a738a3661a44f0a475d22a5a71 /src/shader_recompiler/frontend | |
| parent | shader: Add missing fp64 usage flags (diff) | |
| download | yuzu-a62f04efab4331eeabd4441962f86a5e87db3f2d.tar.gz yuzu-a62f04efab4331eeabd4441962f86a5e87db3f2d.tar.xz yuzu-a62f04efab4331eeabd4441962f86a5e87db3f2d.zip | |
shader: Implement F2F
Diffstat (limited to 'src/shader_recompiler/frontend')
4 files changed, 188 insertions, 19 deletions
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 1eda95071..00c909f3e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -1361,7 +1361,7 @@ U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { | |||
| 1361 | throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); | 1361 | throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); |
| 1362 | } | 1362 | } |
| 1363 | 1363 | ||
| 1364 | F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { | 1364 | F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value, FpControl control) { |
| 1365 | switch (result_bitsize) { | 1365 | switch (result_bitsize) { |
| 1366 | case 16: | 1366 | case 16: |
| 1367 | switch (value.Type()) { | 1367 | switch (value.Type()) { |
| @@ -1369,7 +1369,7 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { | |||
| 1369 | // Nothing to do | 1369 | // Nothing to do |
| 1370 | return value; | 1370 | return value; |
| 1371 | case Type::F32: | 1371 | case Type::F32: |
| 1372 | return Inst<F16>(Opcode::ConvertF16F32, value); | 1372 | return Inst<F16>(Opcode::ConvertF16F32, Flags{control}, value); |
| 1373 | case Type::F64: | 1373 | case Type::F64: |
| 1374 | throw LogicError("Illegal conversion from F64 to F16"); | 1374 | throw LogicError("Illegal conversion from F64 to F16"); |
| 1375 | default: | 1375 | default: |
| @@ -1379,12 +1379,12 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { | |||
| 1379 | case 32: | 1379 | case 32: |
| 1380 | switch (value.Type()) { | 1380 | switch (value.Type()) { |
| 1381 | case Type::F16: | 1381 | case Type::F16: |
| 1382 | return Inst<F32>(Opcode::ConvertF32F16, value); | 1382 | return Inst<F32>(Opcode::ConvertF32F16, Flags{control}, value); |
| 1383 | case Type::F32: | 1383 | case Type::F32: |
| 1384 | // Nothing to do | 1384 | // Nothing to do |
| 1385 | return value; | 1385 | return value; |
| 1386 | case Type::F64: | 1386 | case Type::F64: |
| 1387 | return Inst<F64>(Opcode::ConvertF32F64, value); | 1387 | return Inst<F32>(Opcode::ConvertF32F64, Flags{control}, value); |
| 1388 | default: | 1388 | default: |
| 1389 | break; | 1389 | break; |
| 1390 | } | 1390 | } |
| @@ -1394,10 +1394,10 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { | |||
| 1394 | case Type::F16: | 1394 | case Type::F16: |
| 1395 | throw LogicError("Illegal conversion from F16 to F64"); | 1395 | throw LogicError("Illegal conversion from F16 to F64"); |
| 1396 | case Type::F32: | 1396 | case Type::F32: |
| 1397 | return Inst<F64>(Opcode::ConvertF64F32, Flags{control}, value); | ||
| 1398 | case Type::F64: | ||
| 1397 | // Nothing to do | 1399 | // Nothing to do |
| 1398 | return value; | 1400 | return value; |
| 1399 | case Type::F64: | ||
| 1400 | return Inst<F64>(Opcode::ConvertF32F64, value); | ||
| 1401 | default: | 1401 | default: |
| 1402 | break; | 1402 | break; |
| 1403 | } | 1403 | } |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index ab4537d88..346cef3ab 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -216,7 +216,8 @@ public: | |||
| 216 | const Value& value); | 216 | const Value& value); |
| 217 | 217 | ||
| 218 | [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); | 218 | [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); |
| 219 | [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); | 219 | [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value, |
| 220 | FpControl control = {}); | ||
| 220 | 221 | ||
| 221 | [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords, | 222 | [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords, |
| 222 | const F32& bias, const Value& offset, | 223 | const F32& bias, const Value& offset, |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp new file mode 100644 index 000000000..1e366fde0 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp | |||
| @@ -0,0 +1,180 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 6 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 7 | |||
| 8 | namespace Shader::Maxwell { | ||
| 9 | namespace { | ||
| 10 | enum class FloatFormat : u64 { | ||
| 11 | F16 = 1, | ||
| 12 | F32 = 2, | ||
| 13 | F64 = 3, | ||
| 14 | }; | ||
| 15 | |||
| 16 | enum class RoundingOp : u64 { | ||
| 17 | None = 0, | ||
| 18 | Pass = 3, | ||
| 19 | Round = 8, | ||
| 20 | Floor = 9, | ||
| 21 | Ceil = 10, | ||
| 22 | Trunc = 11, | ||
| 23 | }; | ||
| 24 | |||
| 25 | [[nodiscard]] u32 WidthSize(FloatFormat width) { | ||
| 26 | switch (width) { | ||
| 27 | case FloatFormat::F16: | ||
| 28 | return 16; | ||
| 29 | case FloatFormat::F32: | ||
| 30 | return 32; | ||
| 31 | case FloatFormat::F64: | ||
| 32 | return 64; | ||
| 33 | default: | ||
| 34 | throw NotImplementedException("Invalid width {}", width); | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { | ||
| 39 | union { | ||
| 40 | u64 insn; | ||
| 41 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 42 | BitField<44, 1, u64> ftz; | ||
| 43 | BitField<45, 1, u64> neg; | ||
| 44 | BitField<50, 1, u64> sat; | ||
| 45 | BitField<39, 4, u64> rounding_op; | ||
| 46 | BitField<39, 2, FpRounding> rounding; | ||
| 47 | BitField<10, 2, FloatFormat> src_size; | ||
| 48 | BitField<8, 2, FloatFormat> dst_size; | ||
| 49 | |||
| 50 | [[nodiscard]] RoundingOp RoundingOperation() const { | ||
| 51 | constexpr u64 rounding_mask = 0x0B; | ||
| 52 | return static_cast<RoundingOp>(rounding_op.Value() & rounding_mask); | ||
| 53 | } | ||
| 54 | } const f2f{insn}; | ||
| 55 | |||
| 56 | IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)}; | ||
| 57 | |||
| 58 | const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; | ||
| 59 | IR::FpControl fp_control{ | ||
| 60 | .no_contraction{false}, | ||
| 61 | .rounding{IR::FpRounding::DontCare}, | ||
| 62 | .fmz_mode{f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None}, | ||
| 63 | }; | ||
| 64 | if (f2f.src_size != f2f.dst_size) { | ||
| 65 | fp_control.rounding = CastFpRounding(f2f.rounding); | ||
| 66 | input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control); | ||
| 67 | } else { | ||
| 68 | switch (f2f.RoundingOperation()) { | ||
| 69 | case RoundingOp::None: | ||
| 70 | case RoundingOp::Pass: | ||
| 71 | // Make sure NANs are handled properly | ||
| 72 | switch (f2f.src_size) { | ||
| 73 | case FloatFormat::F16: | ||
| 74 | input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control); | ||
| 75 | break; | ||
| 76 | case FloatFormat::F32: | ||
| 77 | input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control); | ||
| 78 | break; | ||
| 79 | case FloatFormat::F64: | ||
| 80 | input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control); | ||
| 81 | break; | ||
| 82 | } | ||
| 83 | break; | ||
| 84 | case RoundingOp::Round: | ||
| 85 | input = v.ir.FPRoundEven(input, fp_control); | ||
| 86 | break; | ||
| 87 | case RoundingOp::Floor: | ||
| 88 | input = v.ir.FPFloor(input, fp_control); | ||
| 89 | break; | ||
| 90 | case RoundingOp::Ceil: | ||
| 91 | input = v.ir.FPCeil(input, fp_control); | ||
| 92 | break; | ||
| 93 | case RoundingOp::Trunc: | ||
| 94 | input = v.ir.FPTrunc(input, fp_control); | ||
| 95 | break; | ||
| 96 | default: | ||
| 97 | throw NotImplementedException("Unimplemented rounding mode {}", f2f.rounding.Value()); | ||
| 98 | } | ||
| 99 | } | ||
| 100 | if (f2f.sat != 0 && !any_fp64) { | ||
| 101 | input = v.ir.FPSaturate(input); | ||
| 102 | } | ||
| 103 | |||
| 104 | switch (f2f.dst_size) { | ||
| 105 | case FloatFormat::F16: { | ||
| 106 | const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; | ||
| 107 | v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm))); | ||
| 108 | break; | ||
| 109 | } | ||
| 110 | case FloatFormat::F32: | ||
| 111 | v.F(f2f.dest_reg, input); | ||
| 112 | break; | ||
| 113 | case FloatFormat::F64: | ||
| 114 | v.D(f2f.dest_reg, input); | ||
| 115 | break; | ||
| 116 | default: | ||
| 117 | throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value()); | ||
| 118 | } | ||
| 119 | } | ||
| 120 | } // Anonymous namespace | ||
| 121 | |||
| 122 | void TranslatorVisitor::F2F_reg(u64 insn) { | ||
| 123 | union { | ||
| 124 | u64 insn; | ||
| 125 | BitField<49, 1, u64> abs; | ||
| 126 | BitField<10, 2, FloatFormat> src_size; | ||
| 127 | BitField<41, 1, u64> selector; | ||
| 128 | } const f2f{insn}; | ||
| 129 | |||
| 130 | IR::F16F32F64 src_a; | ||
| 131 | switch (f2f.src_size) { | ||
| 132 | case FloatFormat::F16: { | ||
| 133 | auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)}; | ||
| 134 | src_a = f2f.selector != 0 ? rhs_a : lhs_a; | ||
| 135 | break; | ||
| 136 | } | ||
| 137 | case FloatFormat::F32: | ||
| 138 | src_a = GetFloatReg20(insn); | ||
| 139 | break; | ||
| 140 | case FloatFormat::F64: | ||
| 141 | src_a = GetDoubleReg20(insn); | ||
| 142 | break; | ||
| 143 | default: | ||
| 144 | throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); | ||
| 145 | } | ||
| 146 | F2F(*this, insn, src_a, f2f.abs != 0); | ||
| 147 | } | ||
| 148 | |||
| 149 | void TranslatorVisitor::F2F_cbuf(u64 insn) { | ||
| 150 | union { | ||
| 151 | u64 insn; | ||
| 152 | BitField<49, 1, u64> abs; | ||
| 153 | BitField<10, 2, FloatFormat> src_size; | ||
| 154 | BitField<41, 1, u64> selector; | ||
| 155 | } const f2f{insn}; | ||
| 156 | |||
| 157 | IR::F16F32F64 src_a; | ||
| 158 | switch (f2f.src_size) { | ||
| 159 | case FloatFormat::F16: { | ||
| 160 | auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)}; | ||
| 161 | src_a = f2f.selector != 0 ? rhs_a : lhs_a; | ||
| 162 | break; | ||
| 163 | } | ||
| 164 | case FloatFormat::F32: | ||
| 165 | src_a = GetFloatCbuf(insn); | ||
| 166 | break; | ||
| 167 | case FloatFormat::F64: | ||
| 168 | src_a = GetDoubleCbuf(insn); | ||
| 169 | break; | ||
| 170 | default: | ||
| 171 | throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); | ||
| 172 | } | ||
| 173 | F2F(*this, insn, src_a, f2f.abs != 0); | ||
| 174 | } | ||
| 175 | |||
| 176 | void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) { | ||
| 177 | throw NotImplementedException("Instruction"); | ||
| 178 | } | ||
| 179 | |||
| 180 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 4e069912a..08f6eb788 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -117,18 +117,6 @@ void TranslatorVisitor::DSETP_imm(u64) { | |||
| 117 | ThrowNotImplemented(Opcode::DSETP_imm); | 117 | ThrowNotImplemented(Opcode::DSETP_imm); |
| 118 | } | 118 | } |
| 119 | 119 | ||
| 120 | void TranslatorVisitor::F2F_reg(u64) { | ||
| 121 | ThrowNotImplemented(Opcode::F2F_reg); | ||
| 122 | } | ||
| 123 | |||
| 124 | void TranslatorVisitor::F2F_cbuf(u64) { | ||
| 125 | ThrowNotImplemented(Opcode::F2F_cbuf); | ||
| 126 | } | ||
| 127 | |||
| 128 | void TranslatorVisitor::F2F_imm(u64) { | ||
| 129 | ThrowNotImplemented(Opcode::F2F_imm); | ||
| 130 | } | ||
| 131 | |||
| 132 | void TranslatorVisitor::FCHK_reg(u64) { | 120 | void TranslatorVisitor::FCHK_reg(u64) { |
| 133 | ThrowNotImplemented(Opcode::FCHK_reg); | 121 | ThrowNotImplemented(Opcode::FCHK_reg); |
| 134 | } | 122 | } |