diff options
| author | 2021-03-03 03:07:19 -0300 | |
|---|---|---|
| committer | 2021-07-22 21:51:23 -0400 | |
| commit | 4006929c986a2e0e52429fe21201a7ad5ca3fea9 (patch) | |
| tree | 9f4a1ffa7782ed76db5561e107e8ae9f71f63a15 /src/shader_recompiler/frontend/maxwell | |
| parent | shader: Implement LOP and LOP3 (diff) | |
| download | yuzu-4006929c986a2e0e52429fe21201a7ad5ca3fea9.tar.gz yuzu-4006929c986a2e0e52429fe21201a7ad5ca3fea9.tar.xz yuzu-4006929c986a2e0e52429fe21201a7ad5ca3fea9.zip | |
shader: Implement HADD2
Diffstat (limited to 'src/shader_recompiler/frontend/maxwell')
3 files changed, 185 insertions, 17 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp new file mode 100644 index 000000000..6965adfb3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp | |||
| @@ -0,0 +1,184 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
// How the two per-lane results are written back to the destination register.
// The value is decoded straight from an instruction bit field, so the enumerator order (0..3)
// is the encoding; the behaviour of each mode is implemented in MergeResult.
| 12 | enum class Merge : u64 { | ||
// Pack both 16-bit results into one 32-bit word.
| 13 | H1_H0, | ||
// Widen the first result to 32 bits and store its bit pattern; the second result is unused.
| 14 | F32, | ||
// Overwrite only component 0 of the current destination value with the first result.
| 15 | MRG_H0, | ||
// Overwrite only component 1 of the current destination value with the second result.
| 16 | MRG_H1, | ||
| 17 | }; | ||
| 18 | |||
// Selects which parts of a 32-bit source feed the two lanes of the operation.
// Decoded directly from an instruction bit field, so the enumerator order (0..3) is the
// encoding; the selection itself is implemented in Extract.
| 19 | enum class Swizzle : u64 { | ||
// Lane 0 takes component 0 and lane 1 takes component 1 of the unpacked half pair.
| 20 | H1_H0, | ||
// The whole word is reinterpreted as one 32-bit float and used for both lanes.
| 21 | F32, | ||
// Component 0 of the unpacked half pair is used for both lanes.
| 22 | H0_H0, | ||
// Component 1 of the unpacked half pair is used for both lanes.
| 23 | H1_H1, | ||
| 24 | }; | ||
| 25 | |||
// Turns one 32-bit source value into the {lane 0, lane 1} operand pair selected by `swizzle`.
// The F32 case returns IR::F32 values; every other case returns IR::F16 values.
// Throws InvalidArgument if `swizzle` is not one of the four handled enumerators.
| 26 | std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { | ||
| 27 | switch (swizzle) { | ||
| 28 | case Swizzle::H1_H0: { | ||
// Unpack once and hand out both components, in order.
| 29 | const IR::Value vector{ir.UnpackFloat2x16(value)}; | ||
| 30 | return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}}; | ||
| 31 | } | ||
| 32 | case Swizzle::H0_H0: { | ||
// Broadcast component 0 to both lanes.
| 33 | const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)}; | ||
| 34 | return {scalar, scalar}; | ||
| 35 | } | ||
| 36 | case Swizzle::H1_H1: { | ||
// Broadcast component 1 to both lanes.
| 37 | const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)}; | ||
| 38 | return {scalar, scalar}; | ||
| 39 | } | ||
| 40 | case Swizzle::F32: { | ||
// No unpacking: the 32 bits are bit-cast to a single float that serves both lanes.
| 41 | const IR::F32 scalar{ir.BitCast<IR::F32>(value)}; | ||
| 42 | return {scalar, scalar}; | ||
| 43 | } | ||
| 44 | } | ||
// Only reached when the switch matched no case, i.e. an out-of-range enum value.
| 45 | throw InvalidArgument("Invalid swizzle {}", swizzle); | ||
| 46 | } | ||
| 47 | |||
// Builds the 32-bit word to store in `dest` from the two per-lane results, following `merge`.
// `lhs` is the lane 0 result and `rhs` the lane 1 result, both as 16-bit floats.
// Throws InvalidArgument if `merge` is not one of the four handled enumerators.
| 48 | IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, | ||
| 49 | Merge merge) { | ||
| 50 | switch (merge) { | ||
| 51 | case Merge::H1_H0: | ||
// lhs becomes component 0 and rhs component 1 of the packed pair.
| 52 | return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs)); | ||
| 53 | case Merge::F32: | ||
// Only lhs contributes: it is widened to 32 bits and its bit pattern is returned.
| 54 | return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs)); | ||
| 55 | case Merge::MRG_H0: | ||
| 56 | case Merge::MRG_H1: { | ||
// Read-modify-write: start from the current contents of the destination register and
// replace a single component, leaving the other one as it was.
| 57 | const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; | ||
| 58 | const bool h0{merge == Merge::MRG_H0}; | ||
// MRG_H0 inserts lhs at index 0; MRG_H1 inserts rhs at index 1.
| 59 | const IR::F16& insert{h0 ? lhs : rhs}; | ||
| 60 | return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, h0 ? 0 : 1)); | ||
| 61 | } | ||
| 62 | } | ||
// Only reached when the switch matched no case, i.e. an out-of-range enum value.
| 63 | throw InvalidArgument("Invalid merge {}", merge); | ||
| 64 | } | ||
| 65 | |||
// Shared implementation behind all HADD2 encodings: a two-lane floating-point add.
//   v                      translator whose IR emitter and registers are used
//   insn                   raw instruction; only dest_reg and src_a are decoded here
//   merge                  how the two results are written to the destination (see MergeResult)
//   ftz                    selects FmzMode::FTZ instead of FmzMode::None for the adds
//   sat                    passes both sums through FPSaturate
//   abs_a/neg_a, abs_b/neg_b   modifiers forwarded to FPAbsNeg for operand A and B
//   swizzle_a, swizzle_b   lane selection for each operand (see Extract)
//   src_b                  already-fetched 32-bit value of operand B
| 66 | void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, | ||
| 67 | Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { | ||
// Register fields common to every encoding.
// Assumes BitField<Position, Bits, Type>, i.e. two adjacent 8-bit fields - TODO confirm.
| 68 | union { | ||
| 69 | u64 raw; | ||
| 70 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 71 | BitField<8, 8, IR::Reg> src_a; | ||
| 72 | } const hadd2{insn}; | ||
| 73 | |||
| 74 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)}; | ||
| 75 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
// Extract yields either F16 or F32, so the types differ exactly when one side used
// Swizzle::F32 and the other did not. In that case the F16 side is widened so both
// operands of each add have the same type.
| 76 | const bool promotion{lhs_a.Type() != lhs_b.Type()}; | ||
| 77 | if (promotion) { | ||
| 78 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 79 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 80 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 81 | } | ||
| 82 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 83 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 84 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 85 | } | ||
| 86 | } | ||
// Operand modifiers are applied after any widening, to both lanes of each operand.
| 87 | lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); | ||
| 88 | rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); | ||
| 89 | |||
| 90 | lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); | ||
| 91 | rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); | ||
| 92 | |||
// Same floating-point control for both lane adds; only the flush mode depends on the
// instruction.
| 93 | const IR::FpControl fp_control{ | ||
| 94 | .no_contraction{true}, | ||
| 95 | .rounding{IR::FpRounding::DontCare}, | ||
| 96 | .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, | ||
| 97 | }; | ||
| 98 | IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)}; | ||
| 99 | IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)}; | ||
| 100 | if (sat) { | ||
| 101 | lhs = v.ir.FPSaturate(lhs); | ||
| 102 | rhs = v.ir.FPSaturate(rhs); | ||
| 103 | } | ||
// Sums computed at 32 bits because of promotion are narrowed back to 16 bits, the width
// MergeResult takes.
// NOTE(review): if both operands were extracted with Swizzle::F32, `promotion` is false and
// the sums are handed to MergeResult (which takes IR::F16) without this conversion - confirm
// whether that combination can occur and how it should be handled.
| 104 | if (promotion) { | ||
| 105 | lhs = v.ir.FPConvert(16, lhs); | ||
| 106 | rhs = v.ir.FPConvert(16, rhs); | ||
| 107 | } | ||
// The destination register is passed as well because the MRG_* modes read its current value.
| 108 | v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge)); | ||
| 109 | } | ||
| 110 | } // Anonymous namespace | ||
| 111 | |||
// HADD2 with a register as operand B.
// Decodes every modifier and both swizzles from `insn` and forwards them to the shared HADD2
// helper; operand B is fetched with GetReg20.
| 112 | void TranslatorVisitor::HADD2_reg(u64 insn) { | ||
// Field layout for this encoding. Note that sat, neg_b, abs_b and swizzle_b sit at
// different bit positions than in the cbuf/imm encodings.
| 113 | union { | ||
| 114 | u64 raw; | ||
| 115 | BitField<49, 2, Merge> merge; | ||
| 116 | BitField<39, 1, u64> ftz; | ||
| 117 | BitField<32, 1, u64> sat; | ||
| 118 | BitField<43, 1, u64> neg_a; | ||
| 119 | BitField<44, 1, u64> abs_a; | ||
| 120 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 121 | BitField<31, 1, u64> neg_b; | ||
| 122 | BitField<30, 1, u64> abs_b; | ||
| 123 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 124 | } const hadd2{insn}; | ||
| 125 | |||
// Single-bit fields are turned into bools with `!= 0`; the argument order follows the
// helper's signature (abs before neg).
| 126 | HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, | ||
| 127 | hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b, | ||
| 128 | GetReg20(insn)); | ||
| 129 | } | ||
| 130 | |||
// HADD2 with operand B fetched through GetCbuf.
// This encoding decodes no swizzle for operand B; it is always passed as Swizzle::F32, so B is
// treated as one 32-bit float used for both lanes.
| 131 | void TranslatorVisitor::HADD2_cbuf(u64 insn) { | ||
// Field layout for this encoding (sat, neg_b and abs_b differ from the register form).
| 132 | union { | ||
| 133 | u64 raw; | ||
| 134 | BitField<49, 2, Merge> merge; | ||
| 135 | BitField<39, 1, u64> ftz; | ||
| 136 | BitField<52, 1, u64> sat; | ||
| 137 | BitField<43, 1, u64> neg_a; | ||
| 138 | BitField<44, 1, u64> abs_a; | ||
| 139 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 140 | BitField<56, 1, u64> neg_b; | ||
| 141 | BitField<54, 1, u64> abs_b; | ||
| 142 | } const hadd2{insn}; | ||
| 143 | |||
// Single-bit fields are turned into bools with `!= 0`.
| 144 | HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, | ||
| 145 | hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32, | ||
| 146 | GetCbuf(insn)); | ||
| 147 | } | ||
| 148 | |||
// HADD2 with an immediate operand B assembled from two 9-bit fields plus two separate
// negate bits. Operand B gets no abs/neg modifiers and is always read as Swizzle::H1_H0.
| 149 | void TranslatorVisitor::HADD2_imm(u64 insn) { | ||
| 150 | union { | ||
| 151 | u64 raw; | ||
| 152 | BitField<49, 2, Merge> merge; | ||
| 153 | BitField<39, 1, u64> ftz; | ||
| 154 | BitField<52, 1, u64> sat; | ||
| 155 | BitField<43, 1, u64> neg_a; | ||
| 156 | BitField<44, 1, u64> abs_a; | ||
| 157 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 158 | BitField<56, 1, u64> neg_high; | ||
| 159 | BitField<30, 9, u64> high; | ||
| 160 | BitField<29, 1, u64> neg_low; | ||
| 161 | BitField<20, 9, u64> low; | ||
| 162 | } const hadd2{insn}; | ||
| 163 | |||
// Layout of the rebuilt 32-bit word:
//   bits  6..14  `low`       bit 15  `neg_low`    (lower 16-bit half)
//   bits 22..30  `high`      bit 31  `neg_high`   (upper 16-bit half)
// The six lowest bits of each half are left as zero.
// NOTE(review): the `(... ? 1 : 0) << 15` and `<< 31` terms are evaluated as int before being
// OR-ed into the u32; consider an unsigned operand for the shift by 31 - confirm.
| 164 | const u32 imm{static_cast<u32>(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 165 | static_cast<u32>(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)}; | ||
// The negate bits are already folded into the immediate, so abs_b and neg_b are false.
| 166 | HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, | ||
| 167 | hadd2.neg_a != 0, hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm)); | ||
| 168 | } | ||
| 169 | |||
// HADD2 with a full 32-bit immediate as operand B.
// This encoding decodes no merge mode and no abs_a bit: Merge::H1_H0 and abs_a = false are
// passed unconditionally. Operand B is used as-is (no abs/neg) with Swizzle::H1_H0.
| 170 | void TranslatorVisitor::HADD2_32I(u64 insn) { | ||
// Field layout for this encoding; ftz, neg_a and swizzle_a sit at different bit positions
// than in the other HADD2 forms.
| 171 | union { | ||
| 172 | u64 raw; | ||
| 173 | BitField<55, 1, u64> ftz; | ||
| 174 | BitField<52, 1, u64> sat; | ||
| 175 | BitField<56, 1, u64> neg_a; | ||
| 176 | BitField<53, 2, Swizzle> swizzle_a; | ||
| 177 | BitField<20, 32, u64> imm32; | ||
| 178 | } const hadd2{insn}; | ||
| 179 | |||
// The field is declared as u64, so it is narrowed explicitly to the 32 bits it holds.
| 180 | const u32 imm{static_cast<u32>(hadd2.imm32)}; | ||
| 181 | HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0, | ||
| 182 | hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm)); | ||
| 183 | } | ||
| 184 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index 727524284..748b856c9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp | |||
| @@ -59,7 +59,7 @@ IR::U64 Address(TranslatorVisitor& v, u64 insn) { | |||
| 59 | const IR::U64 address{[&]() -> IR::U64 { | 59 | const IR::U64 address{[&]() -> IR::U64 { |
| 60 | if (mem.e == 0) { | 60 | if (mem.e == 0) { |
| 61 | // LDG/STG without .E uses a 32-bit pointer, zero-extend it | 61 | // LDG/STG without .E uses a 32-bit pointer, zero-extend it |
| 62 | return v.ir.ConvertU(64, v.X(mem.addr_reg)); | 62 | return v.ir.UConvert(64, v.X(mem.addr_reg)); |
| 63 | } | 63 | } |
| 64 | if (!IR::IsAligned(mem.addr_reg, 2)) { | 64 | if (!IR::IsAligned(mem.addr_reg, 2)) { |
| 65 | throw NotImplementedException("Unaligned address register"); | 65 | throw NotImplementedException("Unaligned address register"); |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a0535f1c2..c24f29ff7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -265,22 +265,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) { | |||
| 265 | ThrowNotImplemented(Opcode::GETLMEMBASE); | 265 | ThrowNotImplemented(Opcode::GETLMEMBASE); |
| 266 | } | 266 | } |
| 267 | 267 | ||
// Old-side rows only (right column empty): placeholder that forwards Opcode::HADD2_reg to
// ThrowNotImplemented. This hunk deletes it from not_implemented.cpp.
| 268 | void TranslatorVisitor::HADD2_reg(u64) { | ||
| 269 | ThrowNotImplemented(Opcode::HADD2_reg); | ||
| 270 | } | ||
| 271 | |||
// Old-side rows only (right column empty): placeholder that forwards Opcode::HADD2_cbuf to
// ThrowNotImplemented. This hunk deletes it from not_implemented.cpp.
| 272 | void TranslatorVisitor::HADD2_cbuf(u64) { | ||
| 273 | ThrowNotImplemented(Opcode::HADD2_cbuf); | ||
| 274 | } | ||
| 275 | |||
// Old-side rows only (right column empty): placeholder that forwards Opcode::HADD2_imm to
// ThrowNotImplemented. This hunk deletes it from not_implemented.cpp.
| 276 | void TranslatorVisitor::HADD2_imm(u64) { | ||
| 277 | ThrowNotImplemented(Opcode::HADD2_imm); | ||
| 278 | } | ||
| 279 | |||
// Old-side rows only (right column empty): placeholder that forwards Opcode::HADD2_32I to
// ThrowNotImplemented. This hunk deletes it from not_implemented.cpp.
| 280 | void TranslatorVisitor::HADD2_32I(u64) { | ||
| 281 | ThrowNotImplemented(Opcode::HADD2_32I); | ||
| 282 | } | ||
| 283 | |||
// Unchanged context shown in both columns (old lines 284-286, new lines 268-270): placeholder
// that forwards Opcode::HFMA2_reg to ThrowNotImplemented.
| 284 | void TranslatorVisitor::HFMA2_reg(u64) { | 268 | void TranslatorVisitor::HFMA2_reg(u64) { |
| 285 | ThrowNotImplemented(Opcode::HFMA2_reg); | 269 | ThrowNotImplemented(Opcode::HFMA2_reg); |
| 286 | } | 270 | } |