Diffstat (limited to 'src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp')
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp | 58
1 file changed, 40 insertions, 18 deletions
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
index e5aac14c8..e9d1bae05 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
@@ -9,6 +9,17 @@
 namespace Shader::Backend::GLASM {
 
 void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+    const std::array flags{
+        inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp),
+        inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp),
+        inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp),
+        inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp),
+    };
+    for (IR::Inst* const flag_inst : flags) {
+        if (flag_inst) {
+            flag_inst->Invalidate();
+        }
+    }
     const bool cc{inst.HasAssociatedPseudoOperation()};
     const std::string_view cc_mod{cc ? ".CC" : ""};
     if (cc) {
@@ -19,20 +30,22 @@ void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
     if (!cc) {
         return;
     }
-    static constexpr std::array<std::string_view, 4> masks{"EQ", "SF", "CF", "OF"};
-    const std::array flags{
-        inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp),
-        inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp),
-        inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp),
-        inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp),
-    };
-    for (size_t i = 0; i < flags.size(); ++i) {
-        if (flags[i]) {
-            const auto flag_ret{ctx.reg_alloc.Define(*flags[i])};
-            ctx.Add("MOV.S {},0;"
-                    "MOV.S {}({}.x),-1;",
-                    flag_ret, flag_ret, masks[i]);
-            flags[i]->Invalidate();
+    static constexpr std::array<std::string_view, 4> masks{"", "SF", "CF", "OF"};
+    for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) {
+        if (!flags[flag_index]) {
+            continue;
+        }
+        const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])};
+        if (flag_index == 0) {
+            ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret);
+        } else {
+            // We could use conditional execution here, but it's broken on Nvidia's compiler
+            ctx.Add("IF {}.x;"
+                    "MOV.S {}.x,-1;"
+                    "ELSE;"
+                    "MOV.S {}.x,0;"
+                    "ENDIF;",
+                    masks[flag_index], flag_ret, flag_ret);
         }
     }
 }
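
For reference, an IAdd32 whose zero and sign flags are both consumed would now be
lowered to GLASM roughly as follows. Register names are illustrative, and the
ADD.S.CC emission itself lies outside the hunks shown:

    ADD.S.CC R0.x,R1.x,R2.x;  # .CC makes the add update the condition codes
    SEQ.S R3.x,R0.x,0;        # zero flag: compare the result itself against 0
    IF SF.x;                  # sign flag: IF/ELSE on the condition code, avoiding
    MOV.S R4.x,-1;            # the predicated MOVs that miscompile on Nvidia
    ELSE;
    MOV.S R4.x,0;
    ENDIF;
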
@@ -136,6 +149,17 @@ void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, Scal
 
 void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset,
                           ScalarU32 count) {
+    const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp);
+    const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp);
+    if (zero) {
+        zero->Invalidate();
+    }
+    if (sign) {
+        sign->Invalidate();
+    }
+    if (zero || sign) {
+        ctx.reg_alloc.InvalidateConditionCodes();
+    }
     const Register ret{ctx.reg_alloc.Define(inst)};
     if (count.type != Type::Register && offset.type != Type::Register) {
         ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base);
@@ -145,13 +169,11 @@ void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, Scal
                 "BFE.U {},RC,{};",
                 count, offset, ret, base);
     }
-    if (const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)) {
+    if (zero) {
         ctx.Add("SEQ.S {},{},0;", *zero, ret);
-        zero->Invalidate();
     }
-    if (const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)) {
+    if (sign) {
         ctx.Add("SLT.S {},{},0;", *sign, ret);
-        sign->Invalidate();
     }
 }
 
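Similarly, a BitFieldUExtract with both pseudo-operations consumed and a constant
count/offset would now emit roughly the following; the count of 8, offset of 4, and
register names are illustrative values, not taken from the commit:

    BFE.U R0.x,{8,4,0,0},R1.x;  # extract 8 bits starting at bit 4 of the base
    SEQ.S R2.x,R0.x,0;          # GetZeroFromOp: result compared against zero
    SLT.S R3.x,R0.x,0;          # GetSignFromOp: set if the result is negative as signed

Hoisting the Invalidate() calls above reg_alloc.Define(inst) presumably keeps the
register allocator from seeing the pseudo-operations as still-pending uses when the
result register is assigned; the diff itself does not state the motivation.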