path: root/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
author    ReinUsesLisp  2021-05-25 02:22:21 -0300
committer ameerj        2021-07-22 21:51:33 -0400
commit    ca05a13c62ad7693f8be924c168e400e8139b0d2 (patch)
tree      813638ab0c537089f3493f824707417dd429a48f /src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
parent    glasm: Fix usage counting on phi nodes (diff)
glasm: Catch more register leaks
Add support for null registers. These are used when an instruction has no usages. This comes in handy when an instruction is only used for its CC value, with the caveat that all pseudo-instructions have to be invalidated before the instruction itself is defined in the register allocator; this commit updates the emitters accordingly.

Work around a bug in Nvidia's compiler with condition-code conditional execution by using branches instead.
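The caveat in the message is the interesting part: pseudo-instructions such as GetZeroFromOp or GetCarryFromOp count as usages of the instruction they observe, so each emitter now invalidates them before calling ctx.reg_alloc.Define(inst). Only then can an instruction that exists purely for its CC value end up with zero remaining usages and receive the null register instead of leaking a real one. The following is a self-contained toy model of that idea, not the real Shader::Backend::GLASM::RegAlloc; every name in it is illustrative.

#include <cstdio>

// Toy "instruction" that only tracks how many usages it still has.
struct Inst {
    int num_uses{0};
};

// Dropping a pseudo-instruction (e.g. GetCarryFromOp) removes its usage of the
// instruction it observes.
void InvalidatePseudoOp(Inst& observed) {
    --observed.num_uses;
}

struct RegAlloc {
    int next_free{0};
    // An instruction with no remaining usages receives the "null register"
    // (index -1 here) instead of a tracked one, so it can never leak.
    int Define(const Inst& inst) {
        return inst.num_uses == 0 ? -1 : next_free++;
    }
};

int main() {
    RegAlloc alloc;
    Inst add{1};  // an IAdd32 whose only consumer is a carry-flag pseudo-op

    // Wrong order: defining first would hand out a real register that nothing
    // ever consumes once the pseudo-op is invalidated afterwards -> leak.
    // Right order (what the patch does in every emitter): invalidate first.
    InvalidatePseudoOp(add);
    std::printf("register index: %d (null, no leak)\n", alloc.Define(add));
    return 0;
}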
Diffstat (limited to 'src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp')
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp  58
1 file changed, 40 insertions(+), 18 deletions(-)
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
index e5aac14c8..e9d1bae05 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
@@ -9,6 +9,17 @@
 namespace Shader::Backend::GLASM {
 
 void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+    const std::array flags{
+        inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp),
+        inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp),
+        inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp),
+        inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp),
+    };
+    for (IR::Inst* const flag_inst : flags) {
+        if (flag_inst) {
+            flag_inst->Invalidate();
+        }
+    }
     const bool cc{inst.HasAssociatedPseudoOperation()};
     const std::string_view cc_mod{cc ? ".CC" : ""};
     if (cc) {
@@ -19,20 +30,22 @@ void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
     if (!cc) {
         return;
     }
-    static constexpr std::array<std::string_view, 4> masks{"EQ", "SF", "CF", "OF"};
-    const std::array flags{
-        inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp),
-        inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp),
-        inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp),
-        inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp),
-    };
-    for (size_t i = 0; i < flags.size(); ++i) {
-        if (flags[i]) {
-            const auto flag_ret{ctx.reg_alloc.Define(*flags[i])};
-            ctx.Add("MOV.S {},0;"
-                    "MOV.S {}({}.x),-1;",
-                    flag_ret, flag_ret, masks[i]);
-            flags[i]->Invalidate();
+    static constexpr std::array<std::string_view, 4> masks{"", "SF", "CF", "OF"};
+    for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) {
+        if (!flags[flag_index]) {
+            continue;
+        }
+        const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])};
+        if (flag_index == 0) {
+            ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret);
+        } else {
+            // We could use conditional execution here, but it's broken on Nvidia's compiler
+            ctx.Add("IF {}.x;"
+                    "MOV.S {}.x,-1;"
+                    "ELSE;"
+                    "MOV.S {}.x,0;"
+                    "ENDIF;",
+                    masks[flag_index], flag_ret, flag_ret);
         }
     }
 }
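For a concrete picture of the Nvidia workaround in the hunk above, take the carry flag (masks entry "CF") and let R0 stand in for the allocated flag register (the register name is illustrative). The old and new ctx.Add calls format to roughly this GLASM:

    old: MOV.S R0,0; MOV.S R0(CF.x),-1;                       (conditional execution, which the in-source comment notes is broken on Nvidia's compiler)
    new: IF CF.x; MOV.S R0.x,-1; ELSE; MOV.S R0.x,0; ENDIF;    (plain branch, same result)

The zero flag is special-cased: instead of reading a condition code at all, the new code derives it from the result itself with SEQ.S flag.x,result.x,0; which is why masks[0] is now empty.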
@@ -136,6 +149,17 @@ void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, Scal
 
 void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset,
                           ScalarU32 count) {
+    const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp);
+    const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp);
+    if (zero) {
+        zero->Invalidate();
+    }
+    if (sign) {
+        sign->Invalidate();
+    }
+    if (zero || sign) {
+        ctx.reg_alloc.InvalidateConditionCodes();
+    }
     const Register ret{ctx.reg_alloc.Define(inst)};
     if (count.type != Type::Register && offset.type != Type::Register) {
         ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base);
@@ -145,13 +169,11 @@ void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, Scal
                 "BFE.U {},RC,{};",
                 count, offset, ret, base);
     }
-    if (const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)) {
+    if (zero) {
         ctx.Add("SEQ.S {},{},0;", *zero, ret);
-        zero->Invalidate();
     }
-    if (const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)) {
+    if (sign) {
         ctx.Add("SLT.S {},{},0;", *sign, ret);
-        sign->Invalidate();
     }
 }
 
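Both EmitBitFieldUExtract hunks implement the same rule as EmitIAdd32: the GetZeroFromOp/GetSignFromOp lookups and their Invalidate() calls are hoisted above reg_alloc.Define(inst), and the flag-writing code at the bottom reuses the hoisted pointers instead of looking the pseudo-ops up a second time. Distilled, the shape such an emitter takes after this patch looks like the skeleton below; it is a sketch assembled from the calls visible in this diff, with a hypothetical function name, not code copied from the repository.

void EmitSomething(EmitContext& ctx, IR::Inst& inst /*, operands... */) {
    // 1. Look up the pseudo-instructions that observe this result.
    const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp);
    const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp);
    // 2. Invalidate them before Define(), so a result used only through them
    //    can be given the null register.
    if (zero) {
        zero->Invalidate();
    }
    if (sign) {
        sign->Invalidate();
    }
    if (zero || sign) {
        ctx.reg_alloc.InvalidateConditionCodes();
    }
    // 3. Only now allocate the destination and emit the instruction itself.
    const Register ret{ctx.reg_alloc.Define(inst)};
    // ... ctx.Add(...) for the actual operation ...
    // 4. Materialize any requested flags from the already-defined result.
    if (zero) {
        ctx.Add("SEQ.S {},{},0;", *zero, ret);
    }
    if (sign) {
        ctx.Add("SLT.S {},{},0;", *sign, ret);
    }
}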