author    ReinUsesLisp  2021-05-25 02:22:21 -0300
committer ameerj        2021-07-22 21:51:33 -0400
commit    ca05a13c62ad7693f8be924c168e400e8139b0d2
tree      813638ab0c537089f3493f824707417dd429a48f /src/shader_recompiler
parent    glasm: Fix usage counting on phi nodes
glasm: Catch more register leaks
Add support for null registers. These are used when an instruction has no usages, which comes in handy when an instruction is only used for its CC value. The caveat is that all pseudo-instructions now have to be invalidated before the instruction itself is defined in the register allocator; this commit updates the affected emitters accordingly. Also work around a bug in Nvidia's compiler where conditional execution on condition codes is miscompiled, by using branches instead.
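
The mechanism in a nutshell, as a standalone C++ sketch (hypothetical types; the real bit layout is the `Id` union in reg_alloc.h below): a result with no usages gets a null id, and the formatter prints the backend's scratch temporaries `RC`/`DC` instead of an allocated `R{n}`/`D{n}`, so the write is discarded and no register leaks.

```cpp
#include <cstdint>
#include <iostream>
#include <string>

// Hypothetical, simplified model of the null-register idea; the real layout
// lives in reg_alloc.h. An unused result formats to the scratch temporaries
// RC (32-bit) and DC (64-bit) instead of consuming an allocated register.
struct Id {
    bool is_null{};
    bool is_long{};  // 64-bit (D) vs 32-bit (R) register file
    std::uint32_t index{};
};

std::string Format(Id id) {
    if (id.is_null) {
        return id.is_long ? "DC" : "RC";  // write lands in scratch, is discarded
    }
    return (id.is_long ? "D" : "R") + std::to_string(id.index);
}

int main() {
    std::cout << Format(Id{.is_null = true}) << '\n';              // RC
    std::cout << Format(Id{.is_long = true, .index = 2}) << '\n';  // D2
}
```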
Diffstat (limited to 'src/shader_recompiler')
 src/shader_recompiler/backend/glasm/emit_glasm.cpp                    |  8
 src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp |  8
 src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp          |  4
 src/shader_recompiler/backend/glasm/emit_glasm_image.cpp              | 27
 src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp            | 58
 src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp               |  6
 src/shader_recompiler/backend/glasm/reg_alloc.cpp                     | 28
 src/shader_recompiler/backend/glasm/reg_alloc.h                       | 16
8 files changed, 114 insertions(+), 41 deletions(-)
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
index 2ce839059..4aa3682c2 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -203,7 +203,13 @@ void Precolor(EmitContext& ctx, const IR::Program& program) {
         for (size_t i = 0; i < num_args; ++i) {
             IR::Block& phi_block{*phi.PhiBlock(i)};
             auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()};
-            IR::IREmitter{phi_block, it}.PhiMove(phi, phi.Arg(i));
+            IR::IREmitter ir{phi_block, it};
+            const IR::Value arg{phi.Arg(i)};
+            if (arg.IsImmediate()) {
+                ir.PhiMove(phi, arg);
+            } else {
+                ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())});
+            }
         }
         for (size_t i = 0; i < num_args; ++i) {
             IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi});
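
In `Precolor`, a phi argument can name an aliasing instruction rather than the value's canonical producer; routing the move through `RegAlloc::AliasInst` keeps usage counting attached to one instruction. The hunk doesn't show `AliasInst` itself, so the sketch below only assumes it chases alias wrappers (hypothetical `Inst` type):

```cpp
// Hypothetical stand-in for the IR instruction type, for illustration only.
struct Inst {
    bool is_alias{};  // models Identity-style wrapper opcodes
    Inst* arg{};      // the single forwarded operand when is_alias is true
};

// Assumed behavior: follow alias wrappers back to the canonical producer so
// usage counts (and hence register liveness) land on one instruction.
Inst& AliasInst(Inst& inst) {
    Inst* it{&inst};
    while (it->is_alias && it->arg != nullptr) {
        it = it->arg;  // chase the chain to the register-owning instruction
    }
    return *it;
}
```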
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
index 808c72105..9201ccd39 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
@@ -23,7 +23,13 @@ void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) {
 }
 
 void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) {
-    ctx.Add("MOV.S {},{};", inst, ScalarS32{ctx.reg_alloc.Consume(value)});
+    // Fake one usage to get a real register out of the condition
+    inst.DestructiveAddUsage(1);
+    const Register ret{ctx.reg_alloc.Define(inst)};
+    const ScalarS32 input{ctx.reg_alloc.Consume(value)};
+    if (ret != input) {
+        ctx.Add("MOV.S {},{};", ret, input);
+    }
 }
 
 void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
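
The in-diff comment is the whole story: a condition consumed only by control flow contributes no operand usages, so with null registers in play `Define` would route the value to scratch and lose it. A simplified model with hypothetical types:

```cpp
// Simplified model of the EmitConditionRef fix: one artificial usage keeps
// the allocator from routing a branch condition into the null register.
struct Inst {
    int uses{};
    void DestructiveAddUsage(int n) { uses += n; }
    bool HasUses() const { return uses > 0; }
};

struct Reg {
    bool is_null{};
};

Reg Define(const Inst& inst) {
    return Reg{.is_null = !inst.HasUses()};
}

Reg ConditionRef(Inst& inst) {
    inst.DestructiveAddUsage(1);  // fake one usage, as the diff comment says
    return Define(inst);          // now guaranteed to be a real register
}
```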
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
index d829f05b3..bff0b7c1c 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
@@ -52,7 +52,9 @@ void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, Objec
         // The input composite is not aliased with the return value so we have to copy it before
         // hand. But the insert object is not aliased with the return value, so we don't have to
         // worry about that
-        ctx.Add("MOV.{} {},{};MOV.{} {}.{},{};", type, ret, composite, type, ret, swizzle, object);
+        ctx.Add("MOV.{} {},{};"
+                "MOV.{} {}.{},{};",
+                type, ret, composite, type, ret, swizzle, object);
     } else {
         // The return value is alised so we can just insert the object, it doesn't matter if it's
         // aliased
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
index a7def0897..34725b8c6 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -181,7 +181,6 @@ void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) {
     ctx.Add("MOV.S {},-1;"
             "MOV.S {}(NONRESIDENT),0;",
             sparse_ret, sparse_ret);
-    sparse_inst->Invalidate();
 }
 
 std::string_view FormatStorage(ImageFormat format) {
@@ -215,12 +214,20 @@ void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Regis
     const Register ret{ctx.reg_alloc.Define(inst)};
     ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type);
 }
+
+IR::Inst* PrepareSparse(IR::Inst& inst) {
+    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    if (sparse_inst) {
+        sparse_inst->Invalidate();
+    }
+    return sparse_inst;
+}
 } // Anonymous namespace
 
 void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                                 const IR::Value& coord, Register bias_lc, const IR::Value& offset) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""};
     const std::string_view type{TextureType(info)};
@@ -259,7 +266,7 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu
 void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                                 const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -288,7 +295,7 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::
     }
     const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
     const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -393,7 +400,7 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::
     }
     const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
     const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -436,7 +443,7 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
     const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
     const auto info{inst.Flags<IR::TextureInstInfo>()};
     const char comp{"xyzw"[info.gather_component]};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -462,7 +469,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
     // Allocate offsets early so they don't overwrite any consumed register
     const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
     const auto info{inst.Flags<IR::TextureInstInfo>()};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -500,7 +507,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
 void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                     const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -547,7 +554,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
         dpdx = ScopedRegister{ctx.reg_alloc};
         dpdy = ScopedRegister{ctx.reg_alloc};
     }
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -581,7 +588,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
 
 void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view format{FormatStorage(info.image_format)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
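
Factoring out `PrepareSparse` is more than deduplication: it guarantees the sparse pseudo-op is invalidated before each emitter's first `reg_alloc.Define` call, matching the caveat in the commit message. A hypothetical model of why the order matters:

```cpp
// Hypothetical model of the ordering rule: invalidating a pseudo-op removes a
// usage from its associated instruction, and Define consults the usage count
// to pick a real or null register. Invalidating only after Define could pin a
// register that nothing ever consumes (a leak).
struct Inst {
    int uses{};
    Inst* assoc{};  // the texture op this pseudo-op (GetSparseFromOp) reads

    void Invalidate() {
        if (assoc != nullptr) {
            --assoc->uses;  // the texture op loses the pseudo-op's usage
            assoc = nullptr;
        }
    }
};

// Strip the sparse pseudo-usage first, then let the emitter call Define.
Inst* PrepareSparse(Inst& texture_op, Inst* sparse_pseudo) {
    if (sparse_pseudo != nullptr && sparse_pseudo->assoc == &texture_op) {
        sparse_pseudo->Invalidate();
    }
    return sparse_pseudo;
}
```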
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
index e5aac14c8..e9d1bae05 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
@@ -9,6 +9,17 @@
 namespace Shader::Backend::GLASM {
 
 void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+    const std::array flags{
+        inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp),
+        inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp),
+        inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp),
+        inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp),
+    };
+    for (IR::Inst* const flag_inst : flags) {
+        if (flag_inst) {
+            flag_inst->Invalidate();
+        }
+    }
     const bool cc{inst.HasAssociatedPseudoOperation()};
     const std::string_view cc_mod{cc ? ".CC" : ""};
     if (cc) {
@@ -19,20 +30,22 @@ void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
     if (!cc) {
         return;
     }
-    static constexpr std::array<std::string_view, 4> masks{"EQ", "SF", "CF", "OF"};
-    const std::array flags{
-        inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp),
-        inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp),
-        inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp),
-        inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp),
-    };
-    for (size_t i = 0; i < flags.size(); ++i) {
-        if (flags[i]) {
-            const auto flag_ret{ctx.reg_alloc.Define(*flags[i])};
-            ctx.Add("MOV.S {},0;"
-                    "MOV.S {}({}.x),-1;",
-                    flag_ret, flag_ret, masks[i]);
-            flags[i]->Invalidate();
-        }
-    }
+    static constexpr std::array<std::string_view, 4> masks{"", "SF", "CF", "OF"};
+    for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) {
+        if (!flags[flag_index]) {
+            continue;
+        }
+        const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])};
+        if (flag_index == 0) {
+            ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret);
+        } else {
+            // We could use conditional execution here, but it's broken on Nvidia's compiler
+            ctx.Add("IF {}.x;"
+                    "MOV.S {}.x,-1;"
+                    "ELSE;"
+                    "MOV.S {}.x,0;"
+                    "ENDIF;",
+                    masks[flag_index], flag_ret, flag_ret);
+        }
+    }
 }
@@ -136,6 +149,17 @@ void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, Scal
 
 void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset,
                           ScalarU32 count) {
+    const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp);
+    const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp);
+    if (zero) {
+        zero->Invalidate();
+    }
+    if (sign) {
+        sign->Invalidate();
+    }
+    if (zero || sign) {
+        ctx.reg_alloc.InvalidateConditionCodes();
+    }
     const Register ret{ctx.reg_alloc.Define(inst)};
     if (count.type != Type::Register && offset.type != Type::Register) {
         ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base);
@@ -145,13 +169,11 @@ void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, Scal
                 "BFE.U {},RC,{};",
                 count, offset, ret, base);
     }
-    if (const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)) {
+    if (zero) {
         ctx.Add("SEQ.S {},{},0;", *zero, ret);
-        zero->Invalidate();
     }
-    if (const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)) {
+    if (sign) {
         ctx.Add("SLT.S {},{},0;", *sign, ret);
-        sign->Invalidate();
     }
 }
 
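The `masks` table drops `EQ` because the zero flag is now derived from the result itself with `SEQ.S`; the remaining flags read `SF`/`CF`/`OF` through explicit branches. A sketch of that decision (`printf` as a stand-in for `EmitContext::Add`):

```cpp
#include <cstdio>

// Hedged sketch of the new flag materialization. flag_index 0 (zero flag)
// avoids condition codes entirely; the others go through IF/ELSE/ENDIF
// because, per the in-diff comment, conditional execution on condition codes
// is broken in Nvidia's compiler.
void EmitFlag(int flag_index, const char* mask, const char* flag_reg, const char* ret_reg) {
    if (flag_index == 0) {
        std::printf("SEQ.S %s.x,%s.x,0;", flag_reg, ret_reg);
    } else {
        std::printf("IF %s.x;MOV.S %s.x,-1;ELSE;MOV.S %s.x,0;ENDIF;", mask, flag_reg, flag_reg);
    }
}
```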
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp
index af0e13d43..6e30790bb 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp
@@ -51,6 +51,10 @@ void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
 static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
                     const IR::Value& clamp, const IR::Value& segmentation_mask,
                     std::string_view op) {
+    IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
+    if (in_bounds) {
+        in_bounds->Invalidate();
+    }
     std::string mask;
     if (clamp.IsImmediate() && segmentation_mask.IsImmediate()) {
         mask = fmt::to_string(clamp.U32() | (segmentation_mask.U32() << 8));
@@ -61,13 +65,11 @@ static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32
                 ScalarU32{ctx.reg_alloc.Consume(clamp)});
     }
     const Register value_ret{ctx.reg_alloc.Define(inst)};
-    IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
     if (in_bounds) {
         const Register bounds_ret{ctx.reg_alloc.Define(*in_bounds)};
         ctx.Add("SHF{}.U {},{},{},{};"
                 "MOV.U {}.x,{}.y;",
                 op, bounds_ret, value, index, mask, value_ret, bounds_ret);
-        in_bounds->Invalidate();
     } else {
         ctx.Add("SHF{}.U {},{},{},{};"
                 "MOV.U {}.x,{}.y;",
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp
index 707b22247..c55a833c6 100644
--- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp
+++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp
@@ -22,11 +22,19 @@ Register RegAlloc::LongDefine(IR::Inst& inst) {
 }
 
 Value RegAlloc::Peek(const IR::Value& value) {
-    return value.IsImmediate() ? MakeImm(value) : PeekInst(*value.InstRecursive());
+    if (value.IsImmediate()) {
+        return MakeImm(value);
+    } else {
+        return PeekInst(*value.Inst());
+    }
 }
 
 Value RegAlloc::Consume(const IR::Value& value) {
-    return value.IsImmediate() ? MakeImm(value) : ConsumeInst(*value.InstRecursive());
+    if (value.IsImmediate()) {
+        return MakeImm(value);
+    } else {
+        return ConsumeInst(*value.Inst());
+    }
 }
 
 void RegAlloc::Unref(IR::Inst& inst) {
@@ -88,7 +96,14 @@ Value RegAlloc::MakeImm(const IR::Value& value) {
 }
 
 Register RegAlloc::Define(IR::Inst& inst, bool is_long) {
-    inst.SetDefinition<Id>(Alloc(is_long));
+    if (inst.HasUses()) {
+        inst.SetDefinition<Id>(Alloc(is_long));
+    } else {
+        Id id{};
+        id.is_long.Assign(is_long ? 1 : 0);
+        id.is_null.Assign(1);
+        inst.SetDefinition<Id>(id);
+    }
     return Register{PeekInst(inst)};
 }
 
@@ -115,10 +130,12 @@ Id RegAlloc::Alloc(bool is_long) {
             num_regs = std::max(num_regs, reg + 1);
             use[reg] = true;
             Id ret{};
-            ret.index.Assign(static_cast<u32>(reg));
+            ret.is_valid.Assign(1);
             ret.is_long.Assign(is_long ? 1 : 0);
             ret.is_spill.Assign(0);
             ret.is_condition_code.Assign(0);
+            ret.is_null.Assign(0);
+            ret.index.Assign(static_cast<u32>(reg));
             return ret;
         }
     }
@@ -126,6 +143,9 @@ Id RegAlloc::Alloc(bool is_long) {
 }
 
 void RegAlloc::Free(Id id) {
+    if (id.is_valid == 0) {
+        throw LogicError("Freeing invalid register");
+    }
     if (id.is_spill != 0) {
         throw NotImplementedException("Free spill");
     }
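
Together these two hunks close the loop: `Define` never allocates for an unused result, and `Free` refuses ids that never came from `Alloc`. A compact hypothetical model:

```cpp
#include <stdexcept>

// Compact model of the two allocator checks added here: Define consults the
// usage count before touching the register pool, and Free rejects ids whose
// validity bit is clear instead of silently recycling them.
struct Id {
    bool is_valid{};
    bool is_null{};
    bool is_long{};
    unsigned index{};
};

struct MiniRegAlloc {
    unsigned next_index{};

    Id Define(bool has_uses, bool is_long) {
        Id id{};
        id.is_long = is_long;
        if (has_uses) {
            id.is_valid = true;  // a real allocation from the pool
            id.index = next_index++;
        } else {
            id.is_null = true;   // never touches the pool, nothing to free
        }
        return id;
    }

    void Free(Id id) {
        if (!id.is_valid) {
            throw std::logic_error("Freeing invalid register");
        }
        // ...return id.index to the pool...
    }
};
```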
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h
index 41b7c92be..b97c84146 100644
--- a/src/shader_recompiler/backend/glasm/reg_alloc.h
+++ b/src/shader_recompiler/backend/glasm/reg_alloc.h
@@ -35,10 +35,12 @@ enum class Type : u32 {
 struct Id {
     union {
         u32 raw;
-        BitField<0, 29, u32> index;
-        BitField<29, 1, u32> is_long;
-        BitField<30, 1, u32> is_spill;
-        BitField<31, 1, u32> is_condition_code;
+        BitField<0, 1, u32> is_valid;
+        BitField<1, 1, u32> is_long;
+        BitField<2, 1, u32> is_spill;
+        BitField<3, 1, u32> is_condition_code;
+        BitField<4, 1, u32> is_null;
+        BitField<5, 27, u32> index;
     };
 
     bool operator==(Id rhs) const noexcept {
@@ -164,12 +166,18 @@ auto FormatTo(FormatContext& ctx, Id id) {
         throw NotImplementedException("Spill emission");
     }
     if constexpr (scalar) {
+        if (id.is_null != 0) {
+            return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC.x" : "RC.x");
+        }
         if (id.is_long != 0) {
             return fmt::format_to(ctx.out(), "D{}.x", id.index.Value());
         } else {
             return fmt::format_to(ctx.out(), "R{}.x", id.index.Value());
         }
     } else {
+        if (id.is_null != 0) {
+            return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC" : "RC");
+        }
         if (id.is_long != 0) {
             return fmt::format_to(ctx.out(), "D{}", id.index.Value());
         } else {
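
The layout change is what makes the `is_valid` check in `Free` work: previously `index` occupied bit 0 upward, so a zero-initialized `Id` was bit-identical to a valid handle for `R0`. A small demonstration with plain masks (standing in for the `BitField` template):

```cpp
#include <cassert>
#include <cstdint>

// With is_valid in bit 0 and index in bits 5..31, Id{} (raw == 0) can no
// longer masquerade as any register.
constexpr std::uint32_t kValidBit = 1u << 0;

constexpr std::uint32_t MakeId(std::uint32_t index) {
    return kValidBit | (index << 5);
}

int main() {
    assert(MakeId(0) != 0);                  // R0's id is no longer all zeroes
    constexpr std::uint32_t default_id = 0;  // what Id{} produces
    assert((default_id & kValidBit) == 0);   // caught by the new Free() check
    return 0;
}
```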