Diffstat (limited to 'src/shader_recompiler')
8 files changed, 114 insertions, 41 deletions
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
index 2ce839059..4aa3682c2 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -203,7 +203,13 @@ void Precolor(EmitContext& ctx, const IR::Program& program) {
         for (size_t i = 0; i < num_args; ++i) {
             IR::Block& phi_block{*phi.PhiBlock(i)};
             auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()};
-            IR::IREmitter{phi_block, it}.PhiMove(phi, phi.Arg(i));
+            IR::IREmitter ir{phi_block, it};
+            const IR::Value arg{phi.Arg(i)};
+            if (arg.IsImmediate()) {
+                ir.PhiMove(phi, arg);
+            } else {
+                ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())});
+            }
         }
         for (size_t i = 0; i < num_args; ++i) {
             IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi});
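The phi move now resolves instruction-valued arguments through RegAlloc::AliasInst before emitting the move. As a standalone sketch of the idea (mock Inst type, not the real IR classes; the real AliasInst lives in the GLASM register allocator):

    // Minimal model: chase identity aliases to the instruction that actually
    // owns the register definition, so the phi move reads the right source.
    #include <cassert>

    struct Inst {
        Inst* alias = nullptr; // set when this inst merely forwards another's value
    };

    Inst& AliasInst(Inst& inst) {
        Inst* it = &inst;
        while (it->alias != nullptr) {
            it = it->alias;
        }
        return *it;
    }

    int main() {
        Inst owner{};          // defines the register
        Inst identity{&owner}; // identity/bitcast forwarding owner's value
        assert(&AliasInst(identity) == &owner);
    }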
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
index 808c72105..9201ccd39 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
@@ -23,7 +23,13 @@ void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) {
 }
 
 void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) {
-    ctx.Add("MOV.S {},{};", inst, ScalarS32{ctx.reg_alloc.Consume(value)});
+    // Fake one usage to get a real register out of the condition
+    inst.DestructiveAddUsage(1);
+    const Register ret{ctx.reg_alloc.Define(inst)};
+    const ScalarS32 input{ctx.reg_alloc.Consume(value)};
+    if (ret != input) {
+        ctx.Add("MOV.S {},{};", ret, input);
+    }
 }
 
 void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
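Why fake a usage: with this change (see reg_alloc.cpp below), RegAlloc::Define hands back a null register when an instruction has no uses, and a condition reference must still end up in a real register. A standalone model with mock types, not the real classes:

    #include <cstdio>

    struct Inst {
        int uses = 0;
        void DestructiveAddUsage(int n) { uses += n; }
        bool HasUses() const { return uses > 0; }
    };

    static int next_reg = 0;

    // Mirrors the new Define behavior: unused results get a throwaway id.
    int Define(Inst& inst) {
        return inst.HasUses() ? next_reg++ : -1; // -1 models the null register
    }

    int main() {
        Inst cond_ref{};
        cond_ref.DestructiveAddUsage(1); // force a real allocation
        std::printf("allocated R%d\n", Define(cond_ref));
    }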
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
index d829f05b3..bff0b7c1c 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
@@ -52,7 +52,9 @@ void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, Objec
         // The input composite is not aliased with the return value, so we have to copy it
         // beforehand. The insert object is not aliased with the return value either, so we
         // don't have to worry about that
-        ctx.Add("MOV.{} {},{};MOV.{} {}.{},{};", type, ret, composite, type, ret, swizzle, object);
+        ctx.Add("MOV.{} {},{};"
+                "MOV.{} {}.{},{};",
+                type, ret, composite, type, ret, swizzle, object);
     } else {
         // The return value is aliased, so we can just insert the object; it doesn't matter
        // if the object is aliased too
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
index a7def0897..34725b8c6 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -181,7 +181,6 @@ void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) {
     ctx.Add("MOV.S {},-1;"
             "MOV.S {}(NONRESIDENT),0;",
             sparse_ret, sparse_ret);
-    sparse_inst->Invalidate();
 }
 
 std::string_view FormatStorage(ImageFormat format) {
@@ -215,12 +214,20 @@ void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Regis
     const Register ret{ctx.reg_alloc.Define(inst)};
     ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type);
 }
+
+IR::Inst* PrepareSparse(IR::Inst& inst) {
+    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    if (sparse_inst) {
+        sparse_inst->Invalidate();
+    }
+    return sparse_inst;
+}
 } // Anonymous namespace
 
 void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                                 const IR::Value& coord, Register bias_lc, const IR::Value& offset) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""};
     const std::string_view type{TextureType(info)};
@@ -259,7 +266,7 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu
 void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                                 const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -288,7 +295,7 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::
     }
     const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
     const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -393,7 +400,7 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::
     }
     const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)};
     const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -436,7 +443,7 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
     const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
     const auto info{inst.Flags<IR::TextureInstInfo>()};
     const char comp{"xyzw"[info.gather_component]};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -462,7 +469,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
     // Allocate offsets early so they don't overwrite any consumed register
     const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)};
     const auto info{inst.Flags<IR::TextureInstInfo>()};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -500,7 +507,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
 void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                     const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -547,7 +554,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
         dpdx = ScopedRegister{ctx.reg_alloc};
         dpdy = ScopedRegister{ctx.reg_alloc};
     }
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
     const std::string texture{Texture(ctx, info, index)};
@@ -581,7 +588,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
 
 void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
-    const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
+    const auto sparse_inst{PrepareSparse(inst)};
     const std::string_view format{FormatStorage(info.image_format)};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
     const std::string_view type{TextureType(info)};
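PrepareSparse factors out a lookup-then-invalidate sequence that every image handler repeated, and moves the Invalidate call ahead of any register allocation. A condensed standalone model (mock Inst, not the real IR):

    // The helper both reports whether a sparse pseudo-op exists (to pick the
    // .SPARSE modifier) and invalidates it up front.
    struct Inst {
        Inst* sparse = nullptr; // associated GetSparseFromOp, if any
        bool live = true;
        void Invalidate() { live = false; }
    };

    Inst* PrepareSparse(Inst& inst) {
        Inst* const sparse_inst = inst.sparse;
        if (sparse_inst != nullptr) {
            sparse_inst->Invalidate();
        }
        return sparse_inst;
    }

    int main() {
        Inst sparse_op{};
        Inst texture_op{&sparse_op};
        return PrepareSparse(texture_op) != nullptr ? 0 : 1; // 0: ".SPARSE" path
    }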
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
index e5aac14c8..e9d1bae05 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp
@@ -9,6 +9,17 @@
 namespace Shader::Backend::GLASM {
 
 void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
+    const std::array flags{
+        inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp),
+        inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp),
+        inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp),
+        inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp),
+    };
+    for (IR::Inst* const flag_inst : flags) {
+        if (flag_inst) {
+            flag_inst->Invalidate();
+        }
+    }
     const bool cc{inst.HasAssociatedPseudoOperation()};
     const std::string_view cc_mod{cc ? ".CC" : ""};
     if (cc) {
@@ -19,20 +30,22 @@ void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
     if (!cc) {
         return;
     }
-    static constexpr std::array<std::string_view, 4> masks{"EQ", "SF", "CF", "OF"};
-    const std::array flags{
-        inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp),
-        inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp),
-        inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp),
-        inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp),
-    };
-    for (size_t i = 0; i < flags.size(); ++i) {
-        if (flags[i]) {
-            const auto flag_ret{ctx.reg_alloc.Define(*flags[i])};
-            ctx.Add("MOV.S {},0;"
-                    "MOV.S {}({}.x),-1;",
-                    flag_ret, flag_ret, masks[i]);
-            flags[i]->Invalidate();
+    static constexpr std::array<std::string_view, 4> masks{"", "SF", "CF", "OF"};
+    for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) {
+        if (!flags[flag_index]) {
+            continue;
+        }
+        const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])};
+        if (flag_index == 0) {
+            ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret);
+        } else {
+            // We could use conditional execution here, but it's broken on Nvidia's compiler
+            ctx.Add("IF {}.x;"
+                    "MOV.S {}.x,-1;"
+                    "ELSE;"
+                    "MOV.S {}.x,0;"
+                    "ENDIF;",
+                    masks[flag_index], flag_ret, flag_ret);
         }
     }
 }
@@ -136,6 +149,17 @@ void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, Scal
 
 void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset,
                           ScalarU32 count) {
+    const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp);
+    const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp);
+    if (zero) {
+        zero->Invalidate();
+    }
+    if (sign) {
+        sign->Invalidate();
+    }
+    if (zero || sign) {
+        ctx.reg_alloc.InvalidateConditionCodes();
+    }
     const Register ret{ctx.reg_alloc.Define(inst)};
     if (count.type != Type::Register && offset.type != Type::Register) {
         ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base);
@@ -145,13 +169,11 @@ void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, Scal
                 "BFE.U {},RC,{};",
                 count, offset, ret, base);
     }
-    if (const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)) {
+    if (zero) {
         ctx.Add("SEQ.S {},{},0;", *zero, ret);
-        zero->Invalidate();
     }
-    if (const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)) {
+    if (sign) {
         ctx.Add("SLT.S {},{},0;", *sign, ret);
-        sign->Invalidate();
     }
 }
 
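For reference, a standalone model of the new zero-flag materialization: the flag is now computed from the result itself with SEQ.S rather than a conditional MOV keyed on the EQ condition code (plain C++; assumes NV_gpu_program4 integer compares write -1 for true, matching the -1/0 booleans used above):

    #include <cassert>
    #include <cstdint>

    // SEQ.S dst,a,b per component: -1 if a == b, else 0.
    int32_t seq_s(int32_t a, int32_t b) {
        return a == b ? -1 : 0;
    }

    int main() {
        const int32_t sum = 0;          // pretend result of IADD.S.CC
        assert(seq_s(sum, 0) == -1);    // zero flag set
        assert(seq_s(sum + 1, 0) == 0); // zero flag clear
    }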
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp
index af0e13d43..6e30790bb 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp
@@ -51,6 +51,10 @@ void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
 static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index,
                     const IR::Value& clamp, const IR::Value& segmentation_mask,
                     std::string_view op) {
+    IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
+    if (in_bounds) {
+        in_bounds->Invalidate();
+    }
     std::string mask;
     if (clamp.IsImmediate() && segmentation_mask.IsImmediate()) {
         mask = fmt::to_string(clamp.U32() | (segmentation_mask.U32() << 8));
@@ -61,13 +65,11 @@ static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32
                 ScalarU32{ctx.reg_alloc.Consume(clamp)});
     }
     const Register value_ret{ctx.reg_alloc.Define(inst)};
-    IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
     if (in_bounds) {
         const Register bounds_ret{ctx.reg_alloc.Define(*in_bounds)};
         ctx.Add("SHF{}.U {},{},{},{};"
                 "MOV.U {}.x,{}.y;",
                 op, bounds_ret, value, index, mask, value_ret, bounds_ret);
-        in_bounds->Invalidate();
     } else {
         ctx.Add("SHF{}.U {},{},{},{};"
                 "MOV.U {}.x,{}.y;",
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp
index 707b22247..c55a833c6 100644
--- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp
+++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp
@@ -22,11 +22,19 @@ Register RegAlloc::LongDefine(IR::Inst& inst) {
 }
 
 Value RegAlloc::Peek(const IR::Value& value) {
-    return value.IsImmediate() ? MakeImm(value) : PeekInst(*value.InstRecursive());
+    if (value.IsImmediate()) {
+        return MakeImm(value);
+    } else {
+        return PeekInst(*value.Inst());
+    }
 }
 
 Value RegAlloc::Consume(const IR::Value& value) {
-    return value.IsImmediate() ? MakeImm(value) : ConsumeInst(*value.InstRecursive());
+    if (value.IsImmediate()) {
+        return MakeImm(value);
+    } else {
+        return ConsumeInst(*value.Inst());
+    }
 }
 
 void RegAlloc::Unref(IR::Inst& inst) {
@@ -88,7 +96,14 @@ Value RegAlloc::MakeImm(const IR::Value& value) {
 }
 
 Register RegAlloc::Define(IR::Inst& inst, bool is_long) {
-    inst.SetDefinition<Id>(Alloc(is_long));
+    if (inst.HasUses()) {
+        inst.SetDefinition<Id>(Alloc(is_long));
+    } else {
+        Id id{};
+        id.is_long.Assign(is_long ? 1 : 0);
+        id.is_null.Assign(1);
+        inst.SetDefinition<Id>(id);
+    }
     return Register{PeekInst(inst)};
 }
 
@@ -115,10 +130,12 @@ Id RegAlloc::Alloc(bool is_long) {
             num_regs = std::max(num_regs, reg + 1);
             use[reg] = true;
             Id ret{};
-            ret.index.Assign(static_cast<u32>(reg));
+            ret.is_valid.Assign(1);
             ret.is_long.Assign(is_long ? 1 : 0);
             ret.is_spill.Assign(0);
             ret.is_condition_code.Assign(0);
+            ret.is_null.Assign(0);
+            ret.index.Assign(static_cast<u32>(reg));
             return ret;
         }
     }
@@ -126,6 +143,9 @@ Id RegAlloc::Alloc(bool is_long) {
 }
 
 void RegAlloc::Free(Id id) {
+    if (id.is_valid == 0) {
+        throw LogicError("Freeing invalid register");
+    }
     if (id.is_spill != 0) {
         throw NotImplementedException("Free spill");
     }
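The new is_valid bit turns a bad Free into a hard error instead of silent register-pool corruption. A minimal standalone model (mock Id, std::logic_error standing in for the project's LogicError):

    #include <stdexcept>

    struct Id {
        bool is_valid = false; // set only by Alloc in the real allocator
    };

    void Free(Id id) {
        if (!id.is_valid) {
            throw std::logic_error("Freeing invalid register");
        }
        // ...return the register to the pool...
    }

    int main() {
        try {
            Free(Id{}); // never allocated
        } catch (const std::logic_error&) {
            return 0; // expected
        }
        return 1;
    }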
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h
index 41b7c92be..b97c84146 100644
--- a/src/shader_recompiler/backend/glasm/reg_alloc.h
+++ b/src/shader_recompiler/backend/glasm/reg_alloc.h
@@ -35,10 +35,12 @@ enum class Type : u32 {
 struct Id {
     union {
         u32 raw;
-        BitField<0, 29, u32> index;
-        BitField<29, 1, u32> is_long;
-        BitField<30, 1, u32> is_spill;
-        BitField<31, 1, u32> is_condition_code;
+        BitField<0, 1, u32> is_valid;
+        BitField<1, 1, u32> is_long;
+        BitField<2, 1, u32> is_spill;
+        BitField<3, 1, u32> is_condition_code;
+        BitField<4, 1, u32> is_null;
+        BitField<5, 27, u32> index;
     };
 
     bool operator==(Id rhs) const noexcept {
@@ -164,12 +166,18 @@ auto FormatTo(FormatContext& ctx, Id id) {
         throw NotImplementedException("Spill emission");
     }
     if constexpr (scalar) {
+        if (id.is_null != 0) {
+            return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC.x" : "RC.x");
+        }
         if (id.is_long != 0) {
             return fmt::format_to(ctx.out(), "D{}.x", id.index.Value());
         } else {
            return fmt::format_to(ctx.out(), "R{}.x", id.index.Value());
         }
     } else {
+        if (id.is_null != 0) {
+            return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC" : "RC");
+        }
         if (id.is_long != 0) {
             return fmt::format_to(ctx.out(), "D{}", id.index.Value());
         } else {
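The reshuffled Id layout packs the flag bits low and the 27-bit index high; a standalone sketch with plain shifts (Common::BitField itself is not reproduced here):

    #include <cassert>
    #include <cstdint>

    // Mirrors the declarations above: bit 0 is_valid, bit 1 is_long,
    // bit 2 is_spill, bit 3 is_condition_code, bit 4 is_null, bits 5..31 index.
    constexpr uint32_t MakeId(bool valid, bool is_long, bool spill, bool cc,
                              bool null, uint32_t index) {
        return uint32_t{valid} | (uint32_t{is_long} << 1) | (uint32_t{spill} << 2) |
               (uint32_t{cc} << 3) | (uint32_t{null} << 4) | (index << 5);
    }

    int main() {
        constexpr uint32_t id = MakeId(true, true, false, false, false, 7); // D7
        assert((id & 1u) == 1u);        // is_valid
        assert(((id >> 1) & 1u) == 1u); // is_long
        assert((id >> 5) == 7u);        // index
    }

With is_null set, FormatTo above emits the DC/RC scratch names instead of an indexed register, so writes to unused results land in a dummy destination rather than consuming a register.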