diff options
| author | 2021-05-09 18:03:01 -0300 | |
|---|---|---|
| committer | 2021-07-22 21:51:30 -0400 | |
| commit | 4502595bc2518eecf934110e9393b11bf0c2f75a (patch) | |
| tree | 3e75e200936bce393152792b9ba90413ea83482c /src/shader_recompiler/backend | |
| parent | glasm: Implement GLASM fp16 packing and move bitwise insns (diff) | |
| download | yuzu-4502595bc2518eecf934110e9393b11bf0c2f75a.tar.gz yuzu-4502595bc2518eecf934110e9393b11bf0c2f75a.tar.xz yuzu-4502595bc2518eecf934110e9393b11bf0c2f75a.zip | |
glasm: Initial GLASM fp64 support
Diffstat (limited to 'src/shader_recompiler/backend')
9 files changed, 152 insertions, 55 deletions
diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index a59acbf6c..37663c1c8 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h | |||
| @@ -30,6 +30,13 @@ public: | |||
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | template <typename... Args> | 32 | template <typename... Args> |
| 33 | void LongAdd(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 34 | code += fmt::format(format_str, reg_alloc.LongDefine(inst), std::forward<Args>(args)...); | ||
| 35 | // TODO: Remove this | ||
| 36 | code += '\n'; | ||
| 37 | } | ||
| 38 | |||
| 39 | template <typename... Args> | ||
| 33 | void Add(const char* format_str, Args&&... args) { | 40 | void Add(const char* format_str, Args&&... args) { |
| 34 | code += fmt::format(format_str, std::forward<Args>(args)...); | 41 | code += fmt::format(format_str, std::forward<Args>(args)...); |
| 35 | // TODO: Remove this | 42 | // TODO: Remove this |
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 842ec157d..9db6eb4a0 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp | |||
| @@ -42,7 +42,11 @@ template <bool scalar> | |||
| 42 | struct RegWrapper { | 42 | struct RegWrapper { |
| 43 | RegWrapper(EmitContext& ctx, Value value) | 43 | RegWrapper(EmitContext& ctx, Value value) |
| 44 | : reg_alloc{ctx.reg_alloc}, allocated{value.type != Type::Register} { | 44 | : reg_alloc{ctx.reg_alloc}, allocated{value.type != Type::Register} { |
| 45 | reg = allocated ? reg_alloc.AllocReg() : Register{value}; | 45 | if (allocated) { |
| 46 | reg = value.type == Type::F64 ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg(); | ||
| 47 | } else { | ||
| 48 | reg = Register{value}; | ||
| 49 | } | ||
| 46 | switch (value.type) { | 50 | switch (value.type) { |
| 47 | case Type::Register: | 51 | case Type::Register: |
| 48 | break; | 52 | break; |
| @@ -55,6 +59,9 @@ struct RegWrapper { | |||
| 55 | case Type::F32: | 59 | case Type::F32: |
| 56 | ctx.Add("MOV.F {}.x,{};", reg, value.imm_f32); | 60 | ctx.Add("MOV.F {}.x,{};", reg, value.imm_f32); |
| 57 | break; | 61 | break; |
| 62 | case Type::F64: | ||
| 63 | ctx.Add("MOV.F64 {}.x,{};", reg, value.imm_f64); | ||
| 64 | break; | ||
| 58 | } | 65 | } |
| 59 | } | 66 | } |
| 60 | ~RegWrapper() { | 67 | ~RegWrapper() { |
| @@ -162,10 +169,12 @@ std::string EmitGLASM(const Profile&, IR::Program& program, Bindings&) { | |||
| 162 | for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) { | 169 | for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) { |
| 163 | header += fmt::format("R{},", index); | 170 | header += fmt::format("R{},", index); |
| 164 | } | 171 | } |
| 165 | header += "RC;"; | 172 | header += "RC;" |
| 166 | if (!program.info.storage_buffers_descriptors.empty()) { | 173 | "LONG TEMP "; |
| 167 | header += "LONG TEMP LC;"; | 174 | for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) { |
| 175 | header += fmt::format("D{},", index); | ||
| 168 | } | 176 | } |
| 177 | header += "DC;"; | ||
| 169 | ctx.code.insert(0, header); | 178 | ctx.code.insert(0, header); |
| 170 | ctx.code += "END"; | 179 | ctx.code += "END"; |
| 171 | return ctx.code; | 180 | return ctx.code; |
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp index 918d82375..eb6140954 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp | |||
| @@ -72,4 +72,12 @@ void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) { | |||
| 72 | ctx.Add("UP2H {}.xy,{}.x;", inst, value); | 72 | ctx.Add("UP2H {}.xy,{}.x;", inst, value); |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 76 | ctx.LongAdd("PK64 {}.x,{};", inst, value); | ||
| 77 | } | ||
| 78 | |||
| 79 | void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 80 | ctx.Add("UP64 {}.xy,{}.x;", inst, value); | ||
| 81 | } | ||
| 82 | |||
| 75 | } // namespace Shader::Backend::GLASM | 83 | } // namespace Shader::Backend::GLASM |
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp index fed6503c6..2b9a210aa 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp | |||
| @@ -10,7 +10,8 @@ | |||
| 10 | 10 | ||
| 11 | namespace Shader::Backend::GLASM { | 11 | namespace Shader::Backend::GLASM { |
| 12 | 12 | ||
| 13 | void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | 13 | void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, |
| 14 | [[maybe_unused]] Register value) { | ||
| 14 | throw NotImplementedException("GLASM instruction"); | 15 | throw NotImplementedException("GLASM instruction"); |
| 15 | } | 16 | } |
| 16 | 17 | ||
| @@ -18,8 +19,8 @@ void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | |||
| 18 | ctx.Add("MOV.F {}.x,|{}|;", inst, value); | 19 | ctx.Add("MOV.F {}.x,|{}|;", inst, value); |
| 19 | } | 20 | } |
| 20 | 21 | ||
| 21 | void EmitFPAbs64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | 22 | void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { |
| 22 | throw NotImplementedException("GLASM instruction"); | 23 | ctx.LongAdd("MOV.F64 {}.x,|{}|;", inst, value); |
| 23 | } | 24 | } |
| 24 | 25 | ||
| 25 | void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | 26 | void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, |
| @@ -31,9 +32,8 @@ void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { | |||
| 31 | ctx.Add("ADD.F {}.x,{},{};", inst, a, b); | 32 | ctx.Add("ADD.F {}.x,{},{};", inst, a, b); |
| 32 | } | 33 | } |
| 33 | 34 | ||
| 34 | void EmitFPAdd64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | 35 | void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { |
| 35 | [[maybe_unused]] Register a, [[maybe_unused]] Register b) { | 36 | ctx.LongAdd("ADD.F64 {}.x,{},{};", inst, a, b); |
| 36 | throw NotImplementedException("GLASM instruction"); | ||
| 37 | } | 37 | } |
| 38 | 38 | ||
| 39 | void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | 39 | void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, |
| @@ -94,8 +94,8 @@ void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value) { | |||
| 94 | ctx.Add("MOV.F {}.x,-{};", inst, value); | 94 | ctx.Add("MOV.F {}.x,-{};", inst, value); |
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | void EmitFPNeg64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | 97 | void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value) { |
| 98 | throw NotImplementedException("GLASM instruction"); | 98 | ctx.LongAdd("MOV.F64 {}.x,-{};", inst, value); |
| 99 | } | 99 | } |
| 100 | 100 | ||
| 101 | void EmitFPSin([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { | 101 | void EmitFPSin([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarF32 value) { |
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index cb1067dc9..ab1e08215 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h | |||
| @@ -202,20 +202,20 @@ void EmitPackFloat2x16(EmitContext& ctx, Register value); | |||
| 202 | void EmitUnpackFloat2x16(EmitContext& ctx, Register value); | 202 | void EmitUnpackFloat2x16(EmitContext& ctx, Register value); |
| 203 | void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); | 203 | void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); |
| 204 | void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); | 204 | void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); |
| 205 | void EmitPackDouble2x32(EmitContext& ctx, Register value); | 205 | void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value); |
| 206 | void EmitUnpackDouble2x32(EmitContext& ctx, Register value); | 206 | void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value); |
| 207 | void EmitGetZeroFromOp(EmitContext& ctx); | 207 | void EmitGetZeroFromOp(EmitContext& ctx); |
| 208 | void EmitGetSignFromOp(EmitContext& ctx); | 208 | void EmitGetSignFromOp(EmitContext& ctx); |
| 209 | void EmitGetCarryFromOp(EmitContext& ctx); | 209 | void EmitGetCarryFromOp(EmitContext& ctx); |
| 210 | void EmitGetOverflowFromOp(EmitContext& ctx); | 210 | void EmitGetOverflowFromOp(EmitContext& ctx); |
| 211 | void EmitGetSparseFromOp(EmitContext& ctx); | 211 | void EmitGetSparseFromOp(EmitContext& ctx); |
| 212 | void EmitGetInBoundsFromOp(EmitContext& ctx); | 212 | void EmitGetInBoundsFromOp(EmitContext& ctx); |
| 213 | void EmitFPAbs16(EmitContext& ctx, Register value); | 213 | void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, Register value); |
| 214 | void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | 214 | void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); |
| 215 | void EmitFPAbs64(EmitContext& ctx, Register value); | 215 | void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); |
| 216 | void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, Register a, Register b); | 216 | void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, Register a, Register b); |
| 217 | void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); | 217 | void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); |
| 218 | void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); | 218 | void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); |
| 219 | void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c); | 219 | void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c); |
| 220 | void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c); | 220 | void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c); |
| 221 | void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c); | 221 | void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c); |
| @@ -228,7 +228,7 @@ void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); | |||
| 228 | void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); | 228 | void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); |
| 229 | void EmitFPNeg16(EmitContext& ctx, Register value); | 229 | void EmitFPNeg16(EmitContext& ctx, Register value); |
| 230 | void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value); | 230 | void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value); |
| 231 | void EmitFPNeg64(EmitContext& ctx, Register value); | 231 | void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value); |
| 232 | void EmitFPSin(EmitContext& ctx, ScalarF32 value); | 232 | void EmitFPSin(EmitContext& ctx, ScalarF32 value); |
| 233 | void EmitFPCos(EmitContext& ctx, ScalarF32 value); | 233 | void EmitFPCos(EmitContext& ctx, ScalarF32 value); |
| 234 | void EmitFPExp2(EmitContext& ctx, ScalarF32 value); | 234 | void EmitFPExp2(EmitContext& ctx, ScalarF32 value); |
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp index 8ef0f7c17..0c6a6e1c8 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp | |||
| @@ -17,9 +17,9 @@ void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | |||
| 17 | // address = c[binding].xy | 17 | // address = c[binding].xy |
| 18 | // length = c[binding].z | 18 | // length = c[binding].z |
| 19 | const u32 sb_binding{binding.U32()}; | 19 | const u32 sb_binding{binding.U32()}; |
| 20 | ctx.Add("PK64.U LC,c[{}];" // pointer = address | 20 | ctx.Add("PK64.U DC,c[{}];" // pointer = address |
| 21 | "CVT.U64.U32 LC.z,{};" // offset = uint64_t(offset) | 21 | "CVT.U64.U32 DC.z,{};" // offset = uint64_t(offset) |
| 22 | "ADD.U64 LC.x,LC.x,LC.z;" // pointer += offset | 22 | "ADD.U64 DC.x,DC.x,DC.z;" // pointer += offset |
| 23 | "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length | 23 | "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length |
| 24 | sb_binding, offset, offset, sb_binding); | 24 | sb_binding, offset, offset, sb_binding); |
| 25 | if (else_expr.empty()) { | 25 | if (else_expr.empty()) { |
| @@ -32,13 +32,13 @@ void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | |||
| 32 | template <typename ValueType> | 32 | template <typename ValueType> |
| 33 | void Store(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value, | 33 | void Store(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value, |
| 34 | std::string_view size) { | 34 | std::string_view size) { |
| 35 | StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},LC.x;", size, value)); | 35 | StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value)); |
| 36 | } | 36 | } |
| 37 | 37 | ||
| 38 | void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, | 38 | void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, |
| 39 | std::string_view size) { | 39 | std::string_view size) { |
| 40 | const Register ret{ctx.reg_alloc.Define(inst)}; | 40 | const Register ret{ctx.reg_alloc.Define(inst)}; |
| 41 | StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},LC.x;", size, ret), | 41 | StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, ret), |
| 42 | fmt::format("MOV.U {},{{0,0,0,0}};", ret)); | 42 | fmt::format("MOV.U {},{{0,0,0,0}};", ret)); |
| 43 | } | 43 | } |
| 44 | } // Anonymous namespace | 44 | } // Anonymous namespace |
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 03464524e..f3baf33af 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp | |||
| @@ -281,14 +281,6 @@ void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Regist | |||
| 281 | NotImplemented(); | 281 | NotImplemented(); |
| 282 | } | 282 | } |
| 283 | 283 | ||
| 284 | void EmitPackDouble2x32(EmitContext& ctx, Register value) { | ||
| 285 | NotImplemented(); | ||
| 286 | } | ||
| 287 | |||
| 288 | void EmitUnpackDouble2x32(EmitContext& ctx, Register value) { | ||
| 289 | NotImplemented(); | ||
| 290 | } | ||
| 291 | |||
| 292 | void EmitGetZeroFromOp(EmitContext& ctx) { | 284 | void EmitGetZeroFromOp(EmitContext& ctx) { |
| 293 | NotImplemented(); | 285 | NotImplemented(); |
| 294 | } | 286 | } |
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp index 030b48d83..82b627500 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp | |||
| @@ -14,12 +14,11 @@ | |||
| 14 | namespace Shader::Backend::GLASM { | 14 | namespace Shader::Backend::GLASM { |
| 15 | 15 | ||
| 16 | Register RegAlloc::Define(IR::Inst& inst) { | 16 | Register RegAlloc::Define(IR::Inst& inst) { |
| 17 | const Id id{Alloc()}; | 17 | return Define(inst, false); |
| 18 | inst.SetDefinition<Id>(id); | 18 | } |
| 19 | Register ret; | 19 | |
| 20 | ret.type = Type::Register; | 20 | Register RegAlloc::LongDefine(IR::Inst& inst) { |
| 21 | ret.id = id; | 21 | return Define(inst, true); |
| 22 | return ret; | ||
| 23 | } | 22 | } |
| 24 | 23 | ||
| 25 | Value RegAlloc::Consume(const IR::Value& value) { | 24 | Value RegAlloc::Consume(const IR::Value& value) { |
| @@ -40,6 +39,10 @@ Value RegAlloc::Consume(const IR::Value& value) { | |||
| 40 | ret.type = Type::F32; | 39 | ret.type = Type::F32; |
| 41 | ret.imm_f32 = value.F32(); | 40 | ret.imm_f32 = value.F32(); |
| 42 | break; | 41 | break; |
| 42 | case IR::Type::F64: | ||
| 43 | ret.type = Type::F64; | ||
| 44 | ret.imm_f64 = value.F64(); | ||
| 45 | break; | ||
| 43 | default: | 46 | default: |
| 44 | throw NotImplementedException("Immediate type {}", value.Type()); | 47 | throw NotImplementedException("Immediate type {}", value.Type()); |
| 45 | } | 48 | } |
| @@ -49,7 +52,14 @@ Value RegAlloc::Consume(const IR::Value& value) { | |||
| 49 | Register RegAlloc::AllocReg() { | 52 | Register RegAlloc::AllocReg() { |
| 50 | Register ret; | 53 | Register ret; |
| 51 | ret.type = Type::Register; | 54 | ret.type = Type::Register; |
| 52 | ret.id = Alloc(); | 55 | ret.id = Alloc(false); |
| 56 | return ret; | ||
| 57 | } | ||
| 58 | |||
| 59 | Register RegAlloc::AllocLongReg() { | ||
| 60 | Register ret; | ||
| 61 | ret.type = Type::Register; | ||
| 62 | ret.id = Alloc(true); | ||
| 53 | return ret; | 63 | return ret; |
| 54 | } | 64 | } |
| 55 | 65 | ||
| @@ -57,6 +67,15 @@ void RegAlloc::FreeReg(Register reg) { | |||
| 57 | Free(reg.id); | 67 | Free(reg.id); |
| 58 | } | 68 | } |
| 59 | 69 | ||
| 70 | Register RegAlloc::Define(IR::Inst& inst, bool is_long) { | ||
| 71 | const Id id{Alloc(is_long)}; | ||
| 72 | inst.SetDefinition<Id>(id); | ||
| 73 | Register ret; | ||
| 74 | ret.type = Type::Register; | ||
| 75 | ret.id = id; | ||
| 76 | return ret; | ||
| 77 | } | ||
| 78 | |||
| 60 | Value RegAlloc::Consume(IR::Inst& inst) { | 79 | Value RegAlloc::Consume(IR::Inst& inst) { |
| 61 | const Id id{inst.Definition<Id>()}; | 80 | const Id id{inst.Definition<Id>()}; |
| 62 | inst.DestructiveRemoveUsage(); | 81 | inst.DestructiveRemoveUsage(); |
| @@ -69,18 +88,23 @@ Value RegAlloc::Consume(IR::Inst& inst) { | |||
| 69 | return ret; | 88 | return ret; |
| 70 | } | 89 | } |
| 71 | 90 | ||
| 72 | Id RegAlloc::Alloc() { | 91 | Id RegAlloc::Alloc(bool is_long) { |
| 73 | for (size_t reg = 0; reg < NUM_REGS; ++reg) { | 92 | size_t& num_regs{is_long ? num_used_long_registers : num_used_registers}; |
| 74 | if (register_use[reg]) { | 93 | std::bitset<NUM_REGS>& use{is_long ? long_register_use : register_use}; |
| 75 | continue; | 94 | if (num_used_registers + num_used_long_registers < NUM_REGS) { |
| 95 | for (size_t reg = 0; reg < NUM_REGS; ++reg) { | ||
| 96 | if (use[reg]) { | ||
| 97 | continue; | ||
| 98 | } | ||
| 99 | num_regs = std::max(num_regs, reg + 1); | ||
| 100 | use[reg] = true; | ||
| 101 | Id ret{}; | ||
| 102 | ret.index.Assign(static_cast<u32>(reg)); | ||
| 103 | ret.is_long.Assign(is_long ? 1 : 0); | ||
| 104 | ret.is_spill.Assign(0); | ||
| 105 | ret.is_condition_code.Assign(0); | ||
| 106 | return ret; | ||
| 76 | } | 107 | } |
| 77 | num_used_registers = std::max(num_used_registers, reg + 1); | ||
| 78 | register_use[reg] = true; | ||
| 79 | Id ret{}; | ||
| 80 | ret.index.Assign(static_cast<u32>(reg)); | ||
| 81 | ret.is_spill.Assign(0); | ||
| 82 | ret.is_condition_code.Assign(0); | ||
| 83 | return ret; | ||
| 84 | } | 108 | } |
| 85 | throw NotImplementedException("Register spilling"); | 109 | throw NotImplementedException("Register spilling"); |
| 86 | } | 110 | } |
| @@ -89,7 +113,11 @@ void RegAlloc::Free(Id id) { | |||
| 89 | if (id.is_spill != 0) { | 113 | if (id.is_spill != 0) { |
| 90 | throw NotImplementedException("Free spill"); | 114 | throw NotImplementedException("Free spill"); |
| 91 | } | 115 | } |
| 92 | register_use[id.index] = false; | 116 | if (id.is_long != 0) { |
| 117 | long_register_use[id.index] = false; | ||
| 118 | } else { | ||
| 119 | register_use[id.index] = false; | ||
| 120 | } | ||
| 93 | } | 121 | } |
| 94 | 122 | ||
| 95 | } // namespace Shader::Backend::GLASM | 123 | } // namespace Shader::Backend::GLASM |
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h index 6a238afa9..f1899eae1 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.h +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h | |||
| @@ -27,12 +27,14 @@ enum class Type : u32 { | |||
| 27 | U32, | 27 | U32, |
| 28 | S32, | 28 | S32, |
| 29 | F32, | 29 | F32, |
| 30 | F64, | ||
| 30 | }; | 31 | }; |
| 31 | 32 | ||
| 32 | struct Id { | 33 | struct Id { |
| 33 | union { | 34 | union { |
| 34 | u32 raw; | 35 | u32 raw; |
| 35 | BitField<0, 30, u32> index; | 36 | BitField<0, 29, u32> index; |
| 37 | BitField<29, 1, u32> is_long; | ||
| 36 | BitField<30, 1, u32> is_spill; | 38 | BitField<30, 1, u32> is_spill; |
| 37 | BitField<31, 1, u32> is_condition_code; | 39 | BitField<31, 1, u32> is_condition_code; |
| 38 | }; | 40 | }; |
| @@ -53,6 +55,7 @@ struct Value { | |||
| 53 | u32 imm_u32; | 55 | u32 imm_u32; |
| 54 | s32 imm_s32; | 56 | s32 imm_s32; |
| 55 | f32 imm_f32; | 57 | f32 imm_f32; |
| 58 | f64 imm_f64; | ||
| 56 | }; | 59 | }; |
| 57 | 60 | ||
| 58 | bool operator==(const Value& rhs) const noexcept { | 61 | bool operator==(const Value& rhs) const noexcept { |
| @@ -68,6 +71,8 @@ struct Value { | |||
| 68 | return imm_s32 == rhs.imm_s32; | 71 | return imm_s32 == rhs.imm_s32; |
| 69 | case Type::F32: | 72 | case Type::F32: |
| 70 | return Common::BitCast<u32>(imm_f32) == Common::BitCast<u32>(rhs.imm_f32); | 73 | return Common::BitCast<u32>(imm_f32) == Common::BitCast<u32>(rhs.imm_f32); |
| 74 | case Type::F64: | ||
| 75 | return Common::BitCast<u64>(imm_f64) == Common::BitCast<u64>(rhs.imm_f64); | ||
| 71 | } | 76 | } |
| 72 | return false; | 77 | return false; |
| 73 | } | 78 | } |
| @@ -80,6 +85,7 @@ struct ScalarRegister : Value {}; | |||
| 80 | struct ScalarU32 : Value {}; | 85 | struct ScalarU32 : Value {}; |
| 81 | struct ScalarS32 : Value {}; | 86 | struct ScalarS32 : Value {}; |
| 82 | struct ScalarF32 : Value {}; | 87 | struct ScalarF32 : Value {}; |
| 88 | struct ScalarF64 : Value {}; | ||
| 83 | 89 | ||
| 84 | class RegAlloc { | 90 | class RegAlloc { |
| 85 | public: | 91 | public: |
| @@ -87,9 +93,13 @@ public: | |||
| 87 | 93 | ||
| 88 | Register Define(IR::Inst& inst); | 94 | Register Define(IR::Inst& inst); |
| 89 | 95 | ||
| 96 | Register LongDefine(IR::Inst& inst); | ||
| 97 | |||
| 90 | Value Consume(const IR::Value& value); | 98 | Value Consume(const IR::Value& value); |
| 91 | 99 | ||
| 92 | Register AllocReg(); | 100 | [[nodiscard]] Register AllocReg(); |
| 101 | |||
| 102 | [[nodiscard]] Register AllocLongReg(); | ||
| 93 | 103 | ||
| 94 | void FreeReg(Register reg); | 104 | void FreeReg(Register reg); |
| 95 | 105 | ||
| @@ -97,19 +107,27 @@ public: | |||
| 97 | return num_used_registers; | 107 | return num_used_registers; |
| 98 | } | 108 | } |
| 99 | 109 | ||
| 110 | [[nodiscard]] size_t NumUsedLongRegisters() const noexcept { | ||
| 111 | return num_used_long_registers; | ||
| 112 | } | ||
| 113 | |||
| 100 | private: | 114 | private: |
| 101 | static constexpr size_t NUM_REGS = 4096; | 115 | static constexpr size_t NUM_REGS = 4096; |
| 102 | static constexpr size_t NUM_ELEMENTS = 4; | 116 | static constexpr size_t NUM_ELEMENTS = 4; |
| 103 | 117 | ||
| 118 | Register Define(IR::Inst& inst, bool is_long); | ||
| 119 | |||
| 104 | Value Consume(IR::Inst& inst); | 120 | Value Consume(IR::Inst& inst); |
| 105 | 121 | ||
| 106 | Id Alloc(); | 122 | Id Alloc(bool is_long); |
| 107 | 123 | ||
| 108 | void Free(Id id); | 124 | void Free(Id id); |
| 109 | 125 | ||
| 110 | EmitContext& ctx; | 126 | EmitContext& ctx; |
| 111 | size_t num_used_registers{}; | 127 | size_t num_used_registers{}; |
| 128 | size_t num_used_long_registers{}; | ||
| 112 | std::bitset<NUM_REGS> register_use{}; | 129 | std::bitset<NUM_REGS> register_use{}; |
| 130 | std::bitset<NUM_REGS> long_register_use{}; | ||
| 113 | }; | 131 | }; |
| 114 | 132 | ||
| 115 | template <bool scalar, typename FormatContext> | 133 | template <bool scalar, typename FormatContext> |
| @@ -121,9 +139,17 @@ auto FormatTo(FormatContext& ctx, Id id) { | |||
| 121 | throw NotImplementedException("Spill emission"); | 139 | throw NotImplementedException("Spill emission"); |
| 122 | } | 140 | } |
| 123 | if constexpr (scalar) { | 141 | if constexpr (scalar) { |
| 124 | return fmt::format_to(ctx.out(), "R{}.x", id.index.Value()); | 142 | if (id.is_long != 0) { |
| 143 | return fmt::format_to(ctx.out(), "D{}.x", id.index.Value()); | ||
| 144 | } else { | ||
| 145 | return fmt::format_to(ctx.out(), "R{}.x", id.index.Value()); | ||
| 146 | } | ||
| 125 | } else { | 147 | } else { |
| 126 | return fmt::format_to(ctx.out(), "R{}", id.index.Value()); | 148 | if (id.is_long != 0) { |
| 149 | return fmt::format_to(ctx.out(), "D{}", id.index.Value()); | ||
| 150 | } else { | ||
| 151 | return fmt::format_to(ctx.out(), "R{}", id.index.Value()); | ||
| 152 | } | ||
| 127 | } | 153 | } |
| 128 | } | 154 | } |
| 129 | 155 | ||
| @@ -184,6 +210,8 @@ struct fmt::formatter<Shader::Backend::GLASM::ScalarU32> { | |||
| 184 | return fmt::format_to(ctx.out(), "{}", static_cast<u32>(value.imm_s32)); | 210 | return fmt::format_to(ctx.out(), "{}", static_cast<u32>(value.imm_s32)); |
| 185 | case Shader::Backend::GLASM::Type::F32: | 211 | case Shader::Backend::GLASM::Type::F32: |
| 186 | return fmt::format_to(ctx.out(), "{}", Common::BitCast<u32>(value.imm_f32)); | 212 | return fmt::format_to(ctx.out(), "{}", Common::BitCast<u32>(value.imm_f32)); |
| 213 | case Shader::Backend::GLASM::Type::F64: | ||
| 214 | break; | ||
| 187 | } | 215 | } |
| 188 | throw Shader::InvalidArgument("Invalid value type {}", value.type); | 216 | throw Shader::InvalidArgument("Invalid value type {}", value.type); |
| 189 | } | 217 | } |
| @@ -205,6 +233,8 @@ struct fmt::formatter<Shader::Backend::GLASM::ScalarS32> { | |||
| 205 | return fmt::format_to(ctx.out(), "{}", value.imm_s32); | 233 | return fmt::format_to(ctx.out(), "{}", value.imm_s32); |
| 206 | case Shader::Backend::GLASM::Type::F32: | 234 | case Shader::Backend::GLASM::Type::F32: |
| 207 | return fmt::format_to(ctx.out(), "{}", Common::BitCast<s32>(value.imm_f32)); | 235 | return fmt::format_to(ctx.out(), "{}", Common::BitCast<s32>(value.imm_f32)); |
| 236 | case Shader::Backend::GLASM::Type::F64: | ||
| 237 | break; | ||
| 208 | } | 238 | } |
| 209 | throw Shader::InvalidArgument("Invalid value type {}", value.type); | 239 | throw Shader::InvalidArgument("Invalid value type {}", value.type); |
| 210 | } | 240 | } |
| @@ -226,6 +256,29 @@ struct fmt::formatter<Shader::Backend::GLASM::ScalarF32> { | |||
| 226 | return fmt::format_to(ctx.out(), "{}", Common::BitCast<s32>(value.imm_s32)); | 256 | return fmt::format_to(ctx.out(), "{}", Common::BitCast<s32>(value.imm_s32)); |
| 227 | case Shader::Backend::GLASM::Type::F32: | 257 | case Shader::Backend::GLASM::Type::F32: |
| 228 | return fmt::format_to(ctx.out(), "{}", value.imm_f32); | 258 | return fmt::format_to(ctx.out(), "{}", value.imm_f32); |
| 259 | case Shader::Backend::GLASM::Type::F64: | ||
| 260 | break; | ||
| 261 | } | ||
| 262 | throw Shader::InvalidArgument("Invalid value type {}", value.type); | ||
| 263 | } | ||
| 264 | }; | ||
| 265 | |||
| 266 | template <> | ||
| 267 | struct fmt::formatter<Shader::Backend::GLASM::ScalarF64> { | ||
| 268 | constexpr auto parse(format_parse_context& ctx) { | ||
| 269 | return ctx.begin(); | ||
| 270 | } | ||
| 271 | template <typename FormatContext> | ||
| 272 | auto format(const Shader::Backend::GLASM::ScalarF64& value, FormatContext& ctx) { | ||
| 273 | switch (value.type) { | ||
| 274 | case Shader::Backend::GLASM::Type::Register: | ||
| 275 | return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); | ||
| 276 | case Shader::Backend::GLASM::Type::U32: | ||
| 277 | case Shader::Backend::GLASM::Type::S32: | ||
| 278 | case Shader::Backend::GLASM::Type::F32: | ||
| 279 | break; | ||
| 280 | case Shader::Backend::GLASM::Type::F64: | ||
| 281 | return format_to(ctx.out(), "{}", value.imm_f64); | ||
| 229 | } | 282 | } |
| 230 | throw Shader::InvalidArgument("Invalid value type {}", value.type); | 283 | throw Shader::InvalidArgument("Invalid value type {}", value.type); |
| 231 | } | 284 | } |