diff options
Diffstat (limited to 'src/shader_recompiler/backend/glasm/emit_glasm.cpp')
| -rw-r--r-- | src/shader_recompiler/backend/glasm/emit_glasm.cpp | 492 |
1 files changed, 492 insertions, 0 deletions
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp new file mode 100644 index 000000000..a5e8c9b6e --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp | |||
| @@ -0,0 +1,492 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <string> | ||
| 7 | #include <tuple> | ||
| 8 | |||
| 9 | #include "common/div_ceil.h" | ||
| 10 | #include "common/settings.h" | ||
| 11 | #include "shader_recompiler/backend/bindings.h" | ||
| 12 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 13 | #include "shader_recompiler/backend/glasm/emit_glasm.h" | ||
| 14 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 15 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 16 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 17 | #include "shader_recompiler/profile.h" | ||
| 18 | #include "shader_recompiler/runtime_info.h" | ||
| 19 | |||
| 20 | namespace Shader::Backend::GLASM { | ||
| 21 | namespace { | ||
| 22 | template <class Func> | ||
| 23 | struct FuncTraits {}; | ||
| 24 | |||
| 25 | template <class ReturnType_, class... Args> | ||
| 26 | struct FuncTraits<ReturnType_ (*)(Args...)> { | ||
| 27 | using ReturnType = ReturnType_; | ||
| 28 | |||
| 29 | static constexpr size_t NUM_ARGS = sizeof...(Args); | ||
| 30 | |||
| 31 | template <size_t I> | ||
| 32 | using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; | ||
| 33 | }; | ||
| 34 | |||
| 35 | template <typename T> | ||
| 36 | struct Identity { | ||
| 37 | Identity(T data_) : data{data_} {} | ||
| 38 | |||
| 39 | T Extract() { | ||
| 40 | return data; | ||
| 41 | } | ||
| 42 | |||
| 43 | T data; | ||
| 44 | }; | ||
| 45 | |||
| 46 | template <bool scalar> | ||
| 47 | class RegWrapper { | ||
| 48 | public: | ||
| 49 | RegWrapper(EmitContext& ctx, const IR::Value& ir_value) : reg_alloc{ctx.reg_alloc} { | ||
| 50 | const Value value{reg_alloc.Peek(ir_value)}; | ||
| 51 | if (value.type == Type::Register) { | ||
| 52 | inst = ir_value.InstRecursive(); | ||
| 53 | reg = Register{value}; | ||
| 54 | } else { | ||
| 55 | reg = value.type == Type::U64 ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg(); | ||
| 56 | } | ||
| 57 | switch (value.type) { | ||
| 58 | case Type::Register: | ||
| 59 | case Type::Void: | ||
| 60 | break; | ||
| 61 | case Type::U32: | ||
| 62 | ctx.Add("MOV.U {}.x,{};", reg, value.imm_u32); | ||
| 63 | break; | ||
| 64 | case Type::U64: | ||
| 65 | ctx.Add("MOV.U64 {}.x,{};", reg, value.imm_u64); | ||
| 66 | break; | ||
| 67 | } | ||
| 68 | } | ||
| 69 | |||
| 70 | auto Extract() { | ||
| 71 | if (inst) { | ||
| 72 | reg_alloc.Unref(*inst); | ||
| 73 | } else { | ||
| 74 | reg_alloc.FreeReg(reg); | ||
| 75 | } | ||
| 76 | return std::conditional_t<scalar, ScalarRegister, Register>{Value{reg}}; | ||
| 77 | } | ||
| 78 | |||
| 79 | private: | ||
| 80 | RegAlloc& reg_alloc; | ||
| 81 | IR::Inst* inst{}; | ||
| 82 | Register reg{}; | ||
| 83 | }; | ||
| 84 | |||
| 85 | template <typename ArgType> | ||
| 86 | class ValueWrapper { | ||
| 87 | public: | ||
| 88 | ValueWrapper(EmitContext& ctx, const IR::Value& ir_value_) | ||
| 89 | : reg_alloc{ctx.reg_alloc}, ir_value{ir_value_}, value{reg_alloc.Peek(ir_value)} {} | ||
| 90 | |||
| 91 | ArgType Extract() { | ||
| 92 | if (!ir_value.IsImmediate()) { | ||
| 93 | reg_alloc.Unref(*ir_value.InstRecursive()); | ||
| 94 | } | ||
| 95 | return value; | ||
| 96 | } | ||
| 97 | |||
| 98 | private: | ||
| 99 | RegAlloc& reg_alloc; | ||
| 100 | const IR::Value& ir_value; | ||
| 101 | ArgType value; | ||
| 102 | }; | ||
| 103 | |||
| 104 | template <typename ArgType> | ||
| 105 | auto Arg(EmitContext& ctx, const IR::Value& arg) { | ||
| 106 | if constexpr (std::is_same_v<ArgType, Register>) { | ||
| 107 | return RegWrapper<false>{ctx, arg}; | ||
| 108 | } else if constexpr (std::is_same_v<ArgType, ScalarRegister>) { | ||
| 109 | return RegWrapper<true>{ctx, arg}; | ||
| 110 | } else if constexpr (std::is_base_of_v<Value, ArgType>) { | ||
| 111 | return ValueWrapper<ArgType>{ctx, arg}; | ||
| 112 | } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) { | ||
| 113 | return Identity<const IR::Value&>{arg}; | ||
| 114 | } else if constexpr (std::is_same_v<ArgType, u32>) { | ||
| 115 | return Identity{arg.U32()}; | ||
| 116 | } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { | ||
| 117 | return Identity{arg.Attribute()}; | ||
| 118 | } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { | ||
| 119 | return Identity{arg.Patch()}; | ||
| 120 | } else if constexpr (std::is_same_v<ArgType, IR::Reg>) { | ||
| 121 | return Identity{arg.Reg()}; | ||
| 122 | } | ||
| 123 | } | ||
| 124 | |||
| 125 | template <auto func, bool is_first_arg_inst> | ||
| 126 | struct InvokeCall { | ||
| 127 | template <typename... Args> | ||
| 128 | InvokeCall(EmitContext& ctx, IR::Inst* inst, Args&&... args) { | ||
| 129 | if constexpr (is_first_arg_inst) { | ||
| 130 | func(ctx, *inst, args.Extract()...); | ||
| 131 | } else { | ||
| 132 | func(ctx, args.Extract()...); | ||
| 133 | } | ||
| 134 | } | ||
| 135 | }; | ||
| 136 | |||
| 137 | template <auto func, bool is_first_arg_inst, size_t... I> | ||
| 138 | void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { | ||
| 139 | using Traits = FuncTraits<decltype(func)>; | ||
| 140 | if constexpr (is_first_arg_inst) { | ||
| 141 | InvokeCall<func, is_first_arg_inst>{ | ||
| 142 | ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...}; | ||
| 143 | } else { | ||
| 144 | InvokeCall<func, is_first_arg_inst>{ | ||
| 145 | ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...}; | ||
| 146 | } | ||
| 147 | } | ||
| 148 | |||
| 149 | template <auto func> | ||
| 150 | void Invoke(EmitContext& ctx, IR::Inst* inst) { | ||
| 151 | using Traits = FuncTraits<decltype(func)>; | ||
| 152 | static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); | ||
| 153 | if constexpr (Traits::NUM_ARGS == 1) { | ||
| 154 | Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{}); | ||
| 155 | } else { | ||
| 156 | using FirstArgType = typename Traits::template ArgType<1>; | ||
| 157 | static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>; | ||
| 158 | using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>; | ||
| 159 | Invoke<func, is_first_arg_inst>(ctx, inst, Indices{}); | ||
| 160 | } | ||
| 161 | } | ||
| 162 | |||
| 163 | void EmitInst(EmitContext& ctx, IR::Inst* inst) { | ||
| 164 | switch (inst->GetOpcode()) { | ||
| 165 | #define OPCODE(name, result_type, ...) \ | ||
| 166 | case IR::Opcode::name: \ | ||
| 167 | return Invoke<&Emit##name>(ctx, inst); | ||
| 168 | #include "shader_recompiler/frontend/ir/opcodes.inc" | ||
| 169 | #undef OPCODE | ||
| 170 | } | ||
| 171 | throw LogicError("Invalid opcode {}", inst->GetOpcode()); | ||
| 172 | } | ||
| 173 | |||
| 174 | bool IsReference(IR::Inst& inst) { | ||
| 175 | return inst.GetOpcode() == IR::Opcode::Reference; | ||
| 176 | } | ||
| 177 | |||
| 178 | void PrecolorInst(IR::Inst& phi) { | ||
| 179 | // Insert phi moves before references to avoid overwritting other phis | ||
| 180 | const size_t num_args{phi.NumArgs()}; | ||
| 181 | for (size_t i = 0; i < num_args; ++i) { | ||
| 182 | IR::Block& phi_block{*phi.PhiBlock(i)}; | ||
| 183 | auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()}; | ||
| 184 | IR::IREmitter ir{phi_block, it}; | ||
| 185 | const IR::Value arg{phi.Arg(i)}; | ||
| 186 | if (arg.IsImmediate()) { | ||
| 187 | ir.PhiMove(phi, arg); | ||
| 188 | } else { | ||
| 189 | ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())}); | ||
| 190 | } | ||
| 191 | } | ||
| 192 | for (size_t i = 0; i < num_args; ++i) { | ||
| 193 | IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); | ||
| 194 | } | ||
| 195 | } | ||
| 196 | |||
| 197 | void Precolor(const IR::Program& program) { | ||
| 198 | for (IR::Block* const block : program.blocks) { | ||
| 199 | for (IR::Inst& phi : block->Instructions()) { | ||
| 200 | if (!IR::IsPhi(phi)) { | ||
| 201 | break; | ||
| 202 | } | ||
| 203 | PrecolorInst(phi); | ||
| 204 | } | ||
| 205 | } | ||
| 206 | } | ||
| 207 | |||
| 208 | void EmitCode(EmitContext& ctx, const IR::Program& program) { | ||
| 209 | const auto eval{ | ||
| 210 | [&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }}; | ||
| 211 | for (const IR::AbstractSyntaxNode& node : program.syntax_list) { | ||
| 212 | switch (node.type) { | ||
| 213 | case IR::AbstractSyntaxNode::Type::Block: | ||
| 214 | for (IR::Inst& inst : node.data.block->Instructions()) { | ||
| 215 | EmitInst(ctx, &inst); | ||
| 216 | } | ||
| 217 | break; | ||
| 218 | case IR::AbstractSyntaxNode::Type::If: | ||
| 219 | ctx.Add("MOV.S.CC RC,{};" | ||
| 220 | "IF NE.x;", | ||
| 221 | eval(node.data.if_node.cond)); | ||
| 222 | break; | ||
| 223 | case IR::AbstractSyntaxNode::Type::EndIf: | ||
| 224 | ctx.Add("ENDIF;"); | ||
| 225 | break; | ||
| 226 | case IR::AbstractSyntaxNode::Type::Loop: | ||
| 227 | ctx.Add("REP;"); | ||
| 228 | break; | ||
| 229 | case IR::AbstractSyntaxNode::Type::Repeat: | ||
| 230 | if (!Settings::values.disable_shader_loop_safety_checks) { | ||
| 231 | const u32 loop_index{ctx.num_safety_loop_vars++}; | ||
| 232 | const u32 vector_index{loop_index / 4}; | ||
| 233 | const char component{"xyzw"[loop_index % 4]}; | ||
| 234 | ctx.Add("SUB.S.CC loop{}.{},loop{}.{},1;" | ||
| 235 | "BRK(LT.{});", | ||
| 236 | vector_index, component, vector_index, component, component); | ||
| 237 | } | ||
| 238 | if (node.data.repeat.cond.IsImmediate()) { | ||
| 239 | if (node.data.repeat.cond.U1()) { | ||
| 240 | ctx.Add("ENDREP;"); | ||
| 241 | } else { | ||
| 242 | ctx.Add("BRK;" | ||
| 243 | "ENDREP;"); | ||
| 244 | } | ||
| 245 | } else { | ||
| 246 | ctx.Add("MOV.S.CC RC,{};" | ||
| 247 | "BRK(EQ.x);" | ||
| 248 | "ENDREP;", | ||
| 249 | eval(node.data.repeat.cond)); | ||
| 250 | } | ||
| 251 | break; | ||
| 252 | case IR::AbstractSyntaxNode::Type::Break: | ||
| 253 | if (node.data.break_node.cond.IsImmediate()) { | ||
| 254 | if (node.data.break_node.cond.U1()) { | ||
| 255 | ctx.Add("BRK;"); | ||
| 256 | } | ||
| 257 | } else { | ||
| 258 | ctx.Add("MOV.S.CC RC,{};" | ||
| 259 | "BRK (NE.x);", | ||
| 260 | eval(node.data.break_node.cond)); | ||
| 261 | } | ||
| 262 | break; | ||
| 263 | case IR::AbstractSyntaxNode::Type::Return: | ||
| 264 | case IR::AbstractSyntaxNode::Type::Unreachable: | ||
| 265 | ctx.Add("RET;"); | ||
| 266 | break; | ||
| 267 | } | ||
| 268 | } | ||
| 269 | if (!ctx.reg_alloc.IsEmpty()) { | ||
| 270 | LOG_WARNING(Shader_GLASM, "Register leak after generating code"); | ||
| 271 | } | ||
| 272 | } | ||
| 273 | |||
| 274 | void SetupOptions(const IR::Program& program, const Profile& profile, | ||
| 275 | const RuntimeInfo& runtime_info, std::string& header) { | ||
| 276 | const Info& info{program.info}; | ||
| 277 | const Stage stage{program.stage}; | ||
| 278 | |||
| 279 | // TODO: Track the shared atomic ops | ||
| 280 | header += "OPTION NV_internal;" | ||
| 281 | "OPTION NV_shader_storage_buffer;" | ||
| 282 | "OPTION NV_gpu_program_fp64;"; | ||
| 283 | if (info.uses_int64_bit_atomics) { | ||
| 284 | header += "OPTION NV_shader_atomic_int64;"; | ||
| 285 | } | ||
| 286 | if (info.uses_atomic_f32_add) { | ||
| 287 | header += "OPTION NV_shader_atomic_float;"; | ||
| 288 | } | ||
| 289 | if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { | ||
| 290 | header += "OPTION NV_shader_atomic_fp16_vector;"; | ||
| 291 | } | ||
| 292 | if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || | ||
| 293 | info.uses_fswzadd) { | ||
| 294 | header += "OPTION NV_shader_thread_group;"; | ||
| 295 | } | ||
| 296 | if (info.uses_subgroup_shuffles) { | ||
| 297 | header += "OPTION NV_shader_thread_shuffle;"; | ||
| 298 | } | ||
| 299 | if (info.uses_sparse_residency) { | ||
| 300 | header += "OPTION EXT_sparse_texture2;"; | ||
| 301 | } | ||
| 302 | const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] || | ||
| 303 | info.stores[IR::Attribute::Layer]}; | ||
| 304 | if ((stage != Stage::Geometry && stores_viewport_layer) || | ||
| 305 | info.stores[IR::Attribute::ViewportMask]) { | ||
| 306 | if (profile.support_viewport_index_layer_non_geometry) { | ||
| 307 | header += "OPTION NV_viewport_array2;"; | ||
| 308 | } | ||
| 309 | } | ||
| 310 | if (program.is_geometry_passthrough && profile.support_geometry_shader_passthrough) { | ||
| 311 | header += "OPTION NV_geometry_shader_passthrough;"; | ||
| 312 | } | ||
| 313 | if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { | ||
| 314 | header += "OPTION EXT_shader_image_load_formatted;"; | ||
| 315 | } | ||
| 316 | if (profile.support_derivative_control) { | ||
| 317 | header += "OPTION ARB_derivative_control;"; | ||
| 318 | } | ||
| 319 | if (stage == Stage::Fragment && runtime_info.force_early_z != 0) { | ||
| 320 | header += "OPTION NV_early_fragment_tests;"; | ||
| 321 | } | ||
| 322 | if (stage == Stage::Fragment) { | ||
| 323 | header += "OPTION ARB_draw_buffers;"; | ||
| 324 | } | ||
| 325 | } | ||
| 326 | |||
| 327 | std::string_view StageHeader(Stage stage) { | ||
| 328 | switch (stage) { | ||
| 329 | case Stage::VertexA: | ||
| 330 | case Stage::VertexB: | ||
| 331 | return "!!NVvp5.0\n"; | ||
| 332 | case Stage::TessellationControl: | ||
| 333 | return "!!NVtcp5.0\n"; | ||
| 334 | case Stage::TessellationEval: | ||
| 335 | return "!!NVtep5.0\n"; | ||
| 336 | case Stage::Geometry: | ||
| 337 | return "!!NVgp5.0\n"; | ||
| 338 | case Stage::Fragment: | ||
| 339 | return "!!NVfp5.0\n"; | ||
| 340 | case Stage::Compute: | ||
| 341 | return "!!NVcp5.0\n"; | ||
| 342 | } | ||
| 343 | throw InvalidArgument("Invalid stage {}", stage); | ||
| 344 | } | ||
| 345 | |||
| 346 | std::string_view InputPrimitive(InputTopology topology) { | ||
| 347 | switch (topology) { | ||
| 348 | case InputTopology::Points: | ||
| 349 | return "POINTS"; | ||
| 350 | case InputTopology::Lines: | ||
| 351 | return "LINES"; | ||
| 352 | case InputTopology::LinesAdjacency: | ||
| 353 | return "LINESS_ADJACENCY"; | ||
| 354 | case InputTopology::Triangles: | ||
| 355 | return "TRIANGLES"; | ||
| 356 | case InputTopology::TrianglesAdjacency: | ||
| 357 | return "TRIANGLES_ADJACENCY"; | ||
| 358 | } | ||
| 359 | throw InvalidArgument("Invalid input topology {}", topology); | ||
| 360 | } | ||
| 361 | |||
| 362 | std::string_view OutputPrimitive(OutputTopology topology) { | ||
| 363 | switch (topology) { | ||
| 364 | case OutputTopology::PointList: | ||
| 365 | return "POINTS"; | ||
| 366 | case OutputTopology::LineStrip: | ||
| 367 | return "LINE_STRIP"; | ||
| 368 | case OutputTopology::TriangleStrip: | ||
| 369 | return "TRIANGLE_STRIP"; | ||
| 370 | } | ||
| 371 | throw InvalidArgument("Invalid output topology {}", topology); | ||
| 372 | } | ||
| 373 | |||
| 374 | std::string_view GetTessMode(TessPrimitive primitive) { | ||
| 375 | switch (primitive) { | ||
| 376 | case TessPrimitive::Triangles: | ||
| 377 | return "TRIANGLES"; | ||
| 378 | case TessPrimitive::Quads: | ||
| 379 | return "QUADS"; | ||
| 380 | case TessPrimitive::Isolines: | ||
| 381 | return "ISOLINES"; | ||
| 382 | } | ||
| 383 | throw InvalidArgument("Invalid tessellation primitive {}", primitive); | ||
| 384 | } | ||
| 385 | |||
| 386 | std::string_view GetTessSpacing(TessSpacing spacing) { | ||
| 387 | switch (spacing) { | ||
| 388 | case TessSpacing::Equal: | ||
| 389 | return "EQUAL"; | ||
| 390 | case TessSpacing::FractionalOdd: | ||
| 391 | return "FRACTIONAL_ODD"; | ||
| 392 | case TessSpacing::FractionalEven: | ||
| 393 | return "FRACTIONAL_EVEN"; | ||
| 394 | } | ||
| 395 | throw InvalidArgument("Invalid tessellation spacing {}", spacing); | ||
| 396 | } | ||
| 397 | } // Anonymous namespace | ||
| 398 | |||
| 399 | std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, | ||
| 400 | Bindings& bindings) { | ||
| 401 | EmitContext ctx{program, bindings, profile, runtime_info}; | ||
| 402 | Precolor(program); | ||
| 403 | EmitCode(ctx, program); | ||
| 404 | std::string header{StageHeader(program.stage)}; | ||
| 405 | SetupOptions(program, profile, runtime_info, header); | ||
| 406 | switch (program.stage) { | ||
| 407 | case Stage::TessellationControl: | ||
| 408 | header += fmt::format("VERTICES_OUT {};", program.invocations); | ||
| 409 | break; | ||
| 410 | case Stage::TessellationEval: | ||
| 411 | header += fmt::format("TESS_MODE {};" | ||
| 412 | "TESS_SPACING {};" | ||
| 413 | "TESS_VERTEX_ORDER {};", | ||
| 414 | GetTessMode(runtime_info.tess_primitive), | ||
| 415 | GetTessSpacing(runtime_info.tess_spacing), | ||
| 416 | runtime_info.tess_clockwise ? "CW" : "CCW"); | ||
| 417 | break; | ||
| 418 | case Stage::Geometry: | ||
| 419 | header += fmt::format("PRIMITIVE_IN {};", InputPrimitive(runtime_info.input_topology)); | ||
| 420 | if (program.is_geometry_passthrough) { | ||
| 421 | if (profile.support_geometry_shader_passthrough) { | ||
| 422 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 423 | if (program.info.passthrough.Generic(index)) { | ||
| 424 | header += fmt::format("PASSTHROUGH result.attrib[{}];", index); | ||
| 425 | } | ||
| 426 | } | ||
| 427 | if (program.info.passthrough.AnyComponent(IR::Attribute::PositionX)) { | ||
| 428 | header += "PASSTHROUGH result.position;"; | ||
| 429 | } | ||
| 430 | } else { | ||
| 431 | LOG_WARNING(Shader_GLASM, "Passthrough geometry program used but not supported"); | ||
| 432 | } | ||
| 433 | } else { | ||
| 434 | header += | ||
| 435 | fmt::format("VERTICES_OUT {};" | ||
| 436 | "PRIMITIVE_OUT {};", | ||
| 437 | program.output_vertices, OutputPrimitive(program.output_topology)); | ||
| 438 | } | ||
| 439 | break; | ||
| 440 | case Stage::Compute: | ||
| 441 | header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0], | ||
| 442 | program.workgroup_size[1], program.workgroup_size[2]); | ||
| 443 | break; | ||
| 444 | default: | ||
| 445 | break; | ||
| 446 | } | ||
| 447 | if (program.shared_memory_size > 0) { | ||
| 448 | header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size); | ||
| 449 | header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};"); | ||
| 450 | } | ||
| 451 | header += "TEMP "; | ||
| 452 | for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) { | ||
| 453 | header += fmt::format("R{},", index); | ||
| 454 | } | ||
| 455 | if (program.local_memory_size > 0) { | ||
| 456 | header += fmt::format("lmem[{}],", program.local_memory_size); | ||
| 457 | } | ||
| 458 | if (program.info.uses_fswzadd) { | ||
| 459 | header += "FSWZA[4],FSWZB[4],"; | ||
| 460 | } | ||
| 461 | const u32 num_safety_loop_vectors{Common::DivCeil(ctx.num_safety_loop_vars, 4u)}; | ||
| 462 | for (u32 index = 0; index < num_safety_loop_vectors; ++index) { | ||
| 463 | header += fmt::format("loop{},", index); | ||
| 464 | } | ||
| 465 | header += "RC;" | ||
| 466 | "LONG TEMP "; | ||
| 467 | for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) { | ||
| 468 | header += fmt::format("D{},", index); | ||
| 469 | } | ||
| 470 | header += "DC;"; | ||
| 471 | if (program.info.uses_fswzadd) { | ||
| 472 | header += "MOV.F FSWZA[0],-1;" | ||
| 473 | "MOV.F FSWZA[1],1;" | ||
| 474 | "MOV.F FSWZA[2],-1;" | ||
| 475 | "MOV.F FSWZA[3],0;" | ||
| 476 | "MOV.F FSWZB[0],-1;" | ||
| 477 | "MOV.F FSWZB[1],-1;" | ||
| 478 | "MOV.F FSWZB[2],1;" | ||
| 479 | "MOV.F FSWZB[3],-1;"; | ||
| 480 | } | ||
| 481 | for (u32 index = 0; index < num_safety_loop_vectors; ++index) { | ||
| 482 | header += fmt::format("MOV.S loop{},{{0x2000,0x2000,0x2000,0x2000}};", index); | ||
| 483 | } | ||
| 484 | if (ctx.uses_y_direction) { | ||
| 485 | header += "PARAM y_direction[1]={state.material.front.ambient};"; | ||
| 486 | } | ||
| 487 | ctx.code.insert(0, header); | ||
| 488 | ctx.code += "END"; | ||
| 489 | return ctx.code; | ||
| 490 | } | ||
| 491 | |||
| 492 | } // namespace Shader::Backend::GLASM | ||