summaryrefslogtreecommitdiff
path: root/src/shader_recompiler/backend/glasm/emit_glasm.cpp
diff options
context:
space:
mode:
authorGravatar bunnei2021-07-25 11:39:04 -0700
committerGravatar GitHub2021-07-25 11:39:04 -0700
commit98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f (patch)
tree816faa96c2c4d291825063433331a8ea4b3d08f1 /src/shader_recompiler/backend/glasm/emit_glasm.cpp
parentMerge pull request #6699 from lat9nq/common-threads (diff)
parentshader: Support out of bound local memory reads and immediate writes (diff)
downloadyuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.gz
yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.xz
yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.zip
Merge pull request #6585 from ameerj/hades
Shader Decompiler Rewrite
Diffstat (limited to 'src/shader_recompiler/backend/glasm/emit_glasm.cpp')
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm.cpp492
1 files changed, 492 insertions, 0 deletions
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
new file mode 100644
index 000000000..a5e8c9b6e
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -0,0 +1,492 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <string>
7#include <tuple>
8
9#include "common/div_ceil.h"
10#include "common/settings.h"
11#include "shader_recompiler/backend/bindings.h"
12#include "shader_recompiler/backend/glasm/emit_context.h"
13#include "shader_recompiler/backend/glasm/emit_glasm.h"
14#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
15#include "shader_recompiler/frontend/ir/ir_emitter.h"
16#include "shader_recompiler/frontend/ir/program.h"
17#include "shader_recompiler/profile.h"
18#include "shader_recompiler/runtime_info.h"
19
20namespace Shader::Backend::GLASM {
21namespace {
22template <class Func>
23struct FuncTraits {};
24
25template <class ReturnType_, class... Args>
26struct FuncTraits<ReturnType_ (*)(Args...)> {
27 using ReturnType = ReturnType_;
28
29 static constexpr size_t NUM_ARGS = sizeof...(Args);
30
31 template <size_t I>
32 using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
33};
34
35template <typename T>
36struct Identity {
37 Identity(T data_) : data{data_} {}
38
39 T Extract() {
40 return data;
41 }
42
43 T data;
44};
45
46template <bool scalar>
47class RegWrapper {
48public:
49 RegWrapper(EmitContext& ctx, const IR::Value& ir_value) : reg_alloc{ctx.reg_alloc} {
50 const Value value{reg_alloc.Peek(ir_value)};
51 if (value.type == Type::Register) {
52 inst = ir_value.InstRecursive();
53 reg = Register{value};
54 } else {
55 reg = value.type == Type::U64 ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg();
56 }
57 switch (value.type) {
58 case Type::Register:
59 case Type::Void:
60 break;
61 case Type::U32:
62 ctx.Add("MOV.U {}.x,{};", reg, value.imm_u32);
63 break;
64 case Type::U64:
65 ctx.Add("MOV.U64 {}.x,{};", reg, value.imm_u64);
66 break;
67 }
68 }
69
70 auto Extract() {
71 if (inst) {
72 reg_alloc.Unref(*inst);
73 } else {
74 reg_alloc.FreeReg(reg);
75 }
76 return std::conditional_t<scalar, ScalarRegister, Register>{Value{reg}};
77 }
78
79private:
80 RegAlloc& reg_alloc;
81 IR::Inst* inst{};
82 Register reg{};
83};
84
85template <typename ArgType>
86class ValueWrapper {
87public:
88 ValueWrapper(EmitContext& ctx, const IR::Value& ir_value_)
89 : reg_alloc{ctx.reg_alloc}, ir_value{ir_value_}, value{reg_alloc.Peek(ir_value)} {}
90
91 ArgType Extract() {
92 if (!ir_value.IsImmediate()) {
93 reg_alloc.Unref(*ir_value.InstRecursive());
94 }
95 return value;
96 }
97
98private:
99 RegAlloc& reg_alloc;
100 const IR::Value& ir_value;
101 ArgType value;
102};
103
104template <typename ArgType>
105auto Arg(EmitContext& ctx, const IR::Value& arg) {
106 if constexpr (std::is_same_v<ArgType, Register>) {
107 return RegWrapper<false>{ctx, arg};
108 } else if constexpr (std::is_same_v<ArgType, ScalarRegister>) {
109 return RegWrapper<true>{ctx, arg};
110 } else if constexpr (std::is_base_of_v<Value, ArgType>) {
111 return ValueWrapper<ArgType>{ctx, arg};
112 } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
113 return Identity<const IR::Value&>{arg};
114 } else if constexpr (std::is_same_v<ArgType, u32>) {
115 return Identity{arg.U32()};
116 } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
117 return Identity{arg.Attribute()};
118 } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
119 return Identity{arg.Patch()};
120 } else if constexpr (std::is_same_v<ArgType, IR::Reg>) {
121 return Identity{arg.Reg()};
122 }
123}
124
125template <auto func, bool is_first_arg_inst>
126struct InvokeCall {
127 template <typename... Args>
128 InvokeCall(EmitContext& ctx, IR::Inst* inst, Args&&... args) {
129 if constexpr (is_first_arg_inst) {
130 func(ctx, *inst, args.Extract()...);
131 } else {
132 func(ctx, args.Extract()...);
133 }
134 }
135};
136
137template <auto func, bool is_first_arg_inst, size_t... I>
138void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
139 using Traits = FuncTraits<decltype(func)>;
140 if constexpr (is_first_arg_inst) {
141 InvokeCall<func, is_first_arg_inst>{
142 ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...};
143 } else {
144 InvokeCall<func, is_first_arg_inst>{
145 ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...};
146 }
147}
148
149template <auto func>
150void Invoke(EmitContext& ctx, IR::Inst* inst) {
151 using Traits = FuncTraits<decltype(func)>;
152 static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
153 if constexpr (Traits::NUM_ARGS == 1) {
154 Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{});
155 } else {
156 using FirstArgType = typename Traits::template ArgType<1>;
157 static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>;
158 using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>;
159 Invoke<func, is_first_arg_inst>(ctx, inst, Indices{});
160 }
161}
162
163void EmitInst(EmitContext& ctx, IR::Inst* inst) {
164 switch (inst->GetOpcode()) {
165#define OPCODE(name, result_type, ...) \
166 case IR::Opcode::name: \
167 return Invoke<&Emit##name>(ctx, inst);
168#include "shader_recompiler/frontend/ir/opcodes.inc"
169#undef OPCODE
170 }
171 throw LogicError("Invalid opcode {}", inst->GetOpcode());
172}
173
174bool IsReference(IR::Inst& inst) {
175 return inst.GetOpcode() == IR::Opcode::Reference;
176}
177
178void PrecolorInst(IR::Inst& phi) {
179 // Insert phi moves before references to avoid overwritting other phis
180 const size_t num_args{phi.NumArgs()};
181 for (size_t i = 0; i < num_args; ++i) {
182 IR::Block& phi_block{*phi.PhiBlock(i)};
183 auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()};
184 IR::IREmitter ir{phi_block, it};
185 const IR::Value arg{phi.Arg(i)};
186 if (arg.IsImmediate()) {
187 ir.PhiMove(phi, arg);
188 } else {
189 ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())});
190 }
191 }
192 for (size_t i = 0; i < num_args; ++i) {
193 IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi});
194 }
195}
196
197void Precolor(const IR::Program& program) {
198 for (IR::Block* const block : program.blocks) {
199 for (IR::Inst& phi : block->Instructions()) {
200 if (!IR::IsPhi(phi)) {
201 break;
202 }
203 PrecolorInst(phi);
204 }
205 }
206}
207
208void EmitCode(EmitContext& ctx, const IR::Program& program) {
209 const auto eval{
210 [&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }};
211 for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
212 switch (node.type) {
213 case IR::AbstractSyntaxNode::Type::Block:
214 for (IR::Inst& inst : node.data.block->Instructions()) {
215 EmitInst(ctx, &inst);
216 }
217 break;
218 case IR::AbstractSyntaxNode::Type::If:
219 ctx.Add("MOV.S.CC RC,{};"
220 "IF NE.x;",
221 eval(node.data.if_node.cond));
222 break;
223 case IR::AbstractSyntaxNode::Type::EndIf:
224 ctx.Add("ENDIF;");
225 break;
226 case IR::AbstractSyntaxNode::Type::Loop:
227 ctx.Add("REP;");
228 break;
229 case IR::AbstractSyntaxNode::Type::Repeat:
230 if (!Settings::values.disable_shader_loop_safety_checks) {
231 const u32 loop_index{ctx.num_safety_loop_vars++};
232 const u32 vector_index{loop_index / 4};
233 const char component{"xyzw"[loop_index % 4]};
234 ctx.Add("SUB.S.CC loop{}.{},loop{}.{},1;"
235 "BRK(LT.{});",
236 vector_index, component, vector_index, component, component);
237 }
238 if (node.data.repeat.cond.IsImmediate()) {
239 if (node.data.repeat.cond.U1()) {
240 ctx.Add("ENDREP;");
241 } else {
242 ctx.Add("BRK;"
243 "ENDREP;");
244 }
245 } else {
246 ctx.Add("MOV.S.CC RC,{};"
247 "BRK(EQ.x);"
248 "ENDREP;",
249 eval(node.data.repeat.cond));
250 }
251 break;
252 case IR::AbstractSyntaxNode::Type::Break:
253 if (node.data.break_node.cond.IsImmediate()) {
254 if (node.data.break_node.cond.U1()) {
255 ctx.Add("BRK;");
256 }
257 } else {
258 ctx.Add("MOV.S.CC RC,{};"
259 "BRK (NE.x);",
260 eval(node.data.break_node.cond));
261 }
262 break;
263 case IR::AbstractSyntaxNode::Type::Return:
264 case IR::AbstractSyntaxNode::Type::Unreachable:
265 ctx.Add("RET;");
266 break;
267 }
268 }
269 if (!ctx.reg_alloc.IsEmpty()) {
270 LOG_WARNING(Shader_GLASM, "Register leak after generating code");
271 }
272}
273
274void SetupOptions(const IR::Program& program, const Profile& profile,
275 const RuntimeInfo& runtime_info, std::string& header) {
276 const Info& info{program.info};
277 const Stage stage{program.stage};
278
279 // TODO: Track the shared atomic ops
280 header += "OPTION NV_internal;"
281 "OPTION NV_shader_storage_buffer;"
282 "OPTION NV_gpu_program_fp64;";
283 if (info.uses_int64_bit_atomics) {
284 header += "OPTION NV_shader_atomic_int64;";
285 }
286 if (info.uses_atomic_f32_add) {
287 header += "OPTION NV_shader_atomic_float;";
288 }
289 if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
290 header += "OPTION NV_shader_atomic_fp16_vector;";
291 }
292 if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote ||
293 info.uses_fswzadd) {
294 header += "OPTION NV_shader_thread_group;";
295 }
296 if (info.uses_subgroup_shuffles) {
297 header += "OPTION NV_shader_thread_shuffle;";
298 }
299 if (info.uses_sparse_residency) {
300 header += "OPTION EXT_sparse_texture2;";
301 }
302 const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] ||
303 info.stores[IR::Attribute::Layer]};
304 if ((stage != Stage::Geometry && stores_viewport_layer) ||
305 info.stores[IR::Attribute::ViewportMask]) {
306 if (profile.support_viewport_index_layer_non_geometry) {
307 header += "OPTION NV_viewport_array2;";
308 }
309 }
310 if (program.is_geometry_passthrough && profile.support_geometry_shader_passthrough) {
311 header += "OPTION NV_geometry_shader_passthrough;";
312 }
313 if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) {
314 header += "OPTION EXT_shader_image_load_formatted;";
315 }
316 if (profile.support_derivative_control) {
317 header += "OPTION ARB_derivative_control;";
318 }
319 if (stage == Stage::Fragment && runtime_info.force_early_z != 0) {
320 header += "OPTION NV_early_fragment_tests;";
321 }
322 if (stage == Stage::Fragment) {
323 header += "OPTION ARB_draw_buffers;";
324 }
325}
326
327std::string_view StageHeader(Stage stage) {
328 switch (stage) {
329 case Stage::VertexA:
330 case Stage::VertexB:
331 return "!!NVvp5.0\n";
332 case Stage::TessellationControl:
333 return "!!NVtcp5.0\n";
334 case Stage::TessellationEval:
335 return "!!NVtep5.0\n";
336 case Stage::Geometry:
337 return "!!NVgp5.0\n";
338 case Stage::Fragment:
339 return "!!NVfp5.0\n";
340 case Stage::Compute:
341 return "!!NVcp5.0\n";
342 }
343 throw InvalidArgument("Invalid stage {}", stage);
344}
345
346std::string_view InputPrimitive(InputTopology topology) {
347 switch (topology) {
348 case InputTopology::Points:
349 return "POINTS";
350 case InputTopology::Lines:
351 return "LINES";
352 case InputTopology::LinesAdjacency:
353 return "LINESS_ADJACENCY";
354 case InputTopology::Triangles:
355 return "TRIANGLES";
356 case InputTopology::TrianglesAdjacency:
357 return "TRIANGLES_ADJACENCY";
358 }
359 throw InvalidArgument("Invalid input topology {}", topology);
360}
361
362std::string_view OutputPrimitive(OutputTopology topology) {
363 switch (topology) {
364 case OutputTopology::PointList:
365 return "POINTS";
366 case OutputTopology::LineStrip:
367 return "LINE_STRIP";
368 case OutputTopology::TriangleStrip:
369 return "TRIANGLE_STRIP";
370 }
371 throw InvalidArgument("Invalid output topology {}", topology);
372}
373
374std::string_view GetTessMode(TessPrimitive primitive) {
375 switch (primitive) {
376 case TessPrimitive::Triangles:
377 return "TRIANGLES";
378 case TessPrimitive::Quads:
379 return "QUADS";
380 case TessPrimitive::Isolines:
381 return "ISOLINES";
382 }
383 throw InvalidArgument("Invalid tessellation primitive {}", primitive);
384}
385
386std::string_view GetTessSpacing(TessSpacing spacing) {
387 switch (spacing) {
388 case TessSpacing::Equal:
389 return "EQUAL";
390 case TessSpacing::FractionalOdd:
391 return "FRACTIONAL_ODD";
392 case TessSpacing::FractionalEven:
393 return "FRACTIONAL_EVEN";
394 }
395 throw InvalidArgument("Invalid tessellation spacing {}", spacing);
396}
397} // Anonymous namespace
398
399std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program,
400 Bindings& bindings) {
401 EmitContext ctx{program, bindings, profile, runtime_info};
402 Precolor(program);
403 EmitCode(ctx, program);
404 std::string header{StageHeader(program.stage)};
405 SetupOptions(program, profile, runtime_info, header);
406 switch (program.stage) {
407 case Stage::TessellationControl:
408 header += fmt::format("VERTICES_OUT {};", program.invocations);
409 break;
410 case Stage::TessellationEval:
411 header += fmt::format("TESS_MODE {};"
412 "TESS_SPACING {};"
413 "TESS_VERTEX_ORDER {};",
414 GetTessMode(runtime_info.tess_primitive),
415 GetTessSpacing(runtime_info.tess_spacing),
416 runtime_info.tess_clockwise ? "CW" : "CCW");
417 break;
418 case Stage::Geometry:
419 header += fmt::format("PRIMITIVE_IN {};", InputPrimitive(runtime_info.input_topology));
420 if (program.is_geometry_passthrough) {
421 if (profile.support_geometry_shader_passthrough) {
422 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
423 if (program.info.passthrough.Generic(index)) {
424 header += fmt::format("PASSTHROUGH result.attrib[{}];", index);
425 }
426 }
427 if (program.info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
428 header += "PASSTHROUGH result.position;";
429 }
430 } else {
431 LOG_WARNING(Shader_GLASM, "Passthrough geometry program used but not supported");
432 }
433 } else {
434 header +=
435 fmt::format("VERTICES_OUT {};"
436 "PRIMITIVE_OUT {};",
437 program.output_vertices, OutputPrimitive(program.output_topology));
438 }
439 break;
440 case Stage::Compute:
441 header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0],
442 program.workgroup_size[1], program.workgroup_size[2]);
443 break;
444 default:
445 break;
446 }
447 if (program.shared_memory_size > 0) {
448 header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size);
449 header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};");
450 }
451 header += "TEMP ";
452 for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) {
453 header += fmt::format("R{},", index);
454 }
455 if (program.local_memory_size > 0) {
456 header += fmt::format("lmem[{}],", program.local_memory_size);
457 }
458 if (program.info.uses_fswzadd) {
459 header += "FSWZA[4],FSWZB[4],";
460 }
461 const u32 num_safety_loop_vectors{Common::DivCeil(ctx.num_safety_loop_vars, 4u)};
462 for (u32 index = 0; index < num_safety_loop_vectors; ++index) {
463 header += fmt::format("loop{},", index);
464 }
465 header += "RC;"
466 "LONG TEMP ";
467 for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) {
468 header += fmt::format("D{},", index);
469 }
470 header += "DC;";
471 if (program.info.uses_fswzadd) {
472 header += "MOV.F FSWZA[0],-1;"
473 "MOV.F FSWZA[1],1;"
474 "MOV.F FSWZA[2],-1;"
475 "MOV.F FSWZA[3],0;"
476 "MOV.F FSWZB[0],-1;"
477 "MOV.F FSWZB[1],-1;"
478 "MOV.F FSWZB[2],1;"
479 "MOV.F FSWZB[3],-1;";
480 }
481 for (u32 index = 0; index < num_safety_loop_vectors; ++index) {
482 header += fmt::format("MOV.S loop{},{{0x2000,0x2000,0x2000,0x2000}};", index);
483 }
484 if (ctx.uses_y_direction) {
485 header += "PARAM y_direction[1]={state.material.front.ambient};";
486 }
487 ctx.code.insert(0, header);
488 ctx.code += "END";
489 return ctx.code;
490}
491
492} // namespace Shader::Backend::GLASM