summaryrefslogtreecommitdiff
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2021-02-16 20:52:12 -0300
committerGravatar ameerj2021-07-22 21:51:22 -0400
commitc67d64365a712830fe140dd36e24e2efd9b8a812 (patch)
tree9287589f2b72d1cbd0cb113c2024b2bc531408c3 /src/video_core/renderer_opengl
parentshader: Add XMAD multiplication folding optimization (diff)
downloadyuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.tar.gz
yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.tar.xz
yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.zip
shader: Remove old shader management
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.cpp2124
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.h29
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp314
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h33
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp564
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h102
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp2986
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h69
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp482
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h176
10 files changed, 8 insertions, 6871 deletions
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
deleted file mode 100644
index e8d8d2aa5..000000000
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ /dev/null
@@ -1,2124 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <cstddef>
8#include <string>
9#include <string_view>
10#include <utility>
11#include <variant>
12
13#include <fmt/format.h>
14
15#include "common/alignment.h"
16#include "common/assert.h"
17#include "common/common_types.h"
18#include "video_core/renderer_opengl/gl_arb_decompiler.h"
19#include "video_core/renderer_opengl/gl_device.h"
20#include "video_core/shader/registry.h"
21#include "video_core/shader/shader_ir.h"
22
23// Predicates in the decompiled code follow the convention that -1 means true and 0 means false.
24// GLASM lacks booleans, so they have to be implemented as integers.
25// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to
26// select between two values, because -1 will be evaluated as true and 0 as false.
27
28namespace OpenGL {
29
30namespace {
31
32using Tegra::Engines::ShaderType;
33using Tegra::Shader::Attribute;
34using Tegra::Shader::PixelImap;
35using Tegra::Shader::Register;
36using namespace VideoCommon::Shader;
37using Operation = const OperationNode&;
38
// Textual names for the shader's internal condition flags; the index into this
// array is used when declaring and referencing the flag temporaries.
constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"};
40
/// Returns the GLASM swizzle character ('x', 'y', 'z' or 'w') for a component index.
/// Throws std::out_of_range for component indices above 3, as a bounds safety net.
char Swizzle(std::size_t component) {
    static constexpr std::array<char, 4> NAMES{'x', 'y', 'z', 'w'};
    return NAMES.at(component);
}
45
46constexpr bool IsGenericAttribute(Attribute::Index index) {
47 return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
48}
49
50u32 GetGenericAttributeIndex(Attribute::Index index) {
51 ASSERT(IsGenericAttribute(index));
52 return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
53}
54
55std::string_view Modifiers(Operation operation) {
56 const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta());
57 if (meta && meta->precise) {
58 return ".PREC";
59 }
60 return "";
61}
62
63std::string_view GetInputFlags(PixelImap attribute) {
64 switch (attribute) {
65 case PixelImap::Perspective:
66 return "";
67 case PixelImap::Constant:
68 return "FLAT ";
69 case PixelImap::ScreenLinear:
70 return "NOPERSPECTIVE ";
71 case PixelImap::Unused:
72 break;
73 }
74 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
75 return {};
76}
77
78std::string_view ImageType(Tegra::Shader::ImageType image_type) {
79 switch (image_type) {
80 case Tegra::Shader::ImageType::Texture1D:
81 return "1D";
82 case Tegra::Shader::ImageType::TextureBuffer:
83 return "BUFFER";
84 case Tegra::Shader::ImageType::Texture1DArray:
85 return "ARRAY1D";
86 case Tegra::Shader::ImageType::Texture2D:
87 return "2D";
88 case Tegra::Shader::ImageType::Texture2DArray:
89 return "ARRAY2D";
90 case Tegra::Shader::ImageType::Texture3D:
91 return "3D";
92 }
93 UNREACHABLE();
94 return {};
95}
96
97std::string_view StackName(MetaStackClass stack) {
98 switch (stack) {
99 case MetaStackClass::Ssy:
100 return "SSY";
101 case MetaStackClass::Pbk:
102 return "PBK";
103 }
104 UNREACHABLE();
105 return "";
106};
107
108std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
109 switch (topology) {
110 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points:
111 return "POINTS";
112 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines:
113 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip:
114 return "LINES";
115 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
116 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
117 return "LINES_ADJACENCY";
118 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles:
119 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
120 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
121 return "TRIANGLES";
122 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
123 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
124 return "TRIANGLES_ADJACENCY";
125 default:
126 UNIMPLEMENTED_MSG("topology={}", topology);
127 return "POINTS";
128 }
129}
130
131std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
132 switch (topology) {
133 case Tegra::Shader::OutputTopology::PointList:
134 return "POINTS";
135 case Tegra::Shader::OutputTopology::LineStrip:
136 return "LINE_STRIP";
137 case Tegra::Shader::OutputTopology::TriangleStrip:
138 return "TRIANGLE_STRIP";
139 default:
140 UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
141 return "points";
142 }
143}
144
145std::string_view StageInputName(ShaderType stage) {
146 switch (stage) {
147 case ShaderType::Vertex:
148 case ShaderType::Geometry:
149 return "vertex";
150 case ShaderType::Fragment:
151 return "fragment";
152 case ShaderType::Compute:
153 return "invocation";
154 default:
155 UNREACHABLE();
156 return "";
157 }
158}
159
160std::string TextureType(const MetaTexture& meta) {
161 if (meta.sampler.is_buffer) {
162 return "BUFFER";
163 }
164 std::string type;
165 if (meta.sampler.is_shadow) {
166 type += "SHADOW";
167 }
168 if (meta.sampler.is_array) {
169 type += "ARRAY";
170 }
171 type += [&meta] {
172 switch (meta.sampler.type) {
173 case Tegra::Shader::TextureType::Texture1D:
174 return "1D";
175 case Tegra::Shader::TextureType::Texture2D:
176 return "2D";
177 case Tegra::Shader::TextureType::Texture3D:
178 return "3D";
179 case Tegra::Shader::TextureType::TextureCube:
180 return "CUBE";
181 }
182 UNREACHABLE();
183 return "2D";
184 }();
185 return type;
186}
187
/// Translates a VideoCommon ShaderIR program into NVIDIA GLASM
/// (NV_gpu_program5) assembly. The constructor performs the whole
/// decompilation; the finished source is retrieved with Code().
class ARBDecompiler final {
public:
    explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
                           ShaderType stage_, std::string_view identifier);

    /// Returns the generated GLASM source code.
    std::string Code() const {
        return shader_source;
    }

private:
    void DefineGlobalMemory();

    // Declaration passes. These run after the body has been generated (see the
    // constructor) so they can size temporaries and other resources correctly.
    void DeclareHeader();
    void DeclareVertex();
    void DeclareGeometry();
    void DeclareFragment();
    void DeclareCompute();
    void DeclareInputAttributes();
    void DeclareOutputAttributes();
    void DeclareLocalMemory();
    void DeclareGlobalMemory();
    void DeclareConstantBuffers();
    void DeclareRegisters();
    void DeclareTemporaries();
    void DeclarePredicates();
    void DeclareInternalFlags();

    void InitializeVariables();

    // Two code generation strategies: structured AST walking when the IR was
    // fully decompiled, branch-based control flow otherwise.
    void DecompileAST();
    void DecompileBranchMode();

    void VisitAST(const ASTNode& node);
    std::string VisitExpression(const Expr& node);

    void VisitBlock(const NodeBlock& bb);

    std::string Visit(const Node& node);

    std::tuple<std::string, std::string, std::size_t> BuildCoords(Operation);
    std::string BuildAoffi(Operation);
    std::string GlobalMemoryPointer(const GmemNode& gmem);
    void Exit();

    // Operation handlers. Each emits code for one IR operation and returns the
    // expression (usually a fresh temporary) holding the result.
    std::string Assign(Operation);
    std::string Select(Operation);
    std::string FClamp(Operation);
    std::string FCastHalf0(Operation);
    std::string FCastHalf1(Operation);
    std::string FSqrt(Operation);
    std::string FSwizzleAdd(Operation);
    std::string HAdd2(Operation);
    std::string HMul2(Operation);
    std::string HFma2(Operation);
    std::string HAbsolute(Operation);
    std::string HNegate(Operation);
    std::string HClamp(Operation);
    std::string HCastFloat(Operation);
    std::string HUnpack(Operation);
    std::string HMergeF32(Operation);
    std::string HMergeH0(Operation);
    std::string HMergeH1(Operation);
    std::string HPack2(Operation);
    std::string LogicalAssign(Operation);
    std::string LogicalPick2(Operation);
    std::string LogicalAnd2(Operation);
    std::string FloatOrdered(Operation);
    std::string FloatUnordered(Operation);
    std::string LogicalAddCarry(Operation);
    std::string Texture(Operation);
    std::string TextureGather(Operation);
    std::string TextureQueryDimensions(Operation);
    std::string TextureQueryLod(Operation);
    std::string TexelFetch(Operation);
    std::string TextureGradient(Operation);
    std::string ImageLoad(Operation);
    std::string ImageStore(Operation);
    std::string Branch(Operation);
    std::string BranchIndirect(Operation);
    std::string PushFlowStack(Operation);
    std::string PopFlowStack(Operation);
    std::string Exit(Operation);
    std::string Discard(Operation);
    std::string EmitVertex(Operation);
    std::string EndPrimitive(Operation);
    std::string InvocationId(Operation);
    std::string YNegate(Operation);
    std::string ThreadId(Operation);
    std::string ShuffleIndexed(Operation);
    std::string Barrier(Operation);
    std::string MemoryBarrierGroup(Operation);
    std::string MemoryBarrierGlobal(Operation);

    /// Emits a one-operand instruction `op` into a fresh temporary.
    template <const std::string_view& op>
    std::string Unary(Operation operation) {
        std::string temporary = AllocTemporary();
        AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]));
        return temporary;
    }

    /// Emits a two-operand instruction `op` into a fresh temporary.
    template <const std::string_view& op>
    std::string Binary(Operation operation) {
        std::string temporary = AllocTemporary();
        AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
                Visit(operation[1]));
        return temporary;
    }

    /// Emits a three-operand instruction `op` into a fresh temporary.
    template <const std::string_view& op>
    std::string Trinary(Operation operation) {
        std::string temporary = AllocTemporary();
        AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
                Visit(operation[1]), Visit(operation[2]));
        return temporary;
    }

    /// Emits a float comparison that yields the -1/0 boolean convention described
    /// at the top of this file. The `unordered` variants additionally report true
    /// when either operand is NaN; plain SNE instead forces false on NaN operands.
    template <const std::string_view& op, bool unordered>
    std::string FloatComparison(Operation operation) {
        std::string temporary = AllocTemporary();
        AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation));
        AddLine("MOV.S {}, 0;", temporary);
        AddLine("MOV.S {} (NE.x), -1;", temporary);

        const std::string op_a = Visit(operation[0]);
        const std::string op_b = Visit(operation[1]);
        if constexpr (unordered) {
            // SNE.F of a value against itself detects NaN.
            AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
            AddLine("TRUNC.U.CC RC.x, RC.x;");
            AddLine("MOV.S {} (NE.x), -1;", temporary);
            AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
            AddLine("TRUNC.U.CC RC.x, RC.x;");
            AddLine("MOV.S {} (NE.x), -1;", temporary);
        } else if (op == SNE_F) {
            // Ordered not-equal: NaN operands must compare false.
            AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
            AddLine("TRUNC.U.CC RC.x, RC.x;");
            AddLine("MOV.S {} (NE.x), 0;", temporary);
            AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
            AddLine("TRUNC.U.CC RC.x, RC.x;");
            AddLine("MOV.S {} (NE.x), 0;", temporary);
        }
        return temporary;
    }

    /// Component-wise comparison of two packed half2 values; unpacks both
    /// operands, compares, and writes -1/0 per component. `is_nan` variants also
    /// set a component to true when the corresponding operand half is NaN.
    template <const std::string_view& op, bool is_nan>
    std::string HalfComparison(Operation operation) {
        std::string tmp1 = AllocVectorTemporary();
        const std::string tmp2 = AllocVectorTemporary();
        const std::string op_a = Visit(operation[0]);
        const std::string op_b = Visit(operation[1]);
        AddLine("UP2H.F {}, {};", tmp1, op_a);
        AddLine("UP2H.F {}, {};", tmp2, op_b);
        AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2);
        AddLine("TRUNC.U.CC RC.xy, {};", tmp1);
        AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1);
        AddLine("MOV.S {}.x (NE.x), -1;", tmp1);
        AddLine("MOV.S {}.y (NE.y), -1;", tmp1);
        if constexpr (is_nan) {
            AddLine("MOVC.F RC.x, {};", op_a);
            AddLine("MOV.S {}.x (NAN.x), -1;", tmp1);
            AddLine("MOVC.F RC.x, {};", op_b);
            AddLine("MOV.S {}.y (NAN.x), -1;", tmp1);
        }
        return tmp1;
    }

    /// Emits an ATOMIM image atomic: gathers coordinates and values into vector
    /// temporaries, performs the atomic, and returns the scalar result.
    template <const std::string_view& op, const std::string_view& type>
    std::string AtomicImage(Operation operation) {
        const auto& meta = std::get<MetaImage>(operation.GetMeta());
        const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
        const std::size_t num_coords = operation.GetOperandsCount();
        const std::size_t num_values = meta.values.size();

        const std::string coord = AllocVectorTemporary();
        const std::string value = AllocVectorTemporary();
        for (std::size_t i = 0; i < num_coords; ++i) {
            AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
        }
        for (std::size_t i = 0; i < num_values; ++i) {
            AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
        }

        AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord,
                image_id, ImageType(meta.image.type));
        return fmt::format("{}.x", coord);
    }

    /// Emits a global (ATOM) or shared (ATOMS) memory atomic depending on the
    /// address operand. Global accesses are guarded by the bounds predicate left
    /// in the condition register by GlobalMemoryPointer ("robust" path), and
    /// yield 0 when out of bounds.
    template <const std::string_view& op, const std::string_view& type>
    std::string Atomic(Operation operation) {
        std::string temporary = AllocTemporary();
        std::string address;
        std::string_view opname;
        bool robust = false;
        if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
            address = GlobalMemoryPointer(*gmem);
            opname = "ATOM";
            robust = true;
        } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
            address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
            opname = "ATOMS";
        } else {
            UNREACHABLE();
            return "{0, 0, 0, 0}";
        }
        if (robust) {
            AddLine("IF NE.x;");
        }
        AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
        if (robust) {
            AddLine("ELSE;");
            AddLine("MOV.S {}, 0;", temporary);
            AddLine("ENDIF;");
        }
        return temporary;
    }

    /// Emits a negation move; 'F' uses the 32-bit float move explicitly.
    template <char type>
    std::string Negate(Operation operation) {
        std::string temporary = AllocTemporary();
        if constexpr (type == 'F') {
            AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0]));
        } else {
            AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0]));
        }
        return temporary;
    }

    /// Emits an absolute-value move using GLASM's |x| operand syntax.
    template <char type>
    std::string Absolute(Operation operation) {
        std::string temporary = AllocTemporary();
        AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0]));
        return temporary;
    }

    /// Emits BFI: packs offset/count into a vector operand, then inserts.
    template <char type>
    std::string BitfieldInsert(Operation operation) {
        const std::string temporary = AllocVectorTemporary();
        AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3]));
        AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2]));
        AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]),
                Visit(operation[0]));
        return fmt::format("{}.x", temporary);
    }

    /// Emits BFE: packs offset/count into a vector operand, then extracts.
    template <char type>
    std::string BitfieldExtract(Operation operation) {
        const std::string temporary = AllocVectorTemporary();
        AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2]));
        AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1]));
        AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0]));
        return fmt::format("{}.x", temporary);
    }

    template <char swizzle>
    std::string LocalInvocationId(Operation) {
        return fmt::format("invocation.localid.{}", swizzle);
    }

    template <char swizzle>
    std::string WorkGroupId(Operation) {
        return fmt::format("invocation.groupid.{}", swizzle);
    }

    /// Returns the thread-group mask input, e.g. "fragment.threadeqmask".
    template <char c1, char c2>
    std::string ThreadMask(Operation) {
        return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2);
    }

    /// Appends formatted text to the output without a newline.
    template <typename... Args>
    void AddExpression(std::string_view text, Args&&... args) {
        shader_source += fmt::format(fmt::runtime(text), std::forward<Args>(args)...);
    }

    /// Appends one formatted line of GLASM to the output.
    template <typename... Args>
    void AddLine(std::string_view text, Args&&... args) {
        AddExpression(text, std::forward<Args>(args)...);
        shader_source += '\n';
    }

    // Temporary allocation: names are handed out sequentially per statement and
    // reset by ResetTemporaries(); the max counts feed the declaration pass.
    std::string AllocLongVectorTemporary() {
        max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1);
        return fmt::format("L{}", num_long_temporaries++);
    }

    std::string AllocLongTemporary() {
        return fmt::format("{}.x", AllocLongVectorTemporary());
    }

    std::string AllocVectorTemporary() {
        max_temporaries = std::max(max_temporaries, num_temporaries + 1);
        return fmt::format("T{}", num_temporaries++);
    }

    std::string AllocTemporary() {
        return fmt::format("{}.x", AllocVectorTemporary());
    }

    void ResetTemporaries() noexcept {
        num_temporaries = 0;
        num_long_temporaries = 0;
    }

    const Device& device;
    const ShaderIR& ir;
    const Registry& registry;
    const ShaderType stage;

    std::size_t num_temporaries = 0;
    std::size_t max_temporaries = 0;

    std::size_t num_long_temporaries = 0;
    std::size_t max_long_temporaries = 0;

    // Maps each global memory region to its buffer binding number.
    std::map<GlobalMemoryBase, u32> global_memory_names;

    std::string shader_source;

    // GLASM mnemonics, defined as named constants so they can be passed as
    // non-type template arguments (template <const std::string_view& op>).
    static constexpr std::string_view ADD_F32 = "ADD.F32";
    static constexpr std::string_view ADD_S = "ADD.S";
    static constexpr std::string_view ADD_U = "ADD.U";
    static constexpr std::string_view MUL_F32 = "MUL.F32";
    static constexpr std::string_view MUL_S = "MUL.S";
    static constexpr std::string_view MUL_U = "MUL.U";
    static constexpr std::string_view DIV_F32 = "DIV.F32";
    static constexpr std::string_view DIV_S = "DIV.S";
    static constexpr std::string_view DIV_U = "DIV.U";
    static constexpr std::string_view MAD_F32 = "MAD.F32";
    static constexpr std::string_view RSQ_F32 = "RSQ.F32";
    static constexpr std::string_view COS_F32 = "COS.F32";
    static constexpr std::string_view SIN_F32 = "SIN.F32";
    static constexpr std::string_view EX2_F32 = "EX2.F32";
    static constexpr std::string_view LG2_F32 = "LG2.F32";
    static constexpr std::string_view SLT_F = "SLT.F32";
    static constexpr std::string_view SLT_S = "SLT.S";
    static constexpr std::string_view SLT_U = "SLT.U";
    static constexpr std::string_view SEQ_F = "SEQ.F32";
    static constexpr std::string_view SEQ_S = "SEQ.S";
    static constexpr std::string_view SEQ_U = "SEQ.U";
    static constexpr std::string_view SLE_F = "SLE.F32";
    static constexpr std::string_view SLE_S = "SLE.S";
    static constexpr std::string_view SLE_U = "SLE.U";
    static constexpr std::string_view SGT_F = "SGT.F32";
    static constexpr std::string_view SGT_S = "SGT.S";
    static constexpr std::string_view SGT_U = "SGT.U";
    static constexpr std::string_view SNE_F = "SNE.F32";
    static constexpr std::string_view SNE_S = "SNE.S";
    static constexpr std::string_view SNE_U = "SNE.U";
    static constexpr std::string_view SGE_F = "SGE.F32";
    static constexpr std::string_view SGE_S = "SGE.S";
    static constexpr std::string_view SGE_U = "SGE.U";
    static constexpr std::string_view AND_S = "AND.S";
    static constexpr std::string_view AND_U = "AND.U";
    static constexpr std::string_view TRUNC_F = "TRUNC.F";
    static constexpr std::string_view TRUNC_S = "TRUNC.S";
    static constexpr std::string_view TRUNC_U = "TRUNC.U";
    static constexpr std::string_view SHL_S = "SHL.S";
    static constexpr std::string_view SHL_U = "SHL.U";
    static constexpr std::string_view SHR_S = "SHR.S";
    static constexpr std::string_view SHR_U = "SHR.U";
    static constexpr std::string_view OR_S = "OR.S";
    static constexpr std::string_view OR_U = "OR.U";
    static constexpr std::string_view XOR_S = "XOR.S";
    static constexpr std::string_view XOR_U = "XOR.U";
    static constexpr std::string_view NOT_S = "NOT.S";
    static constexpr std::string_view NOT_U = "NOT.U";
    static constexpr std::string_view BTC_S = "BTC.S";
    static constexpr std::string_view BTC_U = "BTC.U";
    static constexpr std::string_view BTFM_S = "BTFM.S";
    static constexpr std::string_view BTFM_U = "BTFM.U";
    static constexpr std::string_view ROUND_F = "ROUND.F";
    static constexpr std::string_view CEIL_F = "CEIL.F";
    static constexpr std::string_view FLR_F = "FLR.F";
    static constexpr std::string_view I2F_S = "I2F.S";
    static constexpr std::string_view I2F_U = "I2F.U";
    static constexpr std::string_view MIN_F = "MIN.F";
    static constexpr std::string_view MIN_S = "MIN.S";
    static constexpr std::string_view MIN_U = "MIN.U";
    static constexpr std::string_view MAX_F = "MAX.F";
    static constexpr std::string_view MAX_S = "MAX.S";
    static constexpr std::string_view MAX_U = "MAX.U";
    static constexpr std::string_view MOV_U = "MOV.U";
    static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U";
    static constexpr std::string_view TGALL_U = "TGALL.U";
    static constexpr std::string_view TGANY_U = "TGANY.U";
    static constexpr std::string_view TGEQ_U = "TGEQ.U";
    static constexpr std::string_view EXCH = "EXCH";
    static constexpr std::string_view ADD = "ADD";
    static constexpr std::string_view MIN = "MIN";
    static constexpr std::string_view MAX = "MAX";
    static constexpr std::string_view AND = "AND";
    static constexpr std::string_view OR = "OR";
    static constexpr std::string_view XOR = "XOR";
    static constexpr std::string_view U32 = "U32";
    static constexpr std::string_view S32 = "S32";

    static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount);
    using DecompilerType = std::string (ARBDecompiler::*)(Operation);
    // Dispatch table with one handler per OperationCode value. The entry order
    // must match OperationCode's declaration order exactly — NOTE(review): keep
    // in sync with video_core/shader's OperationCode enum when editing.
    static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = {
        &ARBDecompiler::Assign,

        &ARBDecompiler::Select,

        // 32-bit float arithmetic
        &ARBDecompiler::Binary<ADD_F32>,
        &ARBDecompiler::Binary<MUL_F32>,
        &ARBDecompiler::Binary<DIV_F32>,
        &ARBDecompiler::Trinary<MAD_F32>,
        &ARBDecompiler::Negate<'F'>,
        &ARBDecompiler::Absolute<'F'>,
        &ARBDecompiler::FClamp,
        &ARBDecompiler::FCastHalf0,
        &ARBDecompiler::FCastHalf1,
        &ARBDecompiler::Binary<MIN_F>,
        &ARBDecompiler::Binary<MAX_F>,
        &ARBDecompiler::Unary<COS_F32>,
        &ARBDecompiler::Unary<SIN_F32>,
        &ARBDecompiler::Unary<EX2_F32>,
        &ARBDecompiler::Unary<LG2_F32>,
        &ARBDecompiler::Unary<RSQ_F32>,
        &ARBDecompiler::FSqrt,
        &ARBDecompiler::Unary<ROUND_F>,
        &ARBDecompiler::Unary<FLR_F>,
        &ARBDecompiler::Unary<CEIL_F>,
        &ARBDecompiler::Unary<TRUNC_F>,
        &ARBDecompiler::Unary<I2F_S>,
        &ARBDecompiler::Unary<I2F_U>,
        &ARBDecompiler::FSwizzleAdd,

        // Signed integer arithmetic
        &ARBDecompiler::Binary<ADD_S>,
        &ARBDecompiler::Binary<MUL_S>,
        &ARBDecompiler::Binary<DIV_S>,
        &ARBDecompiler::Negate<'S'>,
        &ARBDecompiler::Absolute<'S'>,
        &ARBDecompiler::Binary<MIN_S>,
        &ARBDecompiler::Binary<MAX_S>,

        // Signed integer conversions and bit operations
        &ARBDecompiler::Unary<TRUNC_S>,
        &ARBDecompiler::Unary<MOV_U>,
        &ARBDecompiler::Binary<SHL_S>,
        &ARBDecompiler::Binary<SHR_U>,
        &ARBDecompiler::Binary<SHR_S>,
        &ARBDecompiler::Binary<AND_S>,
        &ARBDecompiler::Binary<OR_S>,
        &ARBDecompiler::Binary<XOR_S>,
        &ARBDecompiler::Unary<NOT_S>,
        &ARBDecompiler::BitfieldInsert<'S'>,
        &ARBDecompiler::BitfieldExtract<'S'>,
        &ARBDecompiler::Unary<BTC_S>,
        &ARBDecompiler::Unary<BTFM_S>,

        // Unsigned integer arithmetic and bit operations
        &ARBDecompiler::Binary<ADD_U>,
        &ARBDecompiler::Binary<MUL_U>,
        &ARBDecompiler::Binary<DIV_U>,
        &ARBDecompiler::Binary<MIN_U>,
        &ARBDecompiler::Binary<MAX_U>,
        &ARBDecompiler::Unary<TRUNC_U>,
        &ARBDecompiler::Unary<MOV_U>,
        &ARBDecompiler::Binary<SHL_U>,
        &ARBDecompiler::Binary<SHR_U>,
        &ARBDecompiler::Binary<SHR_U>,
        &ARBDecompiler::Binary<AND_U>,
        &ARBDecompiler::Binary<OR_U>,
        &ARBDecompiler::Binary<XOR_U>,
        &ARBDecompiler::Unary<NOT_U>,
        &ARBDecompiler::BitfieldInsert<'U'>,
        &ARBDecompiler::BitfieldExtract<'U'>,
        &ARBDecompiler::Unary<BTC_U>,
        &ARBDecompiler::Unary<BTFM_U>,

        // Packed half-float operations
        &ARBDecompiler::HAdd2,
        &ARBDecompiler::HMul2,
        &ARBDecompiler::HFma2,
        &ARBDecompiler::HAbsolute,
        &ARBDecompiler::HNegate,
        &ARBDecompiler::HClamp,
        &ARBDecompiler::HCastFloat,
        &ARBDecompiler::HUnpack,
        &ARBDecompiler::HMergeF32,
        &ARBDecompiler::HMergeH0,
        &ARBDecompiler::HMergeH1,
        &ARBDecompiler::HPack2,

        // Logical operations (predicates use the -1/0 convention)
        &ARBDecompiler::LogicalAssign,
        &ARBDecompiler::Binary<AND_U>,
        &ARBDecompiler::Binary<OR_U>,
        &ARBDecompiler::Binary<XOR_U>,
        &ARBDecompiler::Unary<NOT_U>,
        &ARBDecompiler::LogicalPick2,
        &ARBDecompiler::LogicalAnd2,

        // Float comparisons: ordered, then ordered/unordered helpers, then unordered
        &ARBDecompiler::FloatComparison<SLT_F, false>,
        &ARBDecompiler::FloatComparison<SEQ_F, false>,
        &ARBDecompiler::FloatComparison<SLE_F, false>,
        &ARBDecompiler::FloatComparison<SGT_F, false>,
        &ARBDecompiler::FloatComparison<SNE_F, false>,
        &ARBDecompiler::FloatComparison<SGE_F, false>,
        &ARBDecompiler::FloatOrdered,
        &ARBDecompiler::FloatUnordered,
        &ARBDecompiler::FloatComparison<SLT_F, true>,
        &ARBDecompiler::FloatComparison<SEQ_F, true>,
        &ARBDecompiler::FloatComparison<SLE_F, true>,
        &ARBDecompiler::FloatComparison<SGT_F, true>,
        &ARBDecompiler::FloatComparison<SNE_F, true>,
        &ARBDecompiler::FloatComparison<SGE_F, true>,

        // Signed integer comparisons
        &ARBDecompiler::Binary<SLT_S>,
        &ARBDecompiler::Binary<SEQ_S>,
        &ARBDecompiler::Binary<SLE_S>,
        &ARBDecompiler::Binary<SGT_S>,
        &ARBDecompiler::Binary<SNE_S>,
        &ARBDecompiler::Binary<SGE_S>,

        // Unsigned integer comparisons
        &ARBDecompiler::Binary<SLT_U>,
        &ARBDecompiler::Binary<SEQ_U>,
        &ARBDecompiler::Binary<SLE_U>,
        &ARBDecompiler::Binary<SGT_U>,
        &ARBDecompiler::Binary<SNE_U>,
        &ARBDecompiler::Binary<SGE_U>,

        &ARBDecompiler::LogicalAddCarry,

        // Packed half-float comparisons, plain then NaN-aware
        &ARBDecompiler::HalfComparison<SLT_F, false>,
        &ARBDecompiler::HalfComparison<SEQ_F, false>,
        &ARBDecompiler::HalfComparison<SLE_F, false>,
        &ARBDecompiler::HalfComparison<SGT_F, false>,
        &ARBDecompiler::HalfComparison<SNE_F, false>,
        &ARBDecompiler::HalfComparison<SGE_F, false>,
        &ARBDecompiler::HalfComparison<SLT_F, true>,
        &ARBDecompiler::HalfComparison<SEQ_F, true>,
        &ARBDecompiler::HalfComparison<SLE_F, true>,
        &ARBDecompiler::HalfComparison<SGT_F, true>,
        &ARBDecompiler::HalfComparison<SNE_F, true>,
        &ARBDecompiler::HalfComparison<SGE_F, true>,

        // Texture operations
        &ARBDecompiler::Texture,
        &ARBDecompiler::Texture,
        &ARBDecompiler::TextureGather,
        &ARBDecompiler::TextureQueryDimensions,
        &ARBDecompiler::TextureQueryLod,
        &ARBDecompiler::TexelFetch,
        &ARBDecompiler::TextureGradient,

        &ARBDecompiler::ImageLoad,
        &ARBDecompiler::ImageStore,

        // Image atomics
        &ARBDecompiler::AtomicImage<ADD, U32>,
        &ARBDecompiler::AtomicImage<AND, U32>,
        &ARBDecompiler::AtomicImage<OR, U32>,
        &ARBDecompiler::AtomicImage<XOR, U32>,
        &ARBDecompiler::AtomicImage<EXCH, U32>,

        // Memory atomics, unsigned then signed
        &ARBDecompiler::Atomic<EXCH, U32>,
        &ARBDecompiler::Atomic<ADD, U32>,
        &ARBDecompiler::Atomic<MIN, U32>,
        &ARBDecompiler::Atomic<MAX, U32>,
        &ARBDecompiler::Atomic<AND, U32>,
        &ARBDecompiler::Atomic<OR, U32>,
        &ARBDecompiler::Atomic<XOR, U32>,

        &ARBDecompiler::Atomic<EXCH, S32>,
        &ARBDecompiler::Atomic<ADD, S32>,
        &ARBDecompiler::Atomic<MIN, S32>,
        &ARBDecompiler::Atomic<MAX, S32>,
        &ARBDecompiler::Atomic<AND, S32>,
        &ARBDecompiler::Atomic<OR, S32>,
        &ARBDecompiler::Atomic<XOR, S32>,

        // Reduction forms reuse the same atomic emitters
        &ARBDecompiler::Atomic<ADD, U32>,
        &ARBDecompiler::Atomic<MIN, U32>,
        &ARBDecompiler::Atomic<MAX, U32>,
        &ARBDecompiler::Atomic<AND, U32>,
        &ARBDecompiler::Atomic<OR, U32>,
        &ARBDecompiler::Atomic<XOR, U32>,

        &ARBDecompiler::Atomic<ADD, S32>,
        &ARBDecompiler::Atomic<MIN, S32>,
        &ARBDecompiler::Atomic<MAX, S32>,
        &ARBDecompiler::Atomic<AND, S32>,
        &ARBDecompiler::Atomic<OR, S32>,
        &ARBDecompiler::Atomic<XOR, S32>,

        // Control flow
        &ARBDecompiler::Branch,
        &ARBDecompiler::BranchIndirect,
        &ARBDecompiler::PushFlowStack,
        &ARBDecompiler::PopFlowStack,
        &ARBDecompiler::Exit,
        &ARBDecompiler::Discard,

        &ARBDecompiler::EmitVertex,
        &ARBDecompiler::EndPrimitive,

        // Built-in inputs
        &ARBDecompiler::InvocationId,
        &ARBDecompiler::YNegate,
        &ARBDecompiler::LocalInvocationId<'x'>,
        &ARBDecompiler::LocalInvocationId<'y'>,
        &ARBDecompiler::LocalInvocationId<'z'>,
        &ARBDecompiler::WorkGroupId<'x'>,
        &ARBDecompiler::WorkGroupId<'y'>,
        &ARBDecompiler::WorkGroupId<'z'>,

        // Thread-group votes
        &ARBDecompiler::Unary<TGBALLOT_U>,
        &ARBDecompiler::Unary<TGALL_U>,
        &ARBDecompiler::Unary<TGANY_U>,
        &ARBDecompiler::Unary<TGEQ_U>,

        // Warp/thread queries and shuffles
        &ARBDecompiler::ThreadId,
        &ARBDecompiler::ThreadMask<'e', 'q'>,
        &ARBDecompiler::ThreadMask<'g', 'e'>,
        &ARBDecompiler::ThreadMask<'g', 't'>,
        &ARBDecompiler::ThreadMask<'l', 'e'>,
        &ARBDecompiler::ThreadMask<'l', 't'>,
        &ARBDecompiler::ShuffleIndexed,

        // Barriers
        &ARBDecompiler::Barrier,
        &ARBDecompiler::MemoryBarrierGroup,
        &ARBDecompiler::MemoryBarrierGlobal,
    };
};
804
ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
                             ShaderType stage_, std::string_view identifier)
    : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} {
    DefineGlobalMemory();

    // Scratch registers available to all generated code. RC is a general
    // condition/scratch register; FSWZA/FSWZB presumably back FSwizzleAdd's
    // per-lane tables — confirm in FSwizzleAdd's implementation.
    AddLine("TEMP RC;");
    AddLine("TEMP FSWZA[4];");
    AddLine("TEMP FSWZB[4];");
    if (ir.IsDecompiled()) {
        DecompileAST();
    } else {
        DecompileBranchMode();
    }
    AddLine("END");

    // The program body is generated first so the declaration passes can see
    // final resource usage (e.g. max temporaries); the declarations are then
    // emitted into the now-empty buffer and the body is re-appended after them.
    const std::string code = std::move(shader_source);
    DeclareHeader();
    DeclareVertex();
    DeclareGeometry();
    DeclareFragment();
    DeclareCompute();
    DeclareInputAttributes();
    DeclareOutputAttributes();
    DeclareLocalMemory();
    DeclareGlobalMemory();
    DeclareConstantBuffers();
    DeclareRegisters();
    DeclareTemporaries();
    DeclarePredicates();
    DeclareInternalFlags();

    shader_source += code;
}
838
839std::string_view HeaderStageName(ShaderType stage) {
840 switch (stage) {
841 case ShaderType::Vertex:
842 return "vp";
843 case ShaderType::Geometry:
844 return "gp";
845 case ShaderType::Fragment:
846 return "fp";
847 case ShaderType::Compute:
848 return "cp";
849 default:
850 UNREACHABLE();
851 return "";
852 }
853}
854
855void ARBDecompiler::DefineGlobalMemory() {
856 u32 binding = 0;
857 for (const auto& pair : ir.GetGlobalMemory()) {
858 const GlobalMemoryBase base = pair.first;
859 global_memory_names.emplace(base, binding);
860 ++binding;
861 }
862}
863
// Emits the program header: the NV program-type line plus every OPTION the
// stage and host device require.
void ARBDecompiler::DeclareHeader() {
    AddLine("!!NV{}5.0", HeaderStageName(stage));
    // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
    AddLine("OPTION NV_internal;");
    AddLine("OPTION NV_gpu_program_fp64;");
    AddLine("OPTION NV_shader_thread_group;");
    // Shuffle intrinsics are only requested when the shader actually uses warp
    // operations and the driver exposes them.
    if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
        AddLine("OPTION NV_shader_thread_shuffle;");
    }
    if (stage == ShaderType::Vertex) {
        if (device.HasNvViewportArray2()) {
            AddLine("OPTION NV_viewport_array2;");
        }
    }
    if (stage == ShaderType::Fragment) {
        AddLine("OPTION ARB_draw_buffers;");
    }
    if (device.HasImageLoadFormatted()) {
        AddLine("OPTION EXT_shader_image_load_formatted;");
    }
}
885
886void ARBDecompiler::DeclareVertex() {
887 if (stage != ShaderType::Vertex) {
888 return;
889 }
890 AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};");
891}
892
893void ARBDecompiler::DeclareGeometry() {
894 if (stage != ShaderType::Geometry) {
895 return;
896 }
897 const auto& info = registry.GetGraphicsInfo();
898 const auto& header = ir.GetHeader();
899 AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology));
900 AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology));
901 AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value());
902 AddLine("ATTRIB vertex_position = vertex.position;");
903}
904
905void ARBDecompiler::DeclareFragment() {
906 if (stage != ShaderType::Fragment) {
907 return;
908 }
909 AddLine("OUTPUT result_color7 = result.color[7];");
910 AddLine("OUTPUT result_color6 = result.color[6];");
911 AddLine("OUTPUT result_color5 = result.color[5];");
912 AddLine("OUTPUT result_color4 = result.color[4];");
913 AddLine("OUTPUT result_color3 = result.color[3];");
914 AddLine("OUTPUT result_color2 = result.color[2];");
915 AddLine("OUTPUT result_color1 = result.color[1];");
916 AddLine("OUTPUT result_color0 = result.color;");
917}
918
// Compute stage only: emits the workgroup size and, when the kernel uses it,
// the shared memory block (clamped to the host driver's reported limit).
void ARBDecompiler::DeclareCompute() {
    if (stage != ShaderType::Compute) {
        return;
    }
    const ComputeInfo& info = registry.GetComputeInfo();
    AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
            info.workgroup_size[2]);
    if (info.shared_memory_size_in_words == 0) {
        return;
    }
    const u32 limit = device.GetMaxComputeSharedMemorySize();
    u32 size_in_bytes = info.shared_memory_size_in_words * 4;
    if (size_in_bytes > limit) {
        // Clamp instead of failing; the shader may still run within the limit.
        LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
                  size_in_bytes, limit);
        size_in_bytes = limit;
    }

    AddLine("SHARED_MEMORY {};", size_in_bytes);
    AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
}
940
941void ARBDecompiler::DeclareInputAttributes() {
942 if (stage == ShaderType::Compute) {
943 return;
944 }
945 const std::string_view stage_name = StageInputName(stage);
946 for (const auto attribute : ir.GetInputAttributes()) {
947 if (!IsGenericAttribute(attribute)) {
948 continue;
949 }
950 const u32 index = GetGenericAttributeIndex(attribute);
951
952 std::string_view suffix;
953 if (stage == ShaderType::Fragment) {
954 const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)};
955 if (input_mode == PixelImap::Unused) {
956 return;
957 }
958 suffix = GetInputFlags(input_mode);
959 }
960 AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index,
961 index);
962 }
963}
964
965void ARBDecompiler::DeclareOutputAttributes() {
966 if (stage == ShaderType::Compute) {
967 return;
968 }
969 for (const auto attribute : ir.GetOutputAttributes()) {
970 if (!IsGenericAttribute(attribute)) {
971 continue;
972 }
973 const u32 index = GetGenericAttributeIndex(attribute);
974 AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index);
975 }
976}
977
978void ARBDecompiler::DeclareLocalMemory() {
979 u64 size = 0;
980 if (stage == ShaderType::Compute) {
981 size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
982 } else {
983 size = ir.GetHeader().GetLocalMemorySize();
984 }
985 if (size == 0) {
986 return;
987 }
988 const u64 element_count = Common::AlignUp(size, 4) / 4;
989 AddLine("TEMP lmem[{}];", element_count);
990}
991
992void ARBDecompiler::DeclareGlobalMemory() {
993 const size_t num_entries = ir.GetGlobalMemory().size();
994 if (num_entries > 0) {
995 AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1);
996 }
997}
998
999void ARBDecompiler::DeclareConstantBuffers() {
1000 u32 binding = 0;
1001 for (const auto& cbuf : ir.GetConstantBuffers()) {
1002 AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding);
1003 ++binding;
1004 }
1005}
1006
1007void ARBDecompiler::DeclareRegisters() {
1008 for (const u32 gpr : ir.GetRegisters()) {
1009 AddLine("TEMP R{};", gpr);
1010 }
1011}
1012
1013void ARBDecompiler::DeclareTemporaries() {
1014 for (std::size_t i = 0; i < max_temporaries; ++i) {
1015 AddLine("TEMP T{};", i);
1016 }
1017 for (std::size_t i = 0; i < max_long_temporaries; ++i) {
1018 AddLine("LONG TEMP L{};", i);
1019 }
1020}
1021
1022void ARBDecompiler::DeclarePredicates() {
1023 for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
1024 AddLine("TEMP P{};", static_cast<u64>(pred));
1025 }
1026}
1027
1028void ARBDecompiler::DeclareInternalFlags() {
1029 for (const char* name : INTERNAL_FLAG_NAMES) {
1030 AddLine("TEMP {};", name);
1031 }
1032}
1033
// Emits entry-point initialization: fills the FSwizzleAdd lookup tables and
// gives outputs, registers and predicates well-defined starting values.
void ARBDecompiler::InitializeVariables() {
    AddLine("MOV.F32 FSWZA[0], -1;");
    AddLine("MOV.F32 FSWZA[1], 1;");
    AddLine("MOV.F32 FSWZA[2], -1;");
    AddLine("MOV.F32 FSWZA[3], 0;");
    AddLine("MOV.F32 FSWZB[0], -1;");
    AddLine("MOV.F32 FSWZB[1], -1;");
    AddLine("MOV.F32 FSWZB[2], 1;");
    AddLine("MOV.F32 FSWZB[3], -1;");

    // Position and generic outputs start as (0, 0, 0, 1).
    if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) {
        AddLine("MOV.F result.position, {{0, 0, 0, 1}};");
    }
    for (const auto attribute : ir.GetOutputAttributes()) {
        if (!IsGenericAttribute(attribute)) {
            continue;
        }
        const u32 index = GetGenericAttributeIndex(attribute);
        AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index);
    }
    // Registers and predicates start zeroed.
    for (const u32 gpr : ir.GetRegisters()) {
        AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr);
    }
    for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
        AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred));
    }
}
1061
// Emits the shader body from the structured (AST) control flow representation.
void ARBDecompiler::DecompileAST() {
    const u32 num_flow_variables = ir.GetASTNumVariables();
    // Declare all flow variables first, then zero them, since declarations and
    // code share one output stream.
    for (u32 i = 0; i < num_flow_variables; ++i) {
        AddLine("TEMP F{};", i);
    }
    for (u32 i = 0; i < num_flow_variables; ++i) {
        AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i);
    }

    InitializeVariables();

    VisitAST(ir.GetASTProgram());
}
1075
// Emits the shader body in "branch mode": a dispatch loop keyed on a virtual
// program counter (PC). Each basic block becomes an IF guarded by its address;
// blocks update PC and CONT back to the loop head to jump.
void ARBDecompiler::DecompileBranchMode() {
    static constexpr u32 FLOW_STACK_SIZE = 20;
    // SSY/PBK emulate the hardware's sync and break stacks when the IR still
    // relies on them.
    if (!ir.IsFlowStackDisabled()) {
        AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
        AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
        AddLine("TEMP SSY_TOP;");
        AddLine("TEMP PBK_TOP;");
    }

    AddLine("TEMP PC;");

    if (!ir.IsFlowStackDisabled()) {
        AddLine("MOV.U SSY_TOP.x, 0;");
        AddLine("MOV.U PBK_TOP.x, 0;");
    }

    InitializeVariables();

    // Start execution at the first basic block's address.
    const auto basic_block_end = ir.GetBasicBlocks().end();
    auto basic_block_it = ir.GetBasicBlocks().begin();
    const u32 first_address = basic_block_it->first;
    AddLine("MOV.U PC.x, {};", first_address);

    AddLine("REP;");

    std::size_t num_blocks = 0;
    while (basic_block_it != basic_block_end) {
        const auto& [address, bb] = *basic_block_it;
        ++num_blocks;

        // Guard: execute this block only when PC matches its address.
        AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
        AddLine("IF NE.x;");

        VisitBlock(bb);

        ++basic_block_it;

        if (basic_block_it != basic_block_end) {
            // Fall through to the next block unless the block ended in an
            // explicit branch (which already updated PC).
            const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
            if (!op || op->GetCode() != OperationCode::Branch) {
                const u32 next_address = basic_block_it->first;
                AddLine("MOV.U PC.x, {};", next_address);
                AddLine("CONT;");
            }
        }

        AddLine("ELSE;");
    }
    // No block matched PC: terminate. Then close every nested IF/ELSE.
    AddLine("RET;");
    while (num_blocks--) {
        AddLine("ENDIF;");
    }

    AddLine("ENDREP;");
}
1131
// Recursively emits code for one structured control-flow node.
void ARBDecompiler::VisitAST(const ASTNode& node) {
    if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) {
        // Top-level program: emit every child in order.
        for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
    } else if (const auto if_then = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
        const std::string condition = VisitExpression(if_then->condition);
        ResetTemporaries();

        AddLine("MOVC.U RC.x, {};", condition);
        AddLine("IF NE.x;");
        for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
        AddLine("ENDIF;");
    } else if (const auto if_else = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
        // An ASTIfElse always follows an ASTIfThen, so its ENDIF is emitted by
        // the matching if-then above.
        AddLine("ELSE;");
        for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
    } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
        VisitBlock(decoded->nodes);
    } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
        // Flow variable assignment: F<index> = condition.
        AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition));
        ResetTemporaries();
    } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
        const std::string condition = VisitExpression(do_while->condition);
        ResetTemporaries();
        AddLine("REP;");
        for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
        // Loop while the condition holds; BRK exits when it becomes false.
        AddLine("MOVC.U RC.x, {};", condition);
        AddLine("BRK (NE.x);");
        AddLine("ENDREP;");
    } else if (const auto ast_return = std::get_if<ASTReturn>(&*node->GetInnerData())) {
        // Conditional return or discard (KIL).
        const bool is_true = ExprIsTrue(ast_return->condition);
        if (!is_true) {
            AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition));
            AddLine("IF NE.x;");
            ResetTemporaries();
        }
        if (ast_return->kills) {
            AddLine("KIL TR;");
        } else {
            Exit();
        }
        if (!is_true) {
            AddLine("ENDIF;");
        }
    } else if (const auto ast_break = std::get_if<ASTBreak>(&*node->GetInnerData())) {
        if (ExprIsTrue(ast_break->condition)) {
            AddLine("BRK;");
        } else {
            AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition));
            AddLine("BRK (NE.x);");
            ResetTemporaries();
        }
    } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) {
        // Nothing to do
    } else {
        UNREACHABLE();
    }
}
1196
1197std::string ARBDecompiler::VisitExpression(const Expr& node) {
1198 if (const auto expr = std::get_if<ExprAnd>(&*node)) {
1199 std::string result = AllocTemporary();
1200 AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1),
1201 VisitExpression(expr->operand2));
1202 return result;
1203 }
1204 if (const auto expr = std::get_if<ExprOr>(&*node)) {
1205 std::string result = AllocTemporary();
1206 AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1),
1207 VisitExpression(expr->operand2));
1208 return result;
1209 }
1210 if (const auto expr = std::get_if<ExprNot>(&*node)) {
1211 std::string result = AllocTemporary();
1212 AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1));
1213 return result;
1214 }
1215 if (const auto expr = std::get_if<ExprPredicate>(&*node)) {
1216 return fmt::format("P{}.x", static_cast<u64>(expr->predicate));
1217 }
1218 if (const auto expr = std::get_if<ExprCondCode>(&*node)) {
1219 return Visit(ir.GetConditionCode(expr->cc));
1220 }
1221 if (const auto expr = std::get_if<ExprVar>(&*node)) {
1222 return fmt::format("F{}.x", expr->var_index);
1223 }
1224 if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
1225 return expr->value ? "0xffffffff" : "0";
1226 }
1227 if (const auto expr = std::get_if<ExprGprEqual>(&*node)) {
1228 std::string result = AllocTemporary();
1229 AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value);
1230 return result;
1231 }
1232 UNREACHABLE();
1233 return "0";
1234}
1235
1236void ARBDecompiler::VisitBlock(const NodeBlock& bb) {
1237 for (const auto& node : bb) {
1238 Visit(node);
1239 }
1240}
1241
// Central IR visitor: emits code for a node and returns an operand string
// (possibly a freshly allocated temporary) that holds its value. Statement
// nodes return an empty string.
std::string ARBDecompiler::Visit(const Node& node) {
    // Operations dispatch through the OPERATION_DECOMPILERS member table.
    if (const auto operation = std::get_if<OperationNode>(&*node)) {
        if (const auto amend_index = operation->GetAmendIndex()) {
            Visit(ir.GetAmendNode(*amend_index));
        }
        const std::size_t index = static_cast<std::size_t>(operation->GetCode());
        if (index >= OPERATION_DECOMPILERS.size()) {
            UNREACHABLE_MSG("Out of bounds operation: {}", index);
            return {};
        }
        const auto decompiler = OPERATION_DECOMPILERS[index];
        if (decompiler == nullptr) {
            UNREACHABLE_MSG("Undefined operation: {}", index);
            return {};
        }
        return (this->*decompiler)(*operation);
    }

    // Guest general-purpose register; R0..Rn, with the zero register inlined.
    if (const auto gpr = std::get_if<GprNode>(&*node)) {
        const u32 index = gpr->GetIndex();
        if (index == Register::ZeroIndex) {
            return "{0, 0, 0, 0}.x";
        }
        return fmt::format("R{}.x", index);
    }

    if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
        return fmt::format("CV{}.x", cv->GetIndex());
    }

    // Immediates are materialized into a temporary.
    if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
        std::string temporary = AllocTemporary();
        AddLine("MOV.U {}, {};", temporary, immediate->GetValue());
        return temporary;
    }

    // Predicate read; the two special indices are constant true/false.
    if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
        std::string temporary = AllocTemporary();
        switch (const auto index = predicate->GetIndex(); index) {
        case Tegra::Shader::Pred::UnusedIndex:
            AddLine("MOV.S {}, -1;", temporary);
            break;
        case Tegra::Shader::Pred::NeverExecute:
            AddLine("MOV.S {}, 0;", temporary);
            break;
        default:
            AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index));
            break;
        }
        if (predicate->IsNegated()) {
            // Invert: non-zero -> 0, zero -> -1.
            AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary);
        }
        return temporary;
    }

    // Input attribute read.
    if (const auto abuf = std::get_if<AbufNode>(&*node)) {
        if (abuf->IsPhysicalBuffer()) {
            UNIMPLEMENTED_MSG("Physical buffers are not implemented");
            return "{0, 0, 0, 0}.x";
        }

        const Attribute::Index index = abuf->GetIndex();
        const u32 element = abuf->GetElement();
        const char swizzle = Swizzle(element);
        switch (index) {
        case Attribute::Index::Position: {
            // Geometry shaders index position per input vertex.
            if (stage == ShaderType::Geometry) {
                return fmt::format("{}_position[{}].{}", StageInputName(stage),
                                   Visit(abuf->GetBuffer()), swizzle);
            } else {
                return fmt::format("{}.position.{}", StageInputName(stage), swizzle);
            }
        }
        case Attribute::Index::TessCoordInstanceIDVertexID:
            ASSERT(stage == ShaderType::Vertex);
            switch (element) {
            case 2:
                return "vertex.instance";
            case 3:
                return "vertex.id";
            }
            UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
            break;
        case Attribute::Index::PointCoord:
            switch (element) {
            case 0:
                return "fragment.pointcoord.x";
            case 1:
                return "fragment.pointcoord.y";
            }
            UNIMPLEMENTED();
            break;
        case Attribute::Index::FrontFacing: {
            // Convert fragment.facing into the guest's 0 / -1 convention.
            ASSERT(stage == ShaderType::Fragment);
            ASSERT(element == 3);
            const std::string temporary = AllocVectorTemporary();
            AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};");
            AddLine("MOV.U.CC RC.x, -RC;");
            AddLine("MOV.S {}.x, 0;", temporary);
            AddLine("MOV.S {}.x (NE.x), -1;", temporary);
            return fmt::format("{}.x", temporary);
        }
        default:
            if (IsGenericAttribute(index)) {
                if (stage == ShaderType::Geometry) {
                    return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index),
                                       Visit(abuf->GetBuffer()), swizzle);
                } else {
                    return fmt::format("{}.attrib[{}].{}", StageInputName(stage),
                                       GetGenericAttributeIndex(index), swizzle);
                }
            }
            UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index);
            break;
        }
        return "{0, 0, 0, 0}.x";
    }

    // Constant buffer read, with either an immediate or computed offset.
    if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
        std::string offset_string;
        const auto& offset = cbuf->GetOffset();
        if (const auto imm = std::get_if<ImmediateNode>(&*offset)) {
            offset_string = std::to_string(imm->GetValue());
        } else {
            offset_string = Visit(offset);
        }
        std::string temporary = AllocTemporary();
        AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string);
        return temporary;
    }

    // Global memory read, predicated on the bounds check CC set by
    // GlobalMemoryPointer; out-of-bounds reads yield 0.
    if (const auto gmem = std::get_if<GmemNode>(&*node)) {
        std::string temporary = AllocTemporary();
        AddLine("MOV {}, 0;", temporary);
        AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem));
        return temporary;
    }

    // Local memory read; byte address is converted to a 32-bit element index.
    if (const auto lmem = std::get_if<LmemNode>(&*node)) {
        std::string temporary = Visit(lmem->GetAddress());
        AddLine("SHR.U {}, {}, 2;", temporary, temporary);
        AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary);
        return temporary;
    }

    if (const auto smem = std::get_if<SmemNode>(&*node)) {
        std::string temporary = Visit(smem->GetAddress());
        AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary);
        return temporary;
    }

    if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
        const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
        return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
    }

    // Conditional statement node: wraps a block in IF NE.x / ENDIF.
    if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
        if (const auto amend_index = conditional->GetAmendIndex()) {
            Visit(ir.GetAmendNode(*amend_index));
        }
        AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition()));
        AddLine("IF NE.x;");
        VisitBlock(conditional->GetCode());
        AddLine("ENDIF;");
        return {};
    }

    if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) {
        // Uncommenting this will generate invalid code. GLASM lacks comments.
        // AddLine("// {}", cmt->GetText());
        return {};
    }

    UNIMPLEMENTED();
    return {};
}
1418
// Gathers texture coordinate operands into a vector temporary.
// Returns {operand string for the texture instruction, temporary holding the
// last written vector, number of components written}. Shadow cube arrays need
// five values, which do not fit one vector: the depth compare goes into a
// second temporary and the reported component count is 0.
std::tuple<std::string, std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    UNIMPLEMENTED_IF(meta.sampler.is_indexed);

    const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array &&
                             meta.sampler.type == Tegra::Shader::TextureType::TextureCube;
    const std::size_t count = operation.GetOperandsCount();
    std::string temporary = AllocVectorTemporary();
    std::size_t i = 0;
    for (; i < count; ++i) {
        AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
    }
    if (meta.sampler.is_array) {
        // The array layer is an integer operand; convert it to float.
        AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array));
        ++i;
    }
    if (meta.sampler.is_shadow) {
        std::string compare = Visit(meta.depth_compare);
        if (is_extended) {
            ASSERT(i == 4);
            std::string extra_coord = AllocVectorTemporary();
            AddLine("MOV.F {}.x, {};", extra_coord, compare);
            return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0};
        }
        AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare);
        ++i;
    }
    return {temporary, temporary, i};
}
1448
1449std::string ARBDecompiler::BuildAoffi(Operation operation) {
1450 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1451 if (meta.aoffi.empty()) {
1452 return {};
1453 }
1454 const std::string temporary = AllocVectorTemporary();
1455 std::size_t i = 0;
1456 for (auto& node : meta.aoffi) {
1457 AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node));
1458 }
1459 return fmt::format(", offset({})", temporary);
1460}
1461
1462std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
1463 // Read a bindless SSBO, return its address and set CC accordingly
1464 // address = c[binding].xy
1465 // length = c[binding].z
1466 const u32 binding = global_memory_names.at(gmem.GetDescriptor());
1467
1468 const std::string pointer = AllocLongVectorTemporary();
1469 std::string temporary = AllocTemporary();
1470
1471 AddLine("PK64.U {}, c[{}];", pointer, binding);
1472 AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
1473 Visit(gmem.GetBaseAddress()));
1474 AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
1475 AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer);
1476 // Compare offset to length and set CC
1477 AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding);
1478 return fmt::format("{}.x", pointer);
1479}
1480
// Emits the stage epilogue. Fragment shaders copy color results (and
// optionally depth) from guest registers to the render target outputs before
// returning; every other stage simply returns.
void ARBDecompiler::Exit() {
    if (stage != ShaderType::Fragment) {
        AddLine("RET;");
        return;
    }

    // Reads a guest register if the shader ever declared it, otherwise zero.
    const auto safe_get_register = [this](u32 reg) -> std::string {
        if (ir.GetRegisters().contains(reg)) {
            return fmt::format("R{}.x", reg);
        }
        return "{0, 0, 0, 0}.x";
    };

    // Color components are packed into consecutive registers starting at R0,
    // skipping components the output map disables.
    const auto& header = ir.GetHeader();
    u32 current_reg = 0;
    for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) {
        for (u32 component = 0; component < 4; ++component) {
            if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
                continue;
            }
            AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component),
                    safe_get_register(current_reg));
            ++current_reg;
        }
    }
    if (header.ps.omap.depth) {
        // NOTE(review): depth is sourced from the register *after* the last
        // color output (current_reg + 1) — confirm this offset is intended.
        AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1));
    }

    AddLine("RET;");
}
1512
// Emits a store: operation[0] is the destination, operation[1] the source.
// Most destinations become a MOV into a named operand; shared and global
// memory use dedicated store instructions and return early.
std::string ARBDecompiler::Assign(Operation operation) {
    const Node& dest = operation[0];
    const Node& src = operation[1];

    std::string dest_name;
    if (const auto gpr = std::get_if<GprNode>(&*dest)) {
        if (gpr->GetIndex() == Register::ZeroIndex) {
            // Writing to Register::ZeroIndex is a no op
            return {};
        }
        dest_name = fmt::format("R{}.x", gpr->GetIndex());
    } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
        // Output attribute write.
        const u32 element = abuf->GetElement();
        const char swizzle = Swizzle(element);
        switch (const Attribute::Index index = abuf->GetIndex()) {
        case Attribute::Index::Position:
            dest_name = fmt::format("result.position.{}", swizzle);
            break;
        case Attribute::Index::LayerViewportPointSize:
            switch (element) {
            case 0:
                UNIMPLEMENTED();
                return {};
            case 1:
            case 2:
                // Layer/viewport output needs NV_viewport_array2 support.
                if (!device.HasNvViewportArray2()) {
                    LOG_ERROR(
                        Render_OpenGL,
                        "NV_viewport_array2 is missing. Maxwell gen 2 or better is required.");
                    return {};
                }
                dest_name = element == 1 ? "result.layer.x" : "result.viewport.x";
                break;
            case 3:
                dest_name = "result.pointsize.x";
                break;
            }
            break;
        case Attribute::Index::ClipDistances0123:
            dest_name = fmt::format("result.clip[{}].x", element);
            break;
        case Attribute::Index::ClipDistances4567:
            dest_name = fmt::format("result.clip[{}].x", element + 4);
            break;
        default:
            if (!IsGenericAttribute(index)) {
                UNREACHABLE();
                return {};
            }
            dest_name =
                fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle);
            break;
        }
    } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
        // Local memory: byte address converted to a 32-bit element index.
        const std::string address = Visit(lmem->GetAddress());
        AddLine("SHR.U {}, {}, 2;", address, address);
        dest_name = fmt::format("lmem[{}].x", address);
    } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
        AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress()));
        ResetTemporaries();
        return {};
    } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
        // Global memory store, predicated on the bounds check CC set by
        // GlobalMemoryPointer (evaluated as an AddLine argument).
        AddLine("IF NE.x;");
        AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
        AddLine("ENDIF;");
        ResetTemporaries();
        return {};
    } else {
        UNREACHABLE();
        ResetTemporaries();
        return {};
    }

    AddLine("MOV.U {}, {};", dest_name, Visit(src));
    ResetTemporaries();
    return {};
}
1590
1591std::string ARBDecompiler::Select(Operation operation) {
1592 std::string temporary = AllocTemporary();
1593 AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]),
1594 Visit(operation[2]));
1595 return temporary;
1596}
1597
// Clamps operation[0] between operation[1] (low) and operation[2] (high).
// The common clamp-to-[0, 1] case is emitted as a single saturated MOV.
std::string ARBDecompiler::FClamp(Operation operation) {
    // 1.0f in hex, replace with std::bit_cast on C++20
    static constexpr u32 POSITIVE_ONE = 0x3f800000;

    std::string temporary = AllocTemporary();
    const Node& value = operation[0];
    const Node& low = operation[1];
    const Node& high = operation[2];
    const auto* const imm_low = std::get_if<ImmediateNode>(&*low);
    const auto* const imm_high = std::get_if<ImmediateNode>(&*high);
    if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) {
        AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value));
    } else {
        // NOTE(review): Visit(value) and Visit(high) are evaluated inside one
        // argument list, so the order of their emitted setup code is
        // unspecified.
        AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high));
        AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low));
    }
    return temporary;
}
1616
1617std::string ARBDecompiler::FCastHalf0(Operation operation) {
1618 const std::string temporary = AllocVectorTemporary();
1619 AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0]));
1620 return fmt::format("{}.x", temporary);
1621}
1622
1623std::string ARBDecompiler::FCastHalf1(Operation operation) {
1624 const std::string temporary = AllocVectorTemporary();
1625 AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0]));
1626 AddLine("MOV {}.x, {}.y;", temporary, temporary);
1627 return fmt::format("{}.x", temporary);
1628}
1629
1630std::string ARBDecompiler::FSqrt(Operation operation) {
1631 std::string temporary = AllocTemporary();
1632 AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0]));
1633 AddLine("RCP.F32 {}, {};", temporary, temporary);
1634 return temporary;
1635}
1636
1637std::string ARBDecompiler::FSwizzleAdd(Operation operation) {
1638 const std::string temporary = AllocVectorTemporary();
1639 if (!device.HasWarpIntrinsics()) {
1640 LOG_ERROR(Render_OpenGL,
1641 "NV_shader_thread_shuffle is missing. Kepler or better is required.");
1642 AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]));
1643 return fmt::format("{}.x", temporary);
1644 }
1645
1646 AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage));
1647 AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary);
1648 AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary);
1649 AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary);
1650 AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary);
1651 AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary);
1652 AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary);
1653 return fmt::format("{}.x", temporary);
1654}
1655
1656std::string ARBDecompiler::HAdd2(Operation operation) {
1657 const std::string tmp1 = AllocVectorTemporary();
1658 const std::string tmp2 = AllocVectorTemporary();
1659 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1660 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1661 AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2);
1662 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1663 return fmt::format("{}.x", tmp1);
1664}
1665
1666std::string ARBDecompiler::HMul2(Operation operation) {
1667 const std::string tmp1 = AllocVectorTemporary();
1668 const std::string tmp2 = AllocVectorTemporary();
1669 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1670 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1671 AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2);
1672 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1673 return fmt::format("{}.x", tmp1);
1674}
1675
1676std::string ARBDecompiler::HFma2(Operation operation) {
1677 const std::string tmp1 = AllocVectorTemporary();
1678 const std::string tmp2 = AllocVectorTemporary();
1679 const std::string tmp3 = AllocVectorTemporary();
1680 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1681 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1682 AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2]));
1683 AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3);
1684 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1685 return fmt::format("{}.x", tmp1);
1686}
1687
1688std::string ARBDecompiler::HAbsolute(Operation operation) {
1689 const std::string temporary = AllocVectorTemporary();
1690 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1691 AddLine("PK2H.F {}.x, |{}|;", temporary, temporary);
1692 return fmt::format("{}.x", temporary);
1693}
1694
1695std::string ARBDecompiler::HNegate(Operation operation) {
1696 const std::string temporary = AllocVectorTemporary();
1697 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1698 AddLine("MOVC.S RC.x, {};", Visit(operation[1]));
1699 AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary);
1700 AddLine("MOVC.S RC.x, {};", Visit(operation[2]));
1701 AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary);
1702 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1703 return fmt::format("{}.x", temporary);
1704}
1705
1706std::string ARBDecompiler::HClamp(Operation operation) {
1707 const std::string tmp1 = AllocVectorTemporary();
1708 const std::string tmp2 = AllocVectorTemporary();
1709 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1710 AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1]));
1711 AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
1712 AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2);
1713 AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2]));
1714 AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
1715 AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2);
1716 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1717 return fmt::format("{}.x", tmp1);
1718}
1719
1720std::string ARBDecompiler::HCastFloat(Operation operation) {
1721 const std::string temporary = AllocVectorTemporary();
1722 AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary);
1723 AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0]));
1724 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1725 return fmt::format("{}.x", temporary);
1726}
1727
// Reinterprets the operand per the requested half layout, returning a packed
// half2 operand string.
std::string ARBDecompiler::HUnpack(Operation operation) {
    std::string operand = Visit(operation[0]);
    switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
    case Tegra::Shader::HalfType::H0_H1:
        // Already in packed half2 form; pass through unchanged.
        return operand;
    case Tegra::Shader::HalfType::F32: {
        // Broadcast the raw 32-bit value to both lanes and pack.
        const std::string temporary = AllocVectorTemporary();
        AddLine("MOV.U {}.x, {};", temporary, operand);
        AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
        AddLine("PK2H.F {}.x, {};", temporary, temporary);
        return fmt::format("{}.x", temporary);
    }
    case Tegra::Shader::HalfType::H0_H0: {
        // Duplicate the low half into both lanes.
        const std::string temporary = AllocVectorTemporary();
        AddLine("UP2H.F {}.xy, {};", temporary, operand);
        AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
        AddLine("PK2H.F {}.x, {};", temporary, temporary);
        return fmt::format("{}.x", temporary);
    }
    case Tegra::Shader::HalfType::H1_H1: {
        // Duplicate the high half into both lanes.
        const std::string temporary = AllocVectorTemporary();
        AddLine("UP2H.F {}.xy, {};", temporary, operand);
        AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
        AddLine("PK2H.F {}.x, {};", temporary, temporary);
        return fmt::format("{}.x", temporary);
    }
    }
    UNREACHABLE();
    return "{0, 0, 0, 0}.x";
}
1758
1759std::string ARBDecompiler::HMergeF32(Operation operation) {
1760 const std::string temporary = AllocVectorTemporary();
1761 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1762 return fmt::format("{}.x", temporary);
1763}
1764
1765std::string ARBDecompiler::HMergeH0(Operation operation) {
1766 const std::string temporary = AllocVectorTemporary();
1767 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1768 AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
1769 AddLine("MOV.U {}.x, {}.z;", temporary, temporary);
1770 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1771 return fmt::format("{}.x", temporary);
1772}
1773
1774std::string ARBDecompiler::HMergeH1(Operation operation) {
1775 const std::string temporary = AllocVectorTemporary();
1776 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1777 AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
1778 AddLine("MOV.U {}.y, {}.w;", temporary, temporary);
1779 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1780 return fmt::format("{}.x", temporary);
1781}
1782
1783std::string ARBDecompiler::HPack2(Operation operation) {
1784 const std::string temporary = AllocVectorTemporary();
1785 AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0]));
1786 AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1]));
1787 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1788 return fmt::format("{}.x", temporary);
1789}
1790
1791std::string ARBDecompiler::LogicalAssign(Operation operation) {
1792 const Node& dest = operation[0];
1793 const Node& src = operation[1];
1794
1795 std::string target;
1796
1797 if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
1798 ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
1799
1800 const Tegra::Shader::Pred index = pred->GetIndex();
1801 switch (index) {
1802 case Tegra::Shader::Pred::NeverExecute:
1803 case Tegra::Shader::Pred::UnusedIndex:
1804 // Writing to these predicates is a no-op
1805 return {};
1806 }
1807 target = fmt::format("P{}.x", static_cast<u64>(index));
1808 } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) {
1809 const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
1810 target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
1811 } else {
1812 UNREACHABLE();
1813 ResetTemporaries();
1814 return {};
1815 }
1816
1817 AddLine("MOV.U {}, {};", target, Visit(src));
1818 ResetTemporaries();
1819 return {};
1820}
1821
1822std::string ARBDecompiler::LogicalPick2(Operation operation) {
1823 std::string temporary = AllocTemporary();
1824 const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue();
1825 AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index));
1826 return temporary;
1827}
1828
1829std::string ARBDecompiler::LogicalAnd2(Operation operation) {
1830 std::string temporary = AllocTemporary();
1831 const std::string op = Visit(operation[0]);
1832 AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op);
1833 return temporary;
1834}
1835
1836std::string ARBDecompiler::FloatOrdered(Operation operation) {
1837 std::string temporary = AllocTemporary();
1838 AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
1839 AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
1840 AddLine("MOV.S {}, -1;", temporary);
1841 AddLine("MOV.S {} (NAN.x), 0;", temporary);
1842 AddLine("MOV.S {} (NAN.y), 0;", temporary);
1843 return temporary;
1844}
1845
1846std::string ARBDecompiler::FloatUnordered(Operation operation) {
1847 std::string temporary = AllocTemporary();
1848 AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
1849 AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
1850 AddLine("MOV.S {}, 0;", temporary);
1851 AddLine("MOV.S {} (NAN.x), -1;", temporary);
1852 AddLine("MOV.S {} (NAN.y), -1;", temporary);
1853 return temporary;
1854}
1855
1856std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
1857 std::string temporary = AllocTemporary();
1858 AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1]));
1859 AddLine("MOV.S {}, 0;", temporary);
1860 AddLine("IF CF.x;");
1861 AddLine("MOV.S {}, -1;", temporary);
1862 AddLine("ENDIF;");
1863 return temporary;
1864}
1865
1866std::string ARBDecompiler::Texture(Operation operation) {
1867 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1868 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1869 const auto [coords, temporary, swizzle] = BuildCoords(operation);
1870
1871 std::string_view opcode = "TEX";
1872 std::string extra;
1873 if (meta.bias) {
1874 ASSERT(!meta.lod);
1875 opcode = "TXB";
1876
1877 if (swizzle < 4) {
1878 AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias));
1879 } else {
1880 const std::string bias = AllocTemporary();
1881 AddLine("MOV.F {}, {};", bias, Visit(meta.bias));
1882 extra = fmt::format(" {},", bias);
1883 }
1884 }
1885 if (meta.lod) {
1886 ASSERT(!meta.bias);
1887 opcode = "TXL";
1888
1889 if (swizzle < 4) {
1890 AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
1891 } else {
1892 const std::string lod = AllocTemporary();
1893 AddLine("MOV.F {}, {};", lod, Visit(meta.lod));
1894 extra = fmt::format(" {},", lod);
1895 }
1896 }
1897
1898 AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id,
1899 TextureType(meta), BuildAoffi(operation));
1900 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1901 return fmt::format("{}.x", temporary);
1902}
1903
1904std::string ARBDecompiler::TextureGather(Operation operation) {
1905 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1906 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1907 const auto [coords, temporary, swizzle] = BuildCoords(operation);
1908
1909 std::string comp;
1910 if (!meta.sampler.is_shadow) {
1911 const auto& immediate = std::get<ImmediateNode>(*meta.component);
1912 comp = fmt::format(".{}", Swizzle(immediate.GetValue()));
1913 }
1914
1915 AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp,
1916 TextureType(meta), BuildAoffi(operation));
1917 AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element));
1918 return fmt::format("{}.x", temporary);
1919}
1920
1921std::string ARBDecompiler::TextureQueryDimensions(Operation operation) {
1922 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1923 const std::string temporary = AllocVectorTemporary();
1924 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1925
1926 ASSERT(!meta.sampler.is_array);
1927
1928 const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0";
1929 AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta));
1930 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1931 return fmt::format("{}.x", temporary);
1932}
1933
1934std::string ARBDecompiler::TextureQueryLod(Operation operation) {
1935 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1936 const std::string temporary = AllocVectorTemporary();
1937 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1938
1939 ASSERT(!meta.sampler.is_array);
1940
1941 const std::size_t count = operation.GetOperandsCount();
1942 for (std::size_t i = 0; i < count; ++i) {
1943 AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
1944 }
1945 AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta));
1946 AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary);
1947 AddLine("TRUNC.S {}, {};", temporary, temporary);
1948 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1949 return fmt::format("{}.x", temporary);
1950}
1951
1952std::string ARBDecompiler::TexelFetch(Operation operation) {
1953 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1954 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1955 const auto [coords, temporary, swizzle] = BuildCoords(operation);
1956
1957 if (!meta.sampler.is_buffer) {
1958 ASSERT(swizzle < 4);
1959 AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
1960 }
1961 AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta),
1962 BuildAoffi(operation));
1963 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1964 return fmt::format("{}.x", temporary);
1965}
1966
1967std::string ARBDecompiler::TextureGradient(Operation operation) {
1968 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1969 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1970 const std::string ddx = AllocVectorTemporary();
1971 const std::string ddy = AllocVectorTemporary();
1972 const std::string coord = std::get<1>(BuildCoords(operation));
1973
1974 const std::size_t num_components = meta.derivates.size() / 2;
1975 for (std::size_t index = 0; index < num_components; ++index) {
1976 const char swizzle = Swizzle(index);
1977 AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2]));
1978 AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1]));
1979 }
1980
1981 const std::string_view result = coord;
1982 AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id,
1983 TextureType(meta), BuildAoffi(operation));
1984 AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element));
1985 return fmt::format("{}.x", result);
1986}
1987
1988std::string ARBDecompiler::ImageLoad(Operation operation) {
1989 const auto& meta = std::get<MetaImage>(operation.GetMeta());
1990 const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
1991 const std::size_t count = operation.GetOperandsCount();
1992 const std::string_view type = ImageType(meta.image.type);
1993
1994 const std::string temporary = AllocVectorTemporary();
1995 for (std::size_t i = 0; i < count; ++i) {
1996 AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
1997 }
1998 AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type);
1999 AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
2000 return fmt::format("{}.x", temporary);
2001}
2002
2003std::string ARBDecompiler::ImageStore(Operation operation) {
2004 const auto& meta = std::get<MetaImage>(operation.GetMeta());
2005 const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
2006 const std::size_t num_coords = operation.GetOperandsCount();
2007 const std::size_t num_values = meta.values.size();
2008 const std::string_view type = ImageType(meta.image.type);
2009
2010 const std::string coord = AllocVectorTemporary();
2011 const std::string value = AllocVectorTemporary();
2012 for (std::size_t i = 0; i < num_coords; ++i) {
2013 AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
2014 }
2015 for (std::size_t i = 0; i < num_values; ++i) {
2016 AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
2017 }
2018 AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type);
2019 return {};
2020}
2021
2022std::string ARBDecompiler::Branch(Operation operation) {
2023 const auto target = std::get<ImmediateNode>(*operation[0]);
2024 AddLine("MOV.U PC.x, {};", target.GetValue());
2025 AddLine("CONT;");
2026 return {};
2027}
2028
2029std::string ARBDecompiler::BranchIndirect(Operation operation) {
2030 AddLine("MOV.U PC.x, {};", Visit(operation[0]));
2031 AddLine("CONT;");
2032 return {};
2033}
2034
2035std::string ARBDecompiler::PushFlowStack(Operation operation) {
2036 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
2037 const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue();
2038 const std::string_view stack_name = StackName(stack);
2039 AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target);
2040 AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
2041 return {};
2042}
2043
2044std::string ARBDecompiler::PopFlowStack(Operation operation) {
2045 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
2046 const std::string_view stack_name = StackName(stack);
2047 AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
2048 AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
2049 AddLine("CONT;");
2050 return {};
2051}
2052
2053std::string ARBDecompiler::Exit(Operation) {
2054 Exit();
2055 return {};
2056}
2057
2058std::string ARBDecompiler::Discard(Operation) {
2059 AddLine("KIL TR;");
2060 return {};
2061}
2062
2063std::string ARBDecompiler::EmitVertex(Operation) {
2064 AddLine("EMIT;");
2065 return {};
2066}
2067
2068std::string ARBDecompiler::EndPrimitive(Operation) {
2069 AddLine("ENDPRIM;");
2070 return {};
2071}
2072
2073std::string ARBDecompiler::InvocationId(Operation) {
2074 return "primitive.invocation";
2075}
2076
2077std::string ARBDecompiler::YNegate(Operation) {
2078 LOG_WARNING(Render_OpenGL, "(STUBBED)");
2079 std::string temporary = AllocTemporary();
2080 AddLine("MOV.F {}, 1;", temporary);
2081 return temporary;
2082}
2083
2084std::string ARBDecompiler::ThreadId(Operation) {
2085 return fmt::format("{}.threadid", StageInputName(stage));
2086}
2087
2088std::string ARBDecompiler::ShuffleIndexed(Operation operation) {
2089 if (!device.HasWarpIntrinsics()) {
2090 LOG_ERROR(Render_OpenGL,
2091 "NV_shader_thread_shuffle is missing. Kepler or better is required.");
2092 return Visit(operation[0]);
2093 }
2094 const std::string temporary = AllocVectorTemporary();
2095 AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]),
2096 Visit(operation[1]));
2097 AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
2098 return fmt::format("{}.x", temporary);
2099}
2100
2101std::string ARBDecompiler::Barrier(Operation) {
2102 AddLine("BAR;");
2103 return {};
2104}
2105
2106std::string ARBDecompiler::MemoryBarrierGroup(Operation) {
2107 AddLine("MEMBAR.CTA;");
2108 return {};
2109}
2110
2111std::string ARBDecompiler::MemoryBarrierGlobal(Operation) {
2112 AddLine("MEMBAR;");
2113 return {};
2114}
2115
2116} // Anonymous namespace
2117
2118std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
2119 const VideoCommon::Shader::Registry& registry,
2120 Tegra::Engines::ShaderType stage, std::string_view identifier) {
2121 return ARBDecompiler(device, ir, registry, stage, identifier).Code();
2122}
2123
2124} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h
deleted file mode 100644
index 6afc87220..000000000
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.h
+++ /dev/null
@@ -1,29 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include <string_view>
9
10#include "common/common_types.h"
11
12namespace Tegra::Engines {
13enum class ShaderType : u32;
14}
15
16namespace VideoCommon::Shader {
17class ShaderIR;
18class Registry;
19} // namespace VideoCommon::Shader
20
21namespace OpenGL {
22
23class Device;
24
25std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
26 const VideoCommon::Shader::Registry& registry,
27 Tegra::Engines::ShaderType stage, std::string_view identifier);
28
29} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ceb3abcb2..3551dbdcc 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -54,40 +54,6 @@ namespace {
54 54
55constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; 55constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
56 56
57struct TextureHandle {
58 constexpr TextureHandle(u32 data, bool via_header_index) {
59 const Tegra::Texture::TextureHandle handle{data};
60 image = handle.tic_id;
61 sampler = via_header_index ? image : handle.tsc_id.Value();
62 }
63
64 u32 image;
65 u32 sampler;
66};
67
68template <typename Engine, typename Entry>
69TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
70 ShaderType shader_type, size_t index = 0) {
71 if constexpr (std::is_same_v<Entry, SamplerEntry>) {
72 if (entry.is_separated) {
73 const u32 buffer_1 = entry.buffer;
74 const u32 buffer_2 = entry.secondary_buffer;
75 const u32 offset_1 = entry.offset;
76 const u32 offset_2 = entry.secondary_offset;
77 const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
78 const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
79 return TextureHandle(handle_1 | handle_2, via_header_index);
80 }
81 }
82 if (entry.is_bindless) {
83 const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
84 return TextureHandle(raw, via_header_index);
85 }
86 const u32 buffer = engine.GetBoundBuffer();
87 const u64 offset = (entry.offset + index) * sizeof(u32);
88 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
89}
90
91/// Translates hardware transform feedback indices 57/// Translates hardware transform feedback indices
92/// @param location Hardware location 58/// @param location Hardware location
93/// @return Pair of ARB_transform_feedback3 token stream first and third arguments 59/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
@@ -119,44 +85,6 @@ std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
119void oglEnable(GLenum cap, bool state) { 85void oglEnable(GLenum cap, bool state) {
120 (state ? glEnable : glDisable)(cap); 86 (state ? glEnable : glDisable)(cap);
121} 87}
122
123ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
124 if (entry.is_buffer) {
125 return ImageViewType::Buffer;
126 }
127 switch (entry.type) {
128 case Tegra::Shader::TextureType::Texture1D:
129 return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
130 case Tegra::Shader::TextureType::Texture2D:
131 return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
132 case Tegra::Shader::TextureType::Texture3D:
133 return ImageViewType::e3D;
134 case Tegra::Shader::TextureType::TextureCube:
135 return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
136 }
137 UNREACHABLE();
138 return ImageViewType::e2D;
139}
140
141ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
142 switch (entry.type) {
143 case Tegra::Shader::ImageType::Texture1D:
144 return ImageViewType::e1D;
145 case Tegra::Shader::ImageType::Texture1DArray:
146 return ImageViewType::e1DArray;
147 case Tegra::Shader::ImageType::Texture2D:
148 return ImageViewType::e2D;
149 case Tegra::Shader::ImageType::Texture2DArray:
150 return ImageViewType::e2DArray;
151 case Tegra::Shader::ImageType::Texture3D:
152 return ImageViewType::e3D;
153 case Tegra::Shader::ImageType::TextureBuffer:
154 return ImageViewType::Buffer;
155 }
156 UNREACHABLE();
157 return ImageViewType::e2D;
158}
159
160} // Anonymous namespace 88} // Anonymous namespace
161 89
162RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 90RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@@ -172,12 +100,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
172 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), 100 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
173 shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), 101 shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
174 query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), 102 query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
175 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), 103 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
176 async_shaders(emu_window_) {
177 if (device.UseAsynchronousShaders()) {
178 async_shaders.AllocateWorkers();
179 }
180}
181 104
182RasterizerOpenGL::~RasterizerOpenGL() = default; 105RasterizerOpenGL::~RasterizerOpenGL() = default;
183 106
@@ -244,117 +167,8 @@ void RasterizerOpenGL::SyncVertexInstances() {
244 } 167 }
245} 168}
246 169
247void RasterizerOpenGL::SetupShaders(bool is_indexed) {
248 u32 clip_distances = 0;
249
250 std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
251 image_view_indices.clear();
252 sampler_handles.clear();
253
254 texture_cache.SynchronizeGraphicsDescriptors();
255
256 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
257 const auto& shader_config = maxwell3d.regs.shader_config[index];
258 const auto program{static_cast<Maxwell::ShaderProgram>(index)};
259
260 // Skip stages that are not enabled
261 if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
262 switch (program) {
263 case Maxwell::ShaderProgram::Geometry:
264 program_manager.UseGeometryShader(0);
265 break;
266 case Maxwell::ShaderProgram::Fragment:
267 program_manager.UseFragmentShader(0);
268 break;
269 default:
270 break;
271 }
272 continue;
273 }
274 // Currently this stages are not supported in the OpenGL backend.
275 // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
276 if (program == Maxwell::ShaderProgram::TesselationControl ||
277 program == Maxwell::ShaderProgram::TesselationEval) {
278 continue;
279 }
280
281 Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
282 const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
283 switch (program) {
284 case Maxwell::ShaderProgram::VertexA:
285 case Maxwell::ShaderProgram::VertexB:
286 program_manager.UseVertexShader(program_handle);
287 break;
288 case Maxwell::ShaderProgram::Geometry:
289 program_manager.UseGeometryShader(program_handle);
290 break;
291 case Maxwell::ShaderProgram::Fragment:
292 program_manager.UseFragmentShader(program_handle);
293 break;
294 default:
295 UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
296 shader_config.enable.Value(), shader_config.offset);
297 break;
298 }
299
300 // Stage indices are 0 - 5
301 const size_t stage = index == 0 ? 0 : index - 1;
302 shaders[stage] = shader;
303
304 SetupDrawTextures(shader, stage);
305 SetupDrawImages(shader, stage);
306
307 buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
308
309 buffer_cache.UnbindGraphicsStorageBuffers(stage);
310 u32 ssbo_index = 0;
311 for (const auto& buffer : shader->GetEntries().global_memory_entries) {
312 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
313 buffer.cbuf_offset, buffer.is_written);
314 ++ssbo_index;
315 }
316
317 // Workaround for Intel drivers.
318 // When a clip distance is enabled but not set in the shader it crops parts of the screen
319 // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
320 // clip distances only when it's written by a shader stage.
321 clip_distances |= shader->GetEntries().clip_distances;
322
323 // When VertexA is enabled, we have dual vertex shaders
324 if (program == Maxwell::ShaderProgram::VertexA) {
325 // VertexB was combined with VertexA, so we skip the VertexB iteration
326 ++index;
327 }
328 }
329 SyncClipEnabled(clip_distances);
330 maxwell3d.dirty.flags[Dirty::Shaders] = false;
331
332 buffer_cache.UpdateGraphicsBuffers(is_indexed);
333
334 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
335 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
336
337 buffer_cache.BindHostGeometryBuffers(is_indexed);
338
339 size_t image_view_index = 0;
340 size_t texture_index = 0;
341 size_t image_index = 0;
342 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
343 const Shader* const shader = shaders[stage];
344 if (!shader) {
345 continue;
346 }
347 buffer_cache.BindHostStageBuffers(stage);
348 const auto& base = device.GetBaseBindings(stage);
349 BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
350 texture_index, image_index);
351 }
352}
353
354void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, 170void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
355 const VideoCore::DiskResourceLoadCallback& callback) { 171 const VideoCore::DiskResourceLoadCallback& callback) {}
356 shader_cache.LoadDiskCache(title_id, stop_loading, callback);
357}
358 172
359void RasterizerOpenGL::Clear() { 173void RasterizerOpenGL::Clear() {
360 MICROPROFILE_SCOPE(OpenGL_Clears); 174 MICROPROFILE_SCOPE(OpenGL_Clears);
@@ -434,7 +248,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
434 248
435 // Setup shaders and their used resources. 249 // Setup shaders and their used resources.
436 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; 250 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
437 SetupShaders(is_indexed);
438 251
439 texture_cache.UpdateRenderTargets(false); 252 texture_cache.UpdateRenderTargets(false);
440 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); 253 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
@@ -488,27 +301,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
488 gpu.TickWork(); 301 gpu.TickWork();
489} 302}
490 303
491void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { 304void RasterizerOpenGL::DispatchCompute() {
492 Shader* const kernel = shader_cache.GetComputeKernel(code_addr); 305 UNREACHABLE_MSG("Not implemented");
493
494 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
495 BindComputeTextures(kernel);
496
497 const auto& entries = kernel->GetEntries();
498 buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
499 buffer_cache.UnbindComputeStorageBuffers();
500 u32 ssbo_index = 0;
501 for (const auto& buffer : entries.global_memory_entries) {
502 buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
503 buffer.is_written);
504 ++ssbo_index;
505 }
506 buffer_cache.UpdateComputeBuffers();
507 buffer_cache.BindHostComputeBuffers();
508
509 const auto& launch_desc = kepler_compute.launch_description;
510 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
511 ++num_queued_commands;
512} 306}
513 307
514void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { 308void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
@@ -726,106 +520,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
726 return true; 520 return true;
727} 521}
728 522
729void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
730 image_view_indices.clear();
731 sampler_handles.clear();
732
733 texture_cache.SynchronizeComputeDescriptors();
734
735 SetupComputeTextures(kernel);
736 SetupComputeImages(kernel);
737
738 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
739 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
740
741 program_manager.BindCompute(kernel->GetHandle());
742 size_t image_view_index = 0;
743 size_t texture_index = 0;
744 size_t image_index = 0;
745 BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index);
746}
747
748void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
749 GLuint base_image, size_t& image_view_index,
750 size_t& texture_index, size_t& image_index) {
751 const GLuint* const samplers = sampler_handles.data() + texture_index;
752 const GLuint* const textures = texture_handles.data() + texture_index;
753 const GLuint* const images = image_handles.data() + image_index;
754
755 const size_t num_samplers = entries.samplers.size();
756 for (const auto& sampler : entries.samplers) {
757 for (size_t i = 0; i < sampler.size; ++i) {
758 const ImageViewId image_view_id = image_view_ids[image_view_index++];
759 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
760 const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
761 texture_handles[texture_index++] = handle;
762 }
763 }
764 const size_t num_images = entries.images.size();
765 for (size_t unit = 0; unit < num_images; ++unit) {
766 // TODO: Mark as modified
767 const ImageViewId image_view_id = image_view_ids[image_view_index++];
768 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
769 const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
770 image_handles[image_index] = handle;
771 ++image_index;
772 }
773 if (num_samplers > 0) {
774 glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
775 glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
776 }
777 if (num_images > 0) {
778 glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
779 }
780}
781
782void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
783 const bool via_header_index =
784 maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
785 for (const auto& entry : shader->GetEntries().samplers) {
786 const auto shader_type = static_cast<ShaderType>(stage_index);
787 for (size_t index = 0; index < entry.size; ++index) {
788 const auto handle =
789 GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
790 const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
791 sampler_handles.push_back(sampler->Handle());
792 image_view_indices.push_back(handle.image);
793 }
794 }
795}
796
797void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
798 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
799 for (const auto& entry : kernel->GetEntries().samplers) {
800 for (size_t i = 0; i < entry.size; ++i) {
801 const auto handle =
802 GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
803 const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
804 sampler_handles.push_back(sampler->Handle());
805 image_view_indices.push_back(handle.image);
806 }
807 }
808}
809
810void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
811 const bool via_header_index =
812 maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
813 for (const auto& entry : shader->GetEntries().images) {
814 const auto shader_type = static_cast<ShaderType>(stage_index);
815 const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type);
816 image_view_indices.push_back(handle.image);
817 }
818}
819
820void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
821 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
822 for (const auto& entry : shader->GetEntries().images) {
823 const auto handle =
824 GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute);
825 image_view_indices.push_back(handle.image);
826 }
827}
828
829void RasterizerOpenGL::SyncState() { 523void RasterizerOpenGL::SyncState() {
830 SyncViewport(); 524 SyncViewport();
831 SyncRasterizeEnable(); 525 SyncRasterizeEnable();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index d30ad698f..1f58f8791 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -28,11 +28,9 @@
28#include "video_core/renderer_opengl/gl_query_cache.h" 28#include "video_core/renderer_opengl/gl_query_cache.h"
29#include "video_core/renderer_opengl/gl_resource_manager.h" 29#include "video_core/renderer_opengl/gl_resource_manager.h"
30#include "video_core/renderer_opengl/gl_shader_cache.h" 30#include "video_core/renderer_opengl/gl_shader_cache.h"
31#include "video_core/renderer_opengl/gl_shader_decompiler.h"
32#include "video_core/renderer_opengl/gl_shader_manager.h" 31#include "video_core/renderer_opengl/gl_shader_manager.h"
33#include "video_core/renderer_opengl/gl_state_tracker.h" 32#include "video_core/renderer_opengl/gl_state_tracker.h"
34#include "video_core/renderer_opengl/gl_texture_cache.h" 33#include "video_core/renderer_opengl/gl_texture_cache.h"
35#include "video_core/shader/async_shaders.h"
36#include "video_core/textures/texture.h" 34#include "video_core/textures/texture.h"
37 35
38namespace Core::Memory { 36namespace Core::Memory {
@@ -81,7 +79,7 @@ public:
81 79
82 void Draw(bool is_indexed, bool is_instanced) override; 80 void Draw(bool is_indexed, bool is_instanced) override;
83 void Clear() override; 81 void Clear() override;
84 void DispatchCompute(GPUVAddr code_addr) override; 82 void DispatchCompute() override;
85 void ResetCounter(VideoCore::QueryType type) override; 83 void ResetCounter(VideoCore::QueryType type) override;
86 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 84 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
87 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; 85 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
@@ -118,36 +116,11 @@ public:
118 return num_queued_commands > 0; 116 return num_queued_commands > 0;
119 } 117 }
120 118
121 VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
122 return async_shaders;
123 }
124
125 const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
126 return async_shaders;
127 }
128
129private: 119private:
130 static constexpr size_t MAX_TEXTURES = 192; 120 static constexpr size_t MAX_TEXTURES = 192;
131 static constexpr size_t MAX_IMAGES = 48; 121 static constexpr size_t MAX_IMAGES = 48;
132 static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; 122 static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
133 123
134 void BindComputeTextures(Shader* kernel);
135
136 void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
137 size_t& image_view_index, size_t& texture_index, size_t& image_index);
138
139 /// Configures the current textures to use for the draw command.
140 void SetupDrawTextures(const Shader* shader, size_t stage_index);
141
142 /// Configures the textures used in a compute shader.
143 void SetupComputeTextures(const Shader* kernel);
144
145 /// Configures images in a graphics shader.
146 void SetupDrawImages(const Shader* shader, size_t stage_index);
147
148 /// Configures images in a compute shader.
149 void SetupComputeImages(const Shader* shader);
150
151 /// Syncs state to match guest's 124 /// Syncs state to match guest's
152 void SyncState(); 125 void SyncState();
153 126
@@ -230,8 +203,6 @@ private:
230 /// End a transform feedback 203 /// End a transform feedback
231 void EndTransformFeedback(); 204 void EndTransformFeedback();
232 205
233 void SetupShaders(bool is_indexed);
234
235 Tegra::GPU& gpu; 206 Tegra::GPU& gpu;
236 Tegra::Engines::Maxwell3D& maxwell3d; 207 Tegra::Engines::Maxwell3D& maxwell3d;
237 Tegra::Engines::KeplerCompute& kepler_compute; 208 Tegra::Engines::KeplerCompute& kepler_compute;
@@ -251,8 +222,6 @@ private:
251 AccelerateDMA accelerate_dma; 222 AccelerateDMA accelerate_dma;
252 FenceManagerOpenGL fence_manager; 223 FenceManagerOpenGL fence_manager;
253 224
254 VideoCommon::Shader::AsyncShaders async_shaders;
255
256 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; 225 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
257 std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; 226 std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
258 boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; 227 boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 5a01c59ec..4dd166156 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -20,307 +20,19 @@
20#include "video_core/engines/maxwell_3d.h" 20#include "video_core/engines/maxwell_3d.h"
21#include "video_core/engines/shader_type.h" 21#include "video_core/engines/shader_type.h"
22#include "video_core/memory_manager.h" 22#include "video_core/memory_manager.h"
23#include "video_core/renderer_opengl/gl_arb_decompiler.h"
24#include "video_core/renderer_opengl/gl_rasterizer.h" 23#include "video_core/renderer_opengl/gl_rasterizer.h"
25#include "video_core/renderer_opengl/gl_resource_manager.h" 24#include "video_core/renderer_opengl/gl_resource_manager.h"
26#include "video_core/renderer_opengl/gl_shader_cache.h" 25#include "video_core/renderer_opengl/gl_shader_cache.h"
27#include "video_core/renderer_opengl/gl_shader_decompiler.h"
28#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
29#include "video_core/renderer_opengl/gl_state_tracker.h" 26#include "video_core/renderer_opengl/gl_state_tracker.h"
30#include "video_core/shader/memory_util.h"
31#include "video_core/shader/registry.h"
32#include "video_core/shader/shader_ir.h"
33#include "video_core/shader_cache.h" 27#include "video_core/shader_cache.h"
34#include "video_core/shader_notify.h" 28#include "video_core/shader_notify.h"
35 29
36namespace OpenGL { 30namespace OpenGL {
37 31
38using Tegra::Engines::ShaderType; 32Shader::Shader() = default;
39using VideoCommon::Shader::GetShaderAddress;
40using VideoCommon::Shader::GetShaderCode;
41using VideoCommon::Shader::GetUniqueIdentifier;
42using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
43using VideoCommon::Shader::ProgramCode;
44using VideoCommon::Shader::Registry;
45using VideoCommon::Shader::ShaderIR;
46using VideoCommon::Shader::STAGE_MAIN_OFFSET;
47
48namespace {
49
50constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
51
52/// Gets the shader type from a Maxwell program type
53constexpr GLenum GetGLShaderType(ShaderType shader_type) {
54 switch (shader_type) {
55 case ShaderType::Vertex:
56 return GL_VERTEX_SHADER;
57 case ShaderType::Geometry:
58 return GL_GEOMETRY_SHADER;
59 case ShaderType::Fragment:
60 return GL_FRAGMENT_SHADER;
61 case ShaderType::Compute:
62 return GL_COMPUTE_SHADER;
63 default:
64 return GL_NONE;
65 }
66}
67
68constexpr const char* GetShaderTypeName(ShaderType shader_type) {
69 switch (shader_type) {
70 case ShaderType::Vertex:
71 return "VS";
72 case ShaderType::TesselationControl:
73 return "HS";
74 case ShaderType::TesselationEval:
75 return "DS";
76 case ShaderType::Geometry:
77 return "GS";
78 case ShaderType::Fragment:
79 return "FS";
80 case ShaderType::Compute:
81 return "CS";
82 }
83 return "UNK";
84}
85
86constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
87 switch (program_type) {
88 case Maxwell::ShaderProgram::VertexA:
89 case Maxwell::ShaderProgram::VertexB:
90 return ShaderType::Vertex;
91 case Maxwell::ShaderProgram::TesselationControl:
92 return ShaderType::TesselationControl;
93 case Maxwell::ShaderProgram::TesselationEval:
94 return ShaderType::TesselationEval;
95 case Maxwell::ShaderProgram::Geometry:
96 return ShaderType::Geometry;
97 case Maxwell::ShaderProgram::Fragment:
98 return ShaderType::Fragment;
99 }
100 return {};
101}
102
103constexpr GLenum AssemblyEnum(ShaderType shader_type) {
104 switch (shader_type) {
105 case ShaderType::Vertex:
106 return GL_VERTEX_PROGRAM_NV;
107 case ShaderType::TesselationControl:
108 return GL_TESS_CONTROL_PROGRAM_NV;
109 case ShaderType::TesselationEval:
110 return GL_TESS_EVALUATION_PROGRAM_NV;
111 case ShaderType::Geometry:
112 return GL_GEOMETRY_PROGRAM_NV;
113 case ShaderType::Fragment:
114 return GL_FRAGMENT_PROGRAM_NV;
115 case ShaderType::Compute:
116 return GL_COMPUTE_PROGRAM_NV;
117 }
118 return {};
119}
120
121std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
122 return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
123}
124
125std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
126 const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
127 const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
128 entry.graphics_info, entry.compute_info};
129 auto registry = std::make_shared<Registry>(entry.type, info);
130 for (const auto& [address, value] : entry.keys) {
131 const auto [buffer, offset] = address;
132 registry->InsertKey(buffer, offset, value);
133 }
134 for (const auto& [offset, sampler] : entry.bound_samplers) {
135 registry->InsertBoundSampler(offset, sampler);
136 }
137 for (const auto& [key, sampler] : entry.bindless_samplers) {
138 const auto [buffer, offset] = key;
139 registry->InsertBindlessSampler(buffer, offset, sampler);
140 }
141 return registry;
142}
143
144std::unordered_set<GLenum> GetSupportedFormats() {
145 GLint num_formats;
146 glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
147
148 std::vector<GLint> formats(num_formats);
149 glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
150
151 std::unordered_set<GLenum> supported_formats;
152 for (const GLint format : formats) {
153 supported_formats.insert(static_cast<GLenum>(format));
154 }
155 return supported_formats;
156}
157
158} // Anonymous namespace
159
160ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
161 const ShaderIR& ir, const Registry& registry, bool hint_retrievable) {
162 if (device.UseDriverCache()) {
163 // Ignore hint retrievable if we are using the driver cache
164 hint_retrievable = false;
165 }
166 const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
167 LOG_INFO(Render_OpenGL, "{}", shader_id);
168
169 auto program = std::make_shared<ProgramHandle>();
170
171 if (device.UseAssemblyShaders()) {
172 const std::string arb =
173 DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
174
175 GLuint& arb_prog = program->assembly_program.handle;
176
177// Commented out functions signal OpenGL errors but are compatible with apitrace.
178// Use them only to capture and replay on apitrace.
179#if 0
180 glGenProgramsNV(1, &arb_prog);
181 glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()),
182 reinterpret_cast<const GLubyte*>(arb.data()));
183#else
184 glGenProgramsARB(1, &arb_prog);
185 glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB,
186 static_cast<GLsizei>(arb.size()), arb.data());
187#endif
188 const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
189 if (err && *err) {
190 LOG_CRITICAL(Render_OpenGL, "{}", err);
191 LOG_INFO(Render_OpenGL, "\n{}", arb);
192 }
193 } else {
194 const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
195 OGLShader shader;
196 shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
197
198 program->source_program.Create(true, hint_retrievable, shader.handle);
199 }
200
201 return program;
202}
203
204Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_,
205 ProgramSharedPtr program_, bool is_built_)
206 : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)},
207 is_built{is_built_} {
208 handle = program->assembly_program.handle;
209 if (handle == 0) {
210 handle = program->source_program.handle;
211 }
212 if (is_built) {
213 ASSERT(handle != 0);
214 }
215}
216 33
217Shader::~Shader() = default; 34Shader::~Shader() = default;
218 35
219GLuint Shader::GetHandle() const {
220 DEBUG_ASSERT(registry->IsConsistent());
221 return handle;
222}
223
224bool Shader::IsBuilt() const {
225 return is_built;
226}
227
228void Shader::AsyncOpenGLBuilt(OGLProgram new_program) {
229 program->source_program = std::move(new_program);
230 handle = program->source_program.handle;
231 is_built = true;
232}
233
234void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) {
235 program->assembly_program = std::move(new_program);
236 handle = program->assembly_program.handle;
237 is_built = true;
238}
239
240std::unique_ptr<Shader> Shader::CreateStageFromMemory(
241 const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code,
242 ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) {
243 const auto shader_type = GetShaderType(program_type);
244
245 auto& gpu = params.gpu;
246 gpu.ShaderNotify().MarkSharderBuilding();
247
248 auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D());
249 if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) {
250 const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
251 // TODO(Rodrigo): Handle VertexA shaders
252 // std::optional<ShaderIR> ir_b;
253 // if (!code_b.empty()) {
254 // ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
255 // }
256 auto program =
257 BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
258 ShaderDiskCacheEntry entry;
259 entry.type = shader_type;
260 entry.code = std::move(code);
261 entry.code_b = std::move(code_b);
262 entry.unique_identifier = params.unique_identifier;
263 entry.bound_buffer = registry->GetBoundBuffer();
264 entry.graphics_info = registry->GetGraphicsInfo();
265 entry.keys = registry->GetKeys();
266 entry.bound_samplers = registry->GetBoundSamplers();
267 entry.bindless_samplers = registry->GetBindlessSamplers();
268 params.disk_cache.SaveEntry(std::move(entry));
269
270 gpu.ShaderNotify().MarkShaderComplete();
271
272 return std::unique_ptr<Shader>(new Shader(std::move(registry),
273 MakeEntries(params.device, ir, shader_type),
274 std::move(program), true));
275 } else {
276 // Required for entries
277 const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
278 auto entries = MakeEntries(params.device, ir, shader_type);
279
280 async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier,
281 std::move(code), std::move(code_b), STAGE_MAIN_OFFSET,
282 COMPILER_SETTINGS, *registry, cpu_addr);
283
284 auto program = std::make_shared<ProgramHandle>();
285 return std::unique_ptr<Shader>(
286 new Shader(std::move(registry), std::move(entries), std::move(program), false));
287 }
288}
289
290std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
291 ProgramCode code) {
292 auto& gpu = params.gpu;
293 gpu.ShaderNotify().MarkSharderBuilding();
294
295 auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine);
296 const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
297 const u64 uid = params.unique_identifier;
298 auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
299
300 ShaderDiskCacheEntry entry;
301 entry.type = ShaderType::Compute;
302 entry.code = std::move(code);
303 entry.unique_identifier = uid;
304 entry.bound_buffer = registry->GetBoundBuffer();
305 entry.compute_info = registry->GetComputeInfo();
306 entry.keys = registry->GetKeys();
307 entry.bound_samplers = registry->GetBoundSamplers();
308 entry.bindless_samplers = registry->GetBindlessSamplers();
309 params.disk_cache.SaveEntry(std::move(entry));
310
311 gpu.ShaderNotify().MarkShaderComplete();
312
313 return std::unique_ptr<Shader>(new Shader(std::move(registry),
314 MakeEntries(params.device, ir, ShaderType::Compute),
315 std::move(program)));
316}
317
318std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
319 const PrecompiledShader& precompiled_shader) {
320 return std::unique_ptr<Shader>(new Shader(
321 precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
322}
323
324ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, 36ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
325 Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 37 Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
326 Tegra::Engines::Maxwell3D& maxwell3d_, 38 Tegra::Engines::Maxwell3D& maxwell3d_,
@@ -331,278 +43,4 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
331 43
332ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; 44ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
333 45
334void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading,
335 const VideoCore::DiskResourceLoadCallback& callback) {
336 disk_cache.BindTitleID(title_id);
337 const std::optional transferable = disk_cache.LoadTransferable();
338
339 LOG_INFO(Render_OpenGL, "Total Shader Count: {}",
340 transferable.has_value() ? transferable->size() : 0);
341
342 if (!transferable) {
343 return;
344 }
345
346 std::vector<ShaderDiskCachePrecompiled> gl_cache;
347 if (!device.UseAssemblyShaders() && !device.UseDriverCache()) {
348 // Only load precompiled cache when we are not using assembly shaders
349 gl_cache = disk_cache.LoadPrecompiled();
350 }
351 const auto supported_formats = GetSupportedFormats();
352
353 // Track if precompiled cache was altered during loading to know if we have to
354 // serialize the virtual precompiled cache file back to the hard drive
355 bool precompiled_cache_altered = false;
356
357 // Inform the frontend about shader build initialization
358 if (callback) {
359 callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size());
360 }
361
362 std::mutex mutex;
363 std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex
364 std::atomic_bool gl_cache_failed = false;
365
366 const auto find_precompiled = [&gl_cache](u64 id) {
367 return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier);
368 };
369
370 const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
371 std::size_t end) {
372 const auto scope = context->Acquire();
373
374 for (std::size_t i = begin; i < end; ++i) {
375 if (stop_loading.stop_requested()) {
376 return;
377 }
378 const auto& entry = (*transferable)[i];
379 const u64 uid = entry.unique_identifier;
380 const auto it = find_precompiled(uid);
381 const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
382
383 const bool is_compute = entry.type == ShaderType::Compute;
384 const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
385 auto registry = MakeRegistry(entry);
386 const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
387
388 ProgramSharedPtr program;
389 if (precompiled_entry) {
390 // If the shader is precompiled, attempt to load it with
391 program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
392 if (!program) {
393 gl_cache_failed = true;
394 }
395 }
396 if (!program) {
397 // Otherwise compile it from GLSL
398 program = BuildShader(device, entry.type, uid, ir, *registry, true);
399 }
400
401 PrecompiledShader shader;
402 shader.program = std::move(program);
403 shader.registry = std::move(registry);
404 shader.entries = MakeEntries(device, ir, entry.type);
405
406 std::scoped_lock lock{mutex};
407 if (callback) {
408 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
409 transferable->size());
410 }
411 runtime_cache.emplace(entry.unique_identifier, std::move(shader));
412 }
413 };
414
415 const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())};
416 const std::size_t bucket_size{transferable->size() / num_workers};
417 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
418 std::vector<std::thread> threads(num_workers);
419 for (std::size_t i = 0; i < num_workers; ++i) {
420 const bool is_last_worker = i + 1 == num_workers;
421 const std::size_t start{bucket_size * i};
422 const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size};
423
424 // On some platforms the shared context has to be created from the GUI thread
425 contexts[i] = emu_window.CreateSharedContext();
426 threads[i] = std::thread(worker, contexts[i].get(), start, end);
427 }
428 for (auto& thread : threads) {
429 thread.join();
430 }
431
432 if (gl_cache_failed) {
433 // Invalidate the precompiled cache if a shader dumped shader was rejected
434 disk_cache.InvalidatePrecompiled();
435 precompiled_cache_altered = true;
436 return;
437 }
438 if (stop_loading.stop_requested()) {
439 return;
440 }
441
442 if (device.UseAssemblyShaders() || device.UseDriverCache()) {
443 // Don't store precompiled binaries for assembly shaders or when using the driver cache
444 return;
445 }
446
447 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
448 // before precompiling them
449
450 for (std::size_t i = 0; i < transferable->size(); ++i) {
451 const u64 id = (*transferable)[i].unique_identifier;
452 const auto it = find_precompiled(id);
453 if (it == gl_cache.end()) {
454 const GLuint program = runtime_cache.at(id).program->source_program.handle;
455 disk_cache.SavePrecompiled(id, program);
456 precompiled_cache_altered = true;
457 }
458 }
459
460 if (precompiled_cache_altered) {
461 disk_cache.SaveVirtualPrecompiledFile();
462 }
463}
464
465ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
466 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
467 const std::unordered_set<GLenum>& supported_formats) {
468 if (!supported_formats.contains(precompiled_entry.binary_format)) {
469 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
470 return {};
471 }
472
473 auto program = std::make_shared<ProgramHandle>();
474 GLuint& handle = program->source_program.handle;
475 handle = glCreateProgram();
476 glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
477 glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(),
478 static_cast<GLsizei>(precompiled_entry.binary.size()));
479
480 GLint link_status;
481 glGetProgramiv(handle, GL_LINK_STATUS, &link_status);
482 if (link_status == GL_FALSE) {
483 LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
484 return {};
485 }
486
487 return program;
488}
489
490Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
491 VideoCommon::Shader::AsyncShaders& async_shaders) {
492 if (!maxwell3d.dirty.flags[Dirty::Shaders]) {
493 auto* last_shader = last_shaders[static_cast<std::size_t>(program)];
494 if (last_shader->IsBuilt()) {
495 return last_shader;
496 }
497 }
498
499 const GPUVAddr address{GetShaderAddress(maxwell3d, program)};
500
501 if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) {
502 auto completed_work = async_shaders.GetCompletedWork();
503 for (auto& work : completed_work) {
504 Shader* shader = TryGet(work.cpu_address);
505 gpu.ShaderNotify().MarkShaderComplete();
506 if (shader == nullptr) {
507 continue;
508 }
509 using namespace VideoCommon::Shader;
510 if (work.backend == AsyncShaders::Backend::OpenGL) {
511 shader->AsyncOpenGLBuilt(std::move(work.program.opengl));
512 } else if (work.backend == AsyncShaders::Backend::GLASM) {
513 shader->AsyncGLASMBuilt(std::move(work.program.glasm));
514 }
515
516 auto& registry = shader->GetRegistry();
517
518 ShaderDiskCacheEntry entry;
519 entry.type = work.shader_type;
520 entry.code = std::move(work.code);
521 entry.code_b = std::move(work.code_b);
522 entry.unique_identifier = work.uid;
523 entry.bound_buffer = registry.GetBoundBuffer();
524 entry.graphics_info = registry.GetGraphicsInfo();
525 entry.keys = registry.GetKeys();
526 entry.bound_samplers = registry.GetBoundSamplers();
527 entry.bindless_samplers = registry.GetBindlessSamplers();
528 disk_cache.SaveEntry(std::move(entry));
529 }
530 }
531
532 // Look up shader in the cache based on address
533 const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)};
534 if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
535 return last_shaders[static_cast<std::size_t>(program)] = shader;
536 }
537
538 const u8* const host_ptr{gpu_memory.GetPointer(address)};
539
540 // No shader found - create a new one
541 ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)};
542 ProgramCode code_b;
543 if (program == Maxwell::ShaderProgram::VertexA) {
544 const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)};
545 const u8* host_ptr_b = gpu_memory.GetPointer(address_b);
546 code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false);
547 }
548 const std::size_t code_size = code.size() * sizeof(u64);
549
550 const u64 unique_identifier = GetUniqueIdentifier(
551 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
552
553 const ShaderParameters params{gpu, maxwell3d, disk_cache, device,
554 *cpu_addr, host_ptr, unique_identifier};
555
556 std::unique_ptr<Shader> shader;
557 const auto found = runtime_cache.find(unique_identifier);
558 if (found == runtime_cache.end()) {
559 shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b),
560 async_shaders, cpu_addr.value_or(0));
561 } else {
562 shader = Shader::CreateFromCache(params, found->second);
563 }
564
565 Shader* const result = shader.get();
566 if (cpu_addr) {
567 Register(std::move(shader), *cpu_addr, code_size);
568 } else {
569 null_shader = std::move(shader);
570 }
571
572 return last_shaders[static_cast<std::size_t>(program)] = result;
573}
574
575Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
576 const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)};
577
578 if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
579 return kernel;
580 }
581
582 // No kernel found, create a new one
583 const u8* host_ptr{gpu_memory.GetPointer(code_addr)};
584 ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)};
585 const std::size_t code_size{code.size() * sizeof(u64)};
586 const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
587
588 const ShaderParameters params{gpu, kepler_compute, disk_cache, device,
589 *cpu_addr, host_ptr, unique_identifier};
590
591 std::unique_ptr<Shader> kernel;
592 const auto found = runtime_cache.find(unique_identifier);
593 if (found == runtime_cache.end()) {
594 kernel = Shader::CreateKernelFromMemory(params, std::move(code));
595 } else {
596 kernel = Shader::CreateFromCache(params, found->second);
597 }
598
599 Shader* const result = kernel.get();
600 if (cpu_addr) {
601 Register(std::move(kernel), *cpu_addr, code_size);
602 } else {
603 null_kernel = std::move(kernel);
604 }
605 return result;
606}
607
608} // namespace OpenGL 46} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index b30308b6f..ad3d15a76 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -19,10 +19,6 @@
19#include "common/common_types.h" 19#include "common/common_types.h"
20#include "video_core/engines/shader_type.h" 20#include "video_core/engines/shader_type.h"
21#include "video_core/renderer_opengl/gl_resource_manager.h" 21#include "video_core/renderer_opengl/gl_resource_manager.h"
22#include "video_core/renderer_opengl/gl_shader_decompiler.h"
23#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
24#include "video_core/shader/registry.h"
25#include "video_core/shader/shader_ir.h"
26#include "video_core/shader_cache.h" 22#include "video_core/shader_cache.h"
27 23
28namespace Tegra { 24namespace Tegra {
@@ -33,10 +29,6 @@ namespace Core::Frontend {
33class EmuWindow; 29class EmuWindow;
34} 30}
35 31
36namespace VideoCommon::Shader {
37class AsyncShaders;
38}
39
40namespace OpenGL { 32namespace OpenGL {
41 33
42class Device; 34class Device;
@@ -44,77 +36,10 @@ class RasterizerOpenGL;
44 36
45using Maxwell = Tegra::Engines::Maxwell3D::Regs; 37using Maxwell = Tegra::Engines::Maxwell3D::Regs;
46 38
47struct ProgramHandle { 39class Shader {
48 OGLProgram source_program;
49 OGLAssemblyProgram assembly_program;
50};
51using ProgramSharedPtr = std::shared_ptr<ProgramHandle>;
52
53struct PrecompiledShader {
54 ProgramSharedPtr program;
55 std::shared_ptr<VideoCommon::Shader::Registry> registry;
56 ShaderEntries entries;
57};
58
59struct ShaderParameters {
60 Tegra::GPU& gpu;
61 Tegra::Engines::ConstBufferEngineInterface& engine;
62 ShaderDiskCacheOpenGL& disk_cache;
63 const Device& device;
64 VAddr cpu_addr;
65 const u8* host_ptr;
66 u64 unique_identifier;
67};
68
69ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type,
70 u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir,
71 const VideoCommon::Shader::Registry& registry,
72 bool hint_retrievable = false);
73
74class Shader final {
75public: 40public:
41 explicit Shader();
76 ~Shader(); 42 ~Shader();
77
78 /// Gets the GL program handle for the shader
79 GLuint GetHandle() const;
80
81 bool IsBuilt() const;
82
83 /// Gets the shader entries for the shader
84 const ShaderEntries& GetEntries() const {
85 return entries;
86 }
87
88 const VideoCommon::Shader::Registry& GetRegistry() const {
89 return *registry;
90 }
91
92 /// Mark a OpenGL shader as built
93 void AsyncOpenGLBuilt(OGLProgram new_program);
94
95 /// Mark a GLASM shader as built
96 void AsyncGLASMBuilt(OGLAssemblyProgram new_program);
97
98 static std::unique_ptr<Shader> CreateStageFromMemory(
99 const ShaderParameters& params, Maxwell::ShaderProgram program_type,
100 ProgramCode program_code, ProgramCode program_code_b,
101 VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr);
102
103 static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
104 ProgramCode code);
105
106 static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
107 const PrecompiledShader& precompiled_shader);
108
109private:
110 explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
111 ProgramSharedPtr program, bool is_built_ = true);
112
113 std::shared_ptr<VideoCommon::Shader::Registry> registry;
114 ShaderEntries entries;
115 ProgramSharedPtr program;
116 GLuint handle = 0;
117 bool is_built{};
118}; 43};
119 44
120class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { 45class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
@@ -126,36 +51,13 @@ public:
126 Tegra::MemoryManager& gpu_memory_, const Device& device_); 51 Tegra::MemoryManager& gpu_memory_, const Device& device_);
127 ~ShaderCacheOpenGL() override; 52 ~ShaderCacheOpenGL() override;
128 53
129 /// Loads disk cache for the current game
130 void LoadDiskCache(u64 title_id, std::stop_token stop_loading,
131 const VideoCore::DiskResourceLoadCallback& callback);
132
133 /// Gets the current specified shader stage program
134 Shader* GetStageProgram(Maxwell::ShaderProgram program,
135 VideoCommon::Shader::AsyncShaders& async_shaders);
136
137 /// Gets a compute kernel in the passed address
138 Shader* GetComputeKernel(GPUVAddr code_addr);
139
140private: 54private:
141 ProgramSharedPtr GeneratePrecompiledProgram(
142 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
143 const std::unordered_set<GLenum>& supported_formats);
144
145 Core::Frontend::EmuWindow& emu_window; 55 Core::Frontend::EmuWindow& emu_window;
146 Tegra::GPU& gpu; 56 Tegra::GPU& gpu;
147 Tegra::MemoryManager& gpu_memory; 57 Tegra::MemoryManager& gpu_memory;
148 Tegra::Engines::Maxwell3D& maxwell3d; 58 Tegra::Engines::Maxwell3D& maxwell3d;
149 Tegra::Engines::KeplerCompute& kepler_compute; 59 Tegra::Engines::KeplerCompute& kepler_compute;
150 const Device& device; 60 const Device& device;
151
152 ShaderDiskCacheOpenGL disk_cache;
153 std::unordered_map<u64, PrecompiledShader> runtime_cache;
154
155 std::unique_ptr<Shader> null_shader;
156 std::unique_ptr<Shader> null_kernel;
157
158 std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
159}; 61};
160 62
161} // namespace OpenGL 63} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
deleted file mode 100644
index 9c28498e8..000000000
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ /dev/null
@@ -1,2986 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <string>
7#include <string_view>
8#include <utility>
9#include <variant>
10#include <vector>
11
12#include <fmt/format.h>
13
14#include "common/alignment.h"
15#include "common/assert.h"
16#include "common/common_types.h"
17#include "common/div_ceil.h"
18#include "common/logging/log.h"
19#include "video_core/engines/maxwell_3d.h"
20#include "video_core/engines/shader_type.h"
21#include "video_core/renderer_opengl/gl_device.h"
22#include "video_core/renderer_opengl/gl_rasterizer.h"
23#include "video_core/renderer_opengl/gl_shader_decompiler.h"
24#include "video_core/shader/ast.h"
25#include "video_core/shader/node.h"
26#include "video_core/shader/shader_ir.h"
27#include "video_core/shader/transform_feedback.h"
28
29namespace OpenGL {
30
31namespace {
32
33using Tegra::Engines::ShaderType;
34using Tegra::Shader::Attribute;
35using Tegra::Shader::Header;
36using Tegra::Shader::IpaInterpMode;
37using Tegra::Shader::IpaMode;
38using Tegra::Shader::IpaSampleMode;
39using Tegra::Shader::PixelImap;
40using Tegra::Shader::Register;
41using Tegra::Shader::TextureType;
42
43using namespace VideoCommon::Shader;
44using namespace std::string_literals;
45
46using Maxwell = Tegra::Engines::Maxwell3D::Regs;
47using Operation = const OperationNode&;
48
49class ASTDecompiler;
50class ExprDecompiler;
51
52enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
53
54constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
55
56constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr";
57constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr";
58
59struct TextureOffset {};
60struct TextureDerivates {};
61using TextureArgument = std::pair<Type, Node>;
62using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
63
64constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
65constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
66
67constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt
68#define ftou floatBitsToUint
69#define itof intBitsToFloat
70#define utof uintBitsToFloat
71
72bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
73 bvec2 is_nan1 = isnan(pair1);
74 bvec2 is_nan2 = isnan(pair2);
75 return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
76}}
77
78const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
79const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
80)";
81
82class ShaderWriter final {
83public:
84 void AddExpression(std::string_view text) {
85 DEBUG_ASSERT(scope >= 0);
86 if (!text.empty()) {
87 AppendIndentation();
88 }
89 shader_source += text;
90 }
91
92 // Forwards all arguments directly to libfmt.
93 // Note that all formatting requirements for fmt must be
94 // obeyed when using this function. (e.g. {{ must be used
95 // printing the character '{' is desirable. Ditto for }} and '}',
96 // etc).
97 template <typename... Args>
98 void AddLine(std::string_view text, Args&&... args) {
99 AddExpression(fmt::format(fmt::runtime(text), std::forward<Args>(args)...));
100 AddNewLine();
101 }
102
103 void AddNewLine() {
104 DEBUG_ASSERT(scope >= 0);
105 shader_source += '\n';
106 }
107
108 std::string GenerateTemporary() {
109 return fmt::format("tmp{}", temporary_index++);
110 }
111
112 std::string GetResult() {
113 return std::move(shader_source);
114 }
115
116 s32 scope = 0;
117
118private:
119 void AppendIndentation() {
120 shader_source.append(static_cast<std::size_t>(scope) * 4, ' ');
121 }
122
123 std::string shader_source;
124 u32 temporary_index = 1;
125};
126
127class Expression final {
128public:
129 Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} {
130 ASSERT(type != Type::Void);
131 }
132 Expression() : type{Type::Void} {}
133
134 Type GetType() const {
135 return type;
136 }
137
138 std::string GetCode() const {
139 return code;
140 }
141
142 void CheckVoid() const {
143 ASSERT(type == Type::Void);
144 }
145
146 std::string As(Type type_) const {
147 switch (type_) {
148 case Type::Bool:
149 return AsBool();
150 case Type::Bool2:
151 return AsBool2();
152 case Type::Float:
153 return AsFloat();
154 case Type::Int:
155 return AsInt();
156 case Type::Uint:
157 return AsUint();
158 case Type::HalfFloat:
159 return AsHalfFloat();
160 default:
161 UNREACHABLE_MSG("Invalid type");
162 return code;
163 }
164 }
165
166 std::string AsBool() const {
167 switch (type) {
168 case Type::Bool:
169 return code;
170 default:
171 UNREACHABLE_MSG("Incompatible types");
172 return code;
173 }
174 }
175
176 std::string AsBool2() const {
177 switch (type) {
178 case Type::Bool2:
179 return code;
180 default:
181 UNREACHABLE_MSG("Incompatible types");
182 return code;
183 }
184 }
185
186 std::string AsFloat() const {
187 switch (type) {
188 case Type::Float:
189 return code;
190 case Type::Uint:
191 return fmt::format("utof({})", code);
192 case Type::Int:
193 return fmt::format("itof({})", code);
194 case Type::HalfFloat:
195 return fmt::format("utof(packHalf2x16({}))", code);
196 default:
197 UNREACHABLE_MSG("Incompatible types");
198 return code;
199 }
200 }
201
202 std::string AsInt() const {
203 switch (type) {
204 case Type::Float:
205 return fmt::format("ftoi({})", code);
206 case Type::Uint:
207 return fmt::format("int({})", code);
208 case Type::Int:
209 return code;
210 case Type::HalfFloat:
211 return fmt::format("int(packHalf2x16({}))", code);
212 default:
213 UNREACHABLE_MSG("Incompatible types");
214 return code;
215 }
216 }
217
218 std::string AsUint() const {
219 switch (type) {
220 case Type::Float:
221 return fmt::format("ftou({})", code);
222 case Type::Uint:
223 return code;
224 case Type::Int:
225 return fmt::format("uint({})", code);
226 case Type::HalfFloat:
227 return fmt::format("packHalf2x16({})", code);
228 default:
229 UNREACHABLE_MSG("Incompatible types");
230 return code;
231 }
232 }
233
234 std::string AsHalfFloat() const {
235 switch (type) {
236 case Type::Float:
237 return fmt::format("unpackHalf2x16(ftou({}))", code);
238 case Type::Uint:
239 return fmt::format("unpackHalf2x16({})", code);
240 case Type::Int:
241 return fmt::format("unpackHalf2x16(int({}))", code);
242 case Type::HalfFloat:
243 return code;
244 default:
245 UNREACHABLE_MSG("Incompatible types");
246 return code;
247 }
248 }
249
250private:
251 std::string code;
252 Type type{};
253};
254
255const char* GetTypeString(Type type) {
256 switch (type) {
257 case Type::Bool:
258 return "bool";
259 case Type::Bool2:
260 return "bvec2";
261 case Type::Float:
262 return "float";
263 case Type::Int:
264 return "int";
265 case Type::Uint:
266 return "uint";
267 case Type::HalfFloat:
268 return "vec2";
269 default:
270 UNREACHABLE_MSG("Invalid type");
271 return "<invalid type>";
272 }
273}
274
275const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
276 switch (image_type) {
277 case Tegra::Shader::ImageType::Texture1D:
278 return "1D";
279 case Tegra::Shader::ImageType::TextureBuffer:
280 return "Buffer";
281 case Tegra::Shader::ImageType::Texture1DArray:
282 return "1DArray";
283 case Tegra::Shader::ImageType::Texture2D:
284 return "2D";
285 case Tegra::Shader::ImageType::Texture2DArray:
286 return "2DArray";
287 case Tegra::Shader::ImageType::Texture3D:
288 return "3D";
289 default:
290 UNREACHABLE();
291 return "1D";
292 }
293}
294
295/// Describes primitive behavior on geometry shaders
296std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) {
297 switch (topology) {
298 case Maxwell::PrimitiveTopology::Points:
299 return {"points", 1};
300 case Maxwell::PrimitiveTopology::Lines:
301 case Maxwell::PrimitiveTopology::LineStrip:
302 return {"lines", 2};
303 case Maxwell::PrimitiveTopology::LinesAdjacency:
304 case Maxwell::PrimitiveTopology::LineStripAdjacency:
305 return {"lines_adjacency", 4};
306 case Maxwell::PrimitiveTopology::Triangles:
307 case Maxwell::PrimitiveTopology::TriangleStrip:
308 case Maxwell::PrimitiveTopology::TriangleFan:
309 return {"triangles", 3};
310 case Maxwell::PrimitiveTopology::TrianglesAdjacency:
311 case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
312 return {"triangles_adjacency", 6};
313 default:
314 UNIMPLEMENTED_MSG("topology={}", topology);
315 return {"points", 1};
316 }
317}
318
319/// Generates code to use for a swizzle operation.
320constexpr const char* GetSwizzle(std::size_t element) {
321 constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
322 return swizzle.at(element);
323}
324
325constexpr const char* GetColorSwizzle(std::size_t element) {
326 constexpr std::array swizzle = {".r", ".g", ".b", ".a"};
327 return swizzle.at(element);
328}
329
330/// Translate topology
331std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
332 switch (topology) {
333 case Tegra::Shader::OutputTopology::PointList:
334 return "points";
335 case Tegra::Shader::OutputTopology::LineStrip:
336 return "line_strip";
337 case Tegra::Shader::OutputTopology::TriangleStrip:
338 return "triangle_strip";
339 default:
340 UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
341 return "points";
342 }
343}
344
345/// Returns true if an object has to be treated as precise
346bool IsPrecise(Operation operand) {
347 const auto& meta{operand.GetMeta()};
348 if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
349 return arithmetic->precise;
350 }
351 return false;
352}
353
354bool IsPrecise(const Node& node) {
355 if (const auto operation = std::get_if<OperationNode>(&*node)) {
356 return IsPrecise(*operation);
357 }
358 return false;
359}
360
361constexpr bool IsGenericAttribute(Attribute::Index index) {
362 return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
363}
364
365constexpr bool IsLegacyTexCoord(Attribute::Index index) {
366 return static_cast<int>(index) >= static_cast<int>(Attribute::Index::TexCoord_0) &&
367 static_cast<int>(index) <= static_cast<int>(Attribute::Index::TexCoord_7);
368}
369
370constexpr Attribute::Index ToGenericAttribute(u64 value) {
371 return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0));
372}
373
374constexpr int GetLegacyTexCoordIndex(Attribute::Index index) {
375 return static_cast<int>(index) - static_cast<int>(Attribute::Index::TexCoord_0);
376}
377
378u32 GetGenericAttributeIndex(Attribute::Index index) {
379 ASSERT(IsGenericAttribute(index));
380 return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
381}
382
383constexpr const char* GetFlowStackPrefix(MetaStackClass stack) {
384 switch (stack) {
385 case MetaStackClass::Ssy:
386 return "ssy";
387 case MetaStackClass::Pbk:
388 return "pbk";
389 }
390 return {};
391}
392
393std::string FlowStackName(MetaStackClass stack) {
394 return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack));
395}
396
397std::string FlowStackTopName(MetaStackClass stack) {
398 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
399}
400
401struct GenericVaryingDescription {
402 std::string name;
403 u8 first_element = 0;
404 bool is_scalar = false;
405};
406
407class GLSLDecompiler final {
408public:
409 explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
410 ShaderType stage_, std::string_view identifier_,
411 std::string_view suffix_)
412 : device{device_}, ir{ir_}, registry{registry_}, stage{stage_},
413 identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} {
414 if (stage != ShaderType::Compute) {
415 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
416 }
417 }
418
419 void Decompile() {
420 DeclareHeader();
421 DeclareVertex();
422 DeclareGeometry();
423 DeclareFragment();
424 DeclareCompute();
425 DeclareInputAttributes();
426 DeclareOutputAttributes();
427 DeclareImages();
428 DeclareSamplers();
429 DeclareGlobalMemory();
430 DeclareConstantBuffers();
431 DeclareLocalMemory();
432 DeclareRegisters();
433 DeclarePredicates();
434 DeclareInternalFlags();
435 DeclareCustomVariables();
436 DeclarePhysicalAttributeReader();
437
438 code.AddLine("void main() {{");
439 ++code.scope;
440
441 if (stage == ShaderType::Vertex) {
442 code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
443 }
444
445 if (ir.IsDecompiled()) {
446 DecompileAST();
447 } else {
448 DecompileBranchMode();
449 }
450
451 --code.scope;
452 code.AddLine("}}");
453 }
454
455 std::string GetResult() {
456 return code.GetResult();
457 }
458
459private:
460 friend class ASTDecompiler;
461 friend class ExprDecompiler;
462
463 void DecompileBranchMode() {
464 // VM's program counter
465 const auto first_address = ir.GetBasicBlocks().begin()->first;
466 code.AddLine("uint jmp_to = {}U;", first_address);
467
468 // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
469 // unlikely that shaders will use 20 nested SSYs and PBKs.
470 constexpr u32 FLOW_STACK_SIZE = 20;
471 if (!ir.IsFlowStackDisabled()) {
472 for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
473 code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
474 code.AddLine("uint {} = 0U;", FlowStackTopName(stack));
475 }
476 }
477
478 code.AddLine("while (true) {{");
479 ++code.scope;
480
481 code.AddLine("switch (jmp_to) {{");
482
483 for (const auto& pair : ir.GetBasicBlocks()) {
484 const auto& [address, bb] = pair;
485 code.AddLine("case 0x{:X}U: {{", address);
486 ++code.scope;
487
488 VisitBlock(bb);
489
490 --code.scope;
491 code.AddLine("}}");
492 }
493
494 code.AddLine("default: return;");
495 code.AddLine("}}");
496
497 --code.scope;
498 code.AddLine("}}");
499 }
500
501 void DecompileAST();
502
503 void DeclareHeader() {
504 if (!identifier.empty()) {
505 code.AddLine("// {}", identifier);
506 }
507 const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate();
508 code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core");
509 code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
510 if (device.HasShaderBallot()) {
511 code.AddLine("#extension GL_ARB_shader_ballot : require");
512 }
513 if (device.HasVertexViewportLayer()) {
514 code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require");
515 }
516 if (device.HasImageLoadFormatted()) {
517 code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
518 }
519 if (device.HasTextureShadowLod()) {
520 code.AddLine("#extension GL_EXT_texture_shadow_lod : require");
521 }
522 if (device.HasWarpIntrinsics()) {
523 code.AddLine("#extension GL_NV_gpu_shader5 : require");
524 code.AddLine("#extension GL_NV_shader_thread_group : require");
525 code.AddLine("#extension GL_NV_shader_thread_shuffle : require");
526 }
527 // This pragma stops Nvidia's driver from over optimizing math (probably using fp16
528 // operations) on places where we don't want to.
529 // Thanks to Ryujinx for finding this workaround.
530 code.AddLine("#pragma optionNV(fastmath off)");
531
532 code.AddNewLine();
533
534 code.AddLine(COMMON_DECLARATIONS);
535 }
536
537 void DeclareVertex() {
538 if (stage != ShaderType::Vertex) {
539 return;
540 }
541
542 DeclareVertexRedeclarations();
543 }
544
545 void DeclareGeometry() {
546 if (stage != ShaderType::Geometry) {
547 return;
548 }
549
550 const auto& info = registry.GetGraphicsInfo();
551 const auto input_topology = info.primitive_topology;
552 const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology);
553 max_input_vertices = max_vertices;
554 code.AddLine("layout ({}) in;", glsl_topology);
555
556 const auto topology = GetTopologyName(header.common3.output_topology);
557 const auto max_output_vertices = header.common4.max_output_vertices.Value();
558 code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices);
559 code.AddNewLine();
560
561 code.AddLine("in gl_PerVertex {{");
562 ++code.scope;
563 code.AddLine("vec4 gl_Position;");
564 --code.scope;
565 code.AddLine("}} gl_in[];");
566
567 DeclareVertexRedeclarations();
568 }
569
570 void DeclareFragment() {
571 if (stage != ShaderType::Fragment) {
572 return;
573 }
574 if (ir.UsesLegacyVaryings()) {
575 code.AddLine("in gl_PerFragment {{");
576 ++code.scope;
577 code.AddLine("vec4 gl_TexCoord[8];");
578 code.AddLine("vec4 gl_Color;");
579 code.AddLine("vec4 gl_SecondaryColor;");
580 --code.scope;
581 code.AddLine("}};");
582 }
583
584 for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
585 code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt);
586 }
587 }
588
589 void DeclareCompute() {
590 if (stage != ShaderType::Compute) {
591 return;
592 }
593 const auto& info = registry.GetComputeInfo();
594 if (u32 size = info.shared_memory_size_in_words * 4; size > 0) {
595 const u32 limit = device.GetMaxComputeSharedMemorySize();
596 if (size > limit) {
597 LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
598 size, limit);
599 size = limit;
600 }
601
602 code.AddLine("shared uint smem[{}];", size / 4);
603 code.AddNewLine();
604 }
605 code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
606 info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]);
607 code.AddNewLine();
608 }
609
610 void DeclareVertexRedeclarations() {
611 code.AddLine("out gl_PerVertex {{");
612 ++code.scope;
613
614 auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position);
615 if (!pos_xfb.empty()) {
616 pos_xfb = fmt::format("layout ({}) ", pos_xfb);
617 }
618 const char* pos_type =
619 FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1);
620 code.AddLine("{}{} gl_Position;", pos_xfb, pos_type);
621
622 for (const auto attribute : ir.GetOutputAttributes()) {
623 if (attribute == Attribute::Index::ClipDistances0123 ||
624 attribute == Attribute::Index::ClipDistances4567) {
625 code.AddLine("float gl_ClipDistance[];");
626 break;
627 }
628 }
629
630 if (stage != ShaderType::Geometry &&
631 (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) {
632 if (ir.UsesLayer()) {
633 code.AddLine("int gl_Layer;");
634 }
635 if (ir.UsesViewportIndex()) {
636 code.AddLine("int gl_ViewportIndex;");
637 }
638 } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex &&
639 !device.HasVertexViewportLayer()) {
640 LOG_ERROR(
641 Render_OpenGL,
642 "GL_ARB_shader_viewport_layer_array is not available and its required by a shader");
643 }
644
645 if (ir.UsesPointSize()) {
646 code.AddLine("float gl_PointSize;");
647 }
648
649 if (ir.UsesLegacyVaryings()) {
650 code.AddLine("vec4 gl_TexCoord[8];");
651 code.AddLine("vec4 gl_FrontColor;");
652 code.AddLine("vec4 gl_FrontSecondaryColor;");
653 code.AddLine("vec4 gl_BackColor;");
654 code.AddLine("vec4 gl_BackSecondaryColor;");
655 }
656
657 --code.scope;
658 code.AddLine("}};");
659 code.AddNewLine();
660
661 if (stage == ShaderType::Geometry) {
662 if (ir.UsesLayer()) {
663 code.AddLine("out int gl_Layer;");
664 }
665 if (ir.UsesViewportIndex()) {
666 code.AddLine("out int gl_ViewportIndex;");
667 }
668 }
669 code.AddNewLine();
670 }
671
672 void DeclareRegisters() {
673 const auto& registers = ir.GetRegisters();
674 for (const u32 gpr : registers) {
675 code.AddLine("float {} = 0.0f;", GetRegister(gpr));
676 }
677 if (!registers.empty()) {
678 code.AddNewLine();
679 }
680 }
681
682 void DeclareCustomVariables() {
683 const u32 num_custom_variables = ir.GetNumCustomVariables();
684 for (u32 i = 0; i < num_custom_variables; ++i) {
685 code.AddLine("float {} = 0.0f;", GetCustomVariable(i));
686 }
687 if (num_custom_variables > 0) {
688 code.AddNewLine();
689 }
690 }
691
692 void DeclarePredicates() {
693 const auto& predicates = ir.GetPredicates();
694 for (const auto pred : predicates) {
695 code.AddLine("bool {} = false;", GetPredicate(pred));
696 }
697 if (!predicates.empty()) {
698 code.AddNewLine();
699 }
700 }
701
702 void DeclareLocalMemory() {
703 u64 local_memory_size = 0;
704 if (stage == ShaderType::Compute) {
705 local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
706 } else {
707 local_memory_size = header.GetLocalMemorySize();
708 }
709 if (local_memory_size == 0) {
710 return;
711 }
712 const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4;
713 code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
714 code.AddNewLine();
715 }
716
717 void DeclareInternalFlags() {
718 for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) {
719 const auto flag_code = static_cast<InternalFlag>(flag);
720 code.AddLine("bool {} = false;", GetInternalFlag(flag_code));
721 }
722 code.AddNewLine();
723 }
724
725 const char* GetInputFlags(PixelImap attribute) {
726 switch (attribute) {
727 case PixelImap::Perspective:
728 return "smooth";
729 case PixelImap::Constant:
730 return "flat";
731 case PixelImap::ScreenLinear:
732 return "noperspective";
733 case PixelImap::Unused:
734 break;
735 }
736 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
737 return {};
738 }
739
740 void DeclareInputAttributes() {
741 if (ir.HasPhysicalAttributes()) {
742 const u32 num_inputs{GetNumPhysicalInputAttributes()};
743 for (u32 i = 0; i < num_inputs; ++i) {
744 DeclareInputAttribute(ToGenericAttribute(i), true);
745 }
746 code.AddNewLine();
747 return;
748 }
749
750 const auto& attributes = ir.GetInputAttributes();
751 for (const auto index : attributes) {
752 if (IsGenericAttribute(index)) {
753 DeclareInputAttribute(index, false);
754 }
755 }
756 if (!attributes.empty()) {
757 code.AddNewLine();
758 }
759 }
760
761 void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
762 const u32 location{GetGenericAttributeIndex(index)};
763
764 std::string name{GetGenericInputAttribute(index)};
765 if (stage == ShaderType::Geometry) {
766 name = "gs_" + name + "[]";
767 }
768
769 std::string suffix_;
770 if (stage == ShaderType::Fragment) {
771 const auto input_mode{header.ps.GetPixelImap(location)};
772 if (input_mode == PixelImap::Unused) {
773 return;
774 }
775 suffix_ = GetInputFlags(input_mode);
776 }
777
778 code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name);
779 }
780
781 void DeclareOutputAttributes() {
782 if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) {
783 for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {
784 DeclareOutputAttribute(ToGenericAttribute(i));
785 }
786 code.AddNewLine();
787 return;
788 }
789
790 const auto& attributes = ir.GetOutputAttributes();
791 for (const auto index : attributes) {
792 if (IsGenericAttribute(index)) {
793 DeclareOutputAttribute(index);
794 }
795 }
796 if (!attributes.empty()) {
797 code.AddNewLine();
798 }
799 }
800
801 std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const {
802 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
803 const auto it = transform_feedback.find(location);
804 if (it == transform_feedback.end()) {
805 return std::nullopt;
806 }
807 return it->second.components;
808 }
809
810 std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const {
811 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
812 const auto it = transform_feedback.find(location);
813 if (it == transform_feedback.end()) {
814 return {};
815 }
816
817 const VaryingTFB& tfb = it->second;
818 return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer,
819 tfb.offset, tfb.stride);
820 }
821
822 void DeclareOutputAttribute(Attribute::Index index) {
823 static constexpr std::string_view swizzle = "xyzw";
824 u8 element = 0;
825 while (element < 4) {
826 auto xfb = GetTransformFeedbackDecoration(index, element);
827 if (!xfb.empty()) {
828 xfb = fmt::format(", {}", xfb);
829 }
830 const std::size_t remainder = 4 - element;
831 const std::size_t num_components = GetNumComponents(index, element).value_or(remainder);
832 const char* const type = FLOAT_TYPES.at(num_components - 1);
833
834 const u32 location = GetGenericAttributeIndex(index);
835
836 GenericVaryingDescription description;
837 description.first_element = static_cast<u8>(element);
838 description.is_scalar = num_components == 1;
839 description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME);
840 if (element != 0 || num_components != 4) {
841 const std::string_view name_swizzle = swizzle.substr(element, num_components);
842 description.name = fmt::format("{}_{}", description.name, name_swizzle);
843 }
844 for (std::size_t i = 0; i < num_components; ++i) {
845 const u8 offset = static_cast<u8>(location * 4 + element + i);
846 varying_description.insert({offset, description});
847 }
848
849 code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element,
850 xfb, type, description.name);
851
852 element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
853 }
854 }
855
856 void DeclareConstantBuffers() {
857 u32 binding = device.GetBaseBindings(stage).uniform_buffer;
858 for (const auto& [index, info] : ir.GetConstantBuffers()) {
859 const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32));
860 const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements;
861 code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
862 GetConstBufferBlock(index));
863 code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size);
864 code.AddLine("}};");
865 code.AddNewLine();
866 }
867 }
868
869 void DeclareGlobalMemory() {
870 u32 binding = device.GetBaseBindings(stage).shader_storage_buffer;
871 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
872 // Since we don't know how the shader will use the shader, hint the driver to disable as
873 // much optimizations as possible
874 std::string qualifier = "coherent volatile";
875 if (usage.is_read && !usage.is_written) {
876 qualifier += " readonly";
877 } else if (usage.is_written && !usage.is_read) {
878 qualifier += " writeonly";
879 }
880
881 code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier,
882 GetGlobalMemoryBlock(base));
883 code.AddLine(" uint {}[];", GetGlobalMemory(base));
884 code.AddLine("}};");
885 code.AddNewLine();
886 }
887 }
888
889 void DeclareSamplers() {
890 u32 binding = device.GetBaseBindings(stage).sampler;
891 for (const auto& sampler : ir.GetSamplers()) {
892 const std::string name = GetSampler(sampler);
893 const std::string description = fmt::format("layout (binding = {}) uniform", binding);
894 binding += sampler.is_indexed ? sampler.size : 1;
895
896 std::string sampler_type = [&]() {
897 if (sampler.is_buffer) {
898 return "samplerBuffer";
899 }
900 switch (sampler.type) {
901 case TextureType::Texture1D:
902 return "sampler1D";
903 case TextureType::Texture2D:
904 return "sampler2D";
905 case TextureType::Texture3D:
906 return "sampler3D";
907 case TextureType::TextureCube:
908 return "samplerCube";
909 default:
910 UNREACHABLE();
911 return "sampler2D";
912 }
913 }();
914 if (sampler.is_array) {
915 sampler_type += "Array";
916 }
917 if (sampler.is_shadow) {
918 sampler_type += "Shadow";
919 }
920
921 if (!sampler.is_indexed) {
922 code.AddLine("{} {} {};", description, sampler_type, name);
923 } else {
924 code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.size);
925 }
926 }
927 if (!ir.GetSamplers().empty()) {
928 code.AddNewLine();
929 }
930 }
931
932 void DeclarePhysicalAttributeReader() {
933 if (!ir.HasPhysicalAttributes()) {
934 return;
935 }
936 code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{");
937 ++code.scope;
938 code.AddLine("switch (physical_address) {{");
939
940 // Just declare generic attributes for now.
941 const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())};
942 for (u32 index = 0; index < num_attributes; ++index) {
943 const auto attribute{ToGenericAttribute(index)};
944 for (u32 element = 0; element < 4; ++element) {
945 constexpr u32 generic_base = 0x80;
946 constexpr u32 generic_stride = 16;
947 constexpr u32 element_stride = 4;
948 const u32 address{generic_base + index * generic_stride + element * element_stride};
949
950 const bool declared = stage != ShaderType::Fragment ||
951 header.ps.GetPixelImap(index) != PixelImap::Unused;
952 const std::string value =
953 declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f";
954 code.AddLine("case 0x{:X}U: return {};", address, value);
955 }
956 }
957
958 code.AddLine("default: return 0;");
959
960 code.AddLine("}}");
961 --code.scope;
962 code.AddLine("}}");
963 code.AddNewLine();
964 }
965
966 void DeclareImages() {
967 u32 binding = device.GetBaseBindings(stage).image;
968 for (const auto& image : ir.GetImages()) {
969 std::string qualifier = "coherent volatile";
970 if (image.is_read && !image.is_written) {
971 qualifier += " readonly";
972 } else if (image.is_written && !image.is_read) {
973 qualifier += " writeonly";
974 }
975
976 const char* format = image.is_atomic ? "r32ui, " : "";
977 const char* type_declaration = GetImageTypeDeclaration(image.type);
978 code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++,
979 qualifier, type_declaration, GetImage(image));
980 }
981 if (!ir.GetImages().empty()) {
982 code.AddNewLine();
983 }
984 }
985
986 void VisitBlock(const NodeBlock& bb) {
987 for (const auto& node : bb) {
988 Visit(node).CheckVoid();
989 }
990 }
991
    // Lowers a single IR node to a GLSL expression. Returns an empty Expression for
    // nodes that only emit statements (conditionals, comments, discarded ops).
    // Dispatches on the node's variant alternative; checks are mutually exclusive.
    Expression Visit(const Node& node) {
        if (const auto operation = std::get_if<OperationNode>(&*node)) {
            // Amend nodes are side-effect prologues attached to this operation;
            // they are emitted before the operation itself.
            if (const auto amend_index = operation->GetAmendIndex()) {
                Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
            }
            // Dispatch through the operation table; out-of-range or unregistered
            // opcodes are programmer errors, not runtime conditions.
            const auto operation_index = static_cast<std::size_t>(operation->GetCode());
            if (operation_index >= operation_decompilers.size()) {
                UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
                return {};
            }
            const auto decompiler = operation_decompilers[operation_index];
            if (decompiler == nullptr) {
                UNREACHABLE_MSG("Undefined operation: {}", operation_index);
                return {};
            }
            return (this->*decompiler)(*operation);
        }

        if (const auto gpr = std::get_if<GprNode>(&*node)) {
            const u32 index = gpr->GetIndex();
            // The zero register always reads as 0 and is typed as an unsigned int.
            if (index == Register::ZeroIndex) {
                return {"0U", Type::Uint};
            }
            return {GetRegister(index), Type::Float};
        }

        if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
            const u32 index = cv->GetIndex();
            return {GetCustomVariable(index), Type::Float};
        }

        if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
            const u32 value = immediate->GetValue();
            if (value < 10) {
                // For eyecandy avoid using hex numbers on single digits
                return {fmt::format("{}U", immediate->GetValue()), Type::Uint};
            }
            return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint};
        }

        if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
            // UnusedIndex/NeverExecute are pseudo-predicates with fixed truth values.
            const auto value = [&]() -> std::string {
                switch (const auto index = predicate->GetIndex(); index) {
                case Tegra::Shader::Pred::UnusedIndex:
                    return "true";
                case Tegra::Shader::Pred::NeverExecute:
                    return "false";
                default:
                    return GetPredicate(index);
                }
            }();
            if (predicate->IsNegated()) {
                return {fmt::format("!({})", value), Type::Bool};
            }
            return {value, Type::Bool};
        }

        if (const auto abuf = std::get_if<AbufNode>(&*node)) {
            UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry,
                                 "Physical attributes in geometry shaders are not implemented");
            // Physical (computed-address) attribute reads go through a generated
            // helper function; regular reads are resolved statically.
            if (abuf->IsPhysicalBuffer()) {
                return {fmt::format("ReadPhysicalAttribute({})",
                                    Visit(abuf->GetPhysicalAddress()).AsUint()),
                        Type::Float};
            }
            return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer());
        }

        if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
            const Node offset = cbuf->GetOffset();

            if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
                // Direct access
                const u32 offset_imm = immediate->GetValue();
                ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
                // Const buffers are declared as arrays of uvec4: the byte offset is
                // split into a vector index and a component index.
                return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
                                    offset_imm / (4 * 4), (offset_imm / 4) % 4),
                        Type::Uint};
            }

            // Indirect access
            const std::string final_offset = code.GenerateTemporary();
            code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());

            if (!device.HasComponentIndexingBug()) {
                return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
                                    final_offset, final_offset),
                        Type::Uint};
            }

            // AMD's proprietary GLSL compiler emits ill code for variable component access.
            // To bypass this driver bug generate 4 ifs, one per each component.
            const std::string pack = code.GenerateTemporary();
            code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
                         final_offset);

            const std::string result = code.GenerateTemporary();
            code.AddLine("uint {};", result);
            for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
                code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack,
                             GetSwizzle(swizzle));
            }
            return {result, Type::Uint};
        }

        if (const auto gmem = std::get_if<GmemNode>(&*node)) {
            // Global memory is addressed relative to the descriptor's base; the SSBO
            // itself is an array of 32-bit words, hence the >> 2.
            const std::string real = Visit(gmem->GetRealAddress()).AsUint();
            const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
            const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
            return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
                    Type::Uint};
        }

        if (const auto lmem = std::get_if<LmemNode>(&*node)) {
            // Local memory is a word array; byte address is converted to word index.
            return {
                fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
                Type::Uint};
        }

        if (const auto smem = std::get_if<SmemNode>(&*node)) {
            return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
        }

        if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
            return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool};
        }

        if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
            if (const auto amend_index = conditional->GetAmendIndex()) {
                Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
            }
            // It's invalid to call conditional on nested nodes, use an operation instead
            code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
            ++code.scope;

            VisitBlock(conditional->GetCode());

            --code.scope;
            code.AddLine("}}");
            return {};
        }

        if (const auto comment = std::get_if<CommentNode>(&*node)) {
            code.AddLine("// " + comment->GetText());
            return {};
        }

        UNREACHABLE();
        return {};
    }
1142
    // Resolves a read of an input attribute to a GLSL expression for the current
    // shader stage. 'buffer' is the vertex-index node used by geometry shaders to
    // select which input vertex to read from; it is empty for other stages.
    Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
        // Wraps a generic input name with the geometry-stage per-vertex indexing.
        const auto GeometryPass = [&](std::string_view name) {
            if (stage == ShaderType::Geometry && buffer) {
                // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
                // set an 0x80000000 index for those and the shader fails to build. Find out why
                // this happens and what's its intent.
                return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(),
                                   max_input_vertices.value());
            }
            return std::string(name);
        };

        switch (attribute) {
        case Attribute::Index::Position:
            // Position maps to a different built-in depending on the stage.
            switch (stage) {
            case ShaderType::Geometry:
                return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(),
                                    GetSwizzle(element)),
                        Type::Float};
            case ShaderType::Fragment:
                return {"gl_FragCoord"s + GetSwizzle(element), Type::Float};
            default:
                UNREACHABLE();
                return {"0", Type::Int};
            }
        case Attribute::Index::FrontColor:
            return {"gl_Color"s + GetSwizzle(element), Type::Float};
        case Attribute::Index::FrontSecondaryColor:
            return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float};
        case Attribute::Index::PointCoord:
            // Only x/y are defined for point coordinates; z/w read as zero.
            switch (element) {
            case 0:
                return {"gl_PointCoord.x", Type::Float};
            case 1:
                return {"gl_PointCoord.y", Type::Float};
            case 2:
            case 3:
                return {"0.0f", Type::Float};
            }
            UNREACHABLE();
            return {"0", Type::Int};
        case Attribute::Index::TessCoordInstanceIDVertexID:
            // TODO(Subv): Find out what the values are for the first two elements when inside a
            // vertex shader, and what's the value of the fourth element when inside a Tess Eval
            // shader.
            ASSERT(stage == ShaderType::Vertex);
            switch (element) {
            case 2:
                // Config pack's first value is instance_id.
                return {"gl_InstanceID", Type::Int};
            case 3:
                return {"gl_VertexID", Type::Int};
            }
            UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
            return {"0", Type::Int};
        case Attribute::Index::FrontFacing:
            // TODO(Subv): Find out what the values are for the other elements.
            ASSERT(stage == ShaderType::Fragment);
            switch (element) {
            case 3:
                // Hardware convention: all-ones (-1) when front facing, 0 otherwise.
                return {"(gl_FrontFacing ? -1 : 0)", Type::Int};
            }
            UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
            return {"0", Type::Int};
        default:
            if (IsGenericAttribute(attribute)) {
                return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element),
                        Type::Float};
            }
            if (IsLegacyTexCoord(attribute)) {
                UNIMPLEMENTED_IF(stage == ShaderType::Geometry);
                return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
                                    GetSwizzle(element)),
                        Type::Float};
            }
            break;
        }
        UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute);
        return {"0", Type::Int};
    }
1223
1224 Expression ApplyPrecise(Operation operation, std::string value, Type type) {
1225 if (!IsPrecise(operation)) {
1226 return {std::move(value), type};
1227 }
1228 // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to
1229 // be found in fragment shaders, so we disable precise there. There are vertex shaders that
1230 // also fail to build but nobody seems to care about those.
1231 // Note: Only bugged drivers will skip precise.
1232 const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment;
1233
1234 std::string temporary = code.GenerateTemporary();
1235 code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type),
1236 temporary, value);
1237 return {std::move(temporary), type};
1238 }
1239
    // Visits one operand of an operation. When the parent is precise but the child
    // operation is not, the child's value is materialized into a temporary so the
    // 'precise' qualifier of the parent does not leak into (or get lost from) the
    // child expression. Trivial (non-operation) children never need this.
    Expression VisitOperand(Operation operation, std::size_t operand_index) {
        const auto& operand = operation[operand_index];
        const bool parent_precise = IsPrecise(operation);
        const bool child_precise = IsPrecise(operand);
        const bool child_trivial = !std::holds_alternative<OperationNode>(*operand);
        if (!parent_precise || child_precise || child_trivial) {
            return Visit(operand);
        }

        Expression value = Visit(operand);
        std::string temporary = code.GenerateTemporary();
        code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode());
        return {std::move(temporary), value.GetType()};
    }
1254
    // Resolves a write target for an output attribute. Returns std::nullopt when
    // the write must be dropped (unimplemented element, or a built-in the device
    // cannot write from this stage), so callers can skip the assignment.
    std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) {
        const u32 element = abuf->GetElement();
        switch (const auto attribute = abuf->GetIndex()) {
        case Attribute::Index::Position:
            return {{"gl_Position"s + GetSwizzle(element), Type::Float}};
        case Attribute::Index::LayerViewportPointSize:
            // This attribute packs layer, viewport index and point size per element.
            switch (element) {
            case 0:
                UNIMPLEMENTED();
                return std::nullopt;
            case 1:
                // gl_Layer from a vertex shader needs ARB_shader_viewport_layer_array
                // (or equivalent); drop the write if unsupported.
                if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
                    return std::nullopt;
                }
                return {{"gl_Layer", Type::Int}};
            case 2:
                if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
                    return std::nullopt;
                }
                return {{"gl_ViewportIndex", Type::Int}};
            case 3:
                return {{"gl_PointSize", Type::Float}};
            }
            return std::nullopt;
        case Attribute::Index::FrontColor:
            return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}};
        case Attribute::Index::FrontSecondaryColor:
            return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}};
        case Attribute::Index::BackColor:
            return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}};
        case Attribute::Index::BackSecondaryColor:
            return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}};
        case Attribute::Index::ClipDistances0123:
            return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}};
        case Attribute::Index::ClipDistances4567:
            // Second clip-distance bank: elements map to indices 4..7.
            return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}};
        default:
            if (IsGenericAttribute(attribute)) {
                return {{GetGenericOutputAttribute(attribute, element), Type::Float}};
            }
            if (IsLegacyTexCoord(attribute)) {
                return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
                                     GetSwizzle(element)),
                         Type::Float}};
            }
            UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute);
            return std::nullopt;
        }
    }
1304
1305 Expression GenerateUnary(Operation operation, std::string_view func, Type result_type,
1306 Type type_a) {
1307 std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a));
1308 return ApplyPrecise(operation, std::move(op_str), result_type);
1309 }
1310
1311 Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type,
1312 Type type_a, Type type_b) {
1313 const std::string op_a = VisitOperand(operation, 0).As(type_a);
1314 const std::string op_b = VisitOperand(operation, 1).As(type_b);
1315 std::string op_str = fmt::format("({} {} {})", op_a, func, op_b);
1316
1317 return ApplyPrecise(operation, std::move(op_str), result_type);
1318 }
1319
1320 Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type,
1321 Type type_a, Type type_b) {
1322 const std::string op_a = VisitOperand(operation, 0).As(type_a);
1323 const std::string op_b = VisitOperand(operation, 1).As(type_b);
1324 std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b);
1325
1326 return ApplyPrecise(operation, std::move(op_str), result_type);
1327 }
1328
1329 Expression GenerateTernary(Operation operation, std::string_view func, Type result_type,
1330 Type type_a, Type type_b, Type type_c) {
1331 const std::string op_a = VisitOperand(operation, 0).As(type_a);
1332 const std::string op_b = VisitOperand(operation, 1).As(type_b);
1333 const std::string op_c = VisitOperand(operation, 2).As(type_c);
1334 std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c);
1335
1336 return ApplyPrecise(operation, std::move(op_str), result_type);
1337 }
1338
1339 Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type,
1340 Type type_a, Type type_b, Type type_c, Type type_d) {
1341 const std::string op_a = VisitOperand(operation, 0).As(type_a);
1342 const std::string op_b = VisitOperand(operation, 1).As(type_b);
1343 const std::string op_c = VisitOperand(operation, 2).As(type_c);
1344 const std::string op_d = VisitOperand(operation, 3).As(type_d);
1345 std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d);
1346
1347 return ApplyPrecise(operation, std::move(op_str), result_type);
1348 }
1349
    // Assembles a GLSL texture* call for a sampling operation. 'function_suffix'
    // selects the variant (e.g. "Lod", "Grad"); 'extras' appends trailing
    // arguments (bias/lod, offsets, derivatives) after the coordinates.
    // 'separate_dc' places the depth-compare value as its own argument instead of
    // packing it into the coordinate vector. Returns the full call expression.
    std::string GenerateTexture(Operation operation, const std::string& function_suffix,
                                const std::vector<TextureIR>& extras, bool separate_dc = false) {
        constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};

        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);

        const std::size_t count = operation.GetOperandsCount();
        const bool has_array = meta->sampler.is_array;
        const bool has_shadow = meta->sampler.is_shadow;
        // Without ARB_texture_shadow_lod, sampling 2D-array/cube shadow textures
        // with an explicit LOD is invalid GLSL; fall back to textureGrad with zero
        // derivatives (equivalent to LOD 0).
        const bool workaround_lod_array_shadow_as_grad =
            !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow &&
            ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
             meta->sampler.type == TextureType::TextureCube);

        std::string expr = "texture";

        if (workaround_lod_array_shadow_as_grad) {
            expr += "Grad";
        } else {
            expr += function_suffix;
        }

        // "Offset" for a single constant offset, "Offsets" for the 2x2 gather set.
        if (!meta->aoffi.empty()) {
            expr += "Offset";
        } else if (!meta->ptp.empty()) {
            expr += "Offsets";
        }
        if (!meta->sampler.is_indexed) {
            expr += '(' + GetSampler(meta->sampler) + ", ";
        } else {
            expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], ";
        }
        // Pick the coordinate vector size: base coords, plus one component for the
        // array layer and one for an embedded depth-compare reference.
        expr += coord_constructors.at(count + (has_array ? 1 : 0) +
                                      (has_shadow && !separate_dc ? 1 : 0) - 1);
        expr += '(';
        for (std::size_t i = 0; i < count; ++i) {
            expr += Visit(operation[i]).AsFloat();

            const std::size_t next = i + 1;
            if (next < count)
                expr += ", ";
        }
        if (has_array) {
            expr += ", float(" + Visit(meta->array).AsInt() + ')';
        }
        if (has_shadow) {
            if (separate_dc) {
                expr += "), " + Visit(meta->depth_compare).AsFloat();
            } else {
                expr += ", " + Visit(meta->depth_compare).AsFloat() + ')';
            }
        } else {
            expr += ')';
        }

        if (workaround_lod_array_shadow_as_grad) {
            // Append zero derivatives of the dimensionality textureGrad expects.
            switch (meta->sampler.type) {
            case TextureType::Texture2D:
                return expr + ", vec2(0.0), vec2(0.0))";
            case TextureType::TextureCube:
                return expr + ", vec3(0.0), vec3(0.0))";
            default:
                UNREACHABLE();
                break;
            }
        }

        // Trailing arguments requested by the caller (bias/lod, offsets, derivatives).
        for (const auto& variant : extras) {
            if (const auto argument = std::get_if<TextureArgument>(&variant)) {
                expr += GenerateTextureArgument(*argument);
            } else if (std::holds_alternative<TextureOffset>(variant)) {
                if (!meta->aoffi.empty()) {
                    expr += GenerateTextureAoffi(meta->aoffi);
                } else if (!meta->ptp.empty()) {
                    expr += GenerateTexturePtp(meta->ptp);
                }
            } else if (std::holds_alternative<TextureDerivates>(variant)) {
                expr += GenerateTextureDerivates(meta->derivates);
            } else {
                UNREACHABLE();
            }
        }

        return expr + ')';
    }
1436
    // Renders one extra texture argument (e.g. bias or LOD) as ", value".
    // Returns an empty string when the argument has no operand. Integer
    // immediates are inlined because some GLSL arguments must be constant.
    std::string GenerateTextureArgument(const TextureArgument& argument) {
        const auto& [type, operand] = argument;
        if (operand == nullptr) {
            return {};
        }

        std::string expr = ", ";
        switch (type) {
        case Type::Int:
            if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) {
                // Inline the string as an immediate integer in GLSL (some extra arguments are
                // required to be constant)
                expr += std::to_string(static_cast<s32>(immediate->GetValue()));
            } else {
                expr += Visit(operand).AsInt();
            }
            break;
        case Type::Float:
            expr += Visit(operand).AsFloat();
            break;
        default: {
            // Unsupported argument type: emit a harmless placeholder literal.
            const auto type_int = static_cast<u32>(type);
            UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
            expr += '0';
            break;
        }
        }
        return expr;
    }
1466
1467 std::string ReadTextureOffset(const Node& value) {
1468 if (const auto immediate = std::get_if<ImmediateNode>(&*value)) {
1469 // Inline the string as an immediate integer in GLSL (AOFFI arguments are required
1470 // to be constant by the standard).
1471 return std::to_string(static_cast<s32>(immediate->GetValue()));
1472 } else if (device.HasVariableAoffi()) {
1473 // Avoid using variable AOFFI on unsupported devices.
1474 return Visit(value).AsInt();
1475 } else {
1476 // Insert 0 on devices not supporting variable AOFFI.
1477 return "0";
1478 }
1479 }
1480
1481 std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
1482 if (aoffi.empty()) {
1483 return {};
1484 }
1485 constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"};
1486 std::string expr = ", ";
1487 expr += coord_constructors.at(aoffi.size() - 1);
1488 expr += '(';
1489
1490 for (std::size_t index = 0; index < aoffi.size(); ++index) {
1491 expr += ReadTextureOffset(aoffi.at(index));
1492 if (index + 1 < aoffi.size()) {
1493 expr += ", ";
1494 }
1495 }
1496 expr += ')';
1497
1498 return expr;
1499 }
1500
1501 std::string GenerateTexturePtp(const std::vector<Node>& ptp) {
1502 static constexpr std::size_t num_vectors = 4;
1503 ASSERT(ptp.size() == num_vectors * 2);
1504
1505 std::string expr = ", ivec2[](";
1506 for (std::size_t vector = 0; vector < num_vectors; ++vector) {
1507 const bool has_next = vector + 1 < num_vectors;
1508 expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)),
1509 ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : "");
1510 }
1511 expr += ')';
1512 return expr;
1513 }
1514
    // Renders the explicit derivative arguments for textureGrad as ", dPdx, dPdy".
    // 'derivates' interleaves x/y pairs per coordinate component; an empty vector
    // yields no argument.
    std::string GenerateTextureDerivates(const std::vector<Node>& derivates) {
        if (derivates.empty()) {
            return {};
        }
        constexpr std::array coord_constructors = {"float", "vec2", "vec3"};
        std::string expr = ", ";
        // Each coordinate component contributes one x and one y derivative.
        const std::size_t components = derivates.size() / 2;
        std::string dx = coord_constructors.at(components - 1);
        std::string dy = coord_constructors.at(components - 1);
        dx += '(';
        dy += '(';

        for (std::size_t index = 0; index < components; ++index) {
            const auto& operand_x{derivates.at(index * 2)};
            const auto& operand_y{derivates.at(index * 2 + 1)};
            dx += Visit(operand_x).AsFloat();
            dy += Visit(operand_y).AsFloat();

            if (index + 1 < components) {
                dx += ", ";
                dy += ", ";
            }
        }
        dx += ')';
        dy += ')';
        expr += dx + ", " + dy;

        return expr;
    }
1544
1545 std::string BuildIntegerCoordinates(Operation operation) {
1546 constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
1547 const std::size_t coords_count{operation.GetOperandsCount()};
1548 std::string expr = constructors.at(coords_count - 1);
1549 for (std::size_t i = 0; i < coords_count; ++i) {
1550 expr += VisitOperand(operation, i).AsInt();
1551 if (i + 1 < coords_count) {
1552 expr += ", ";
1553 }
1554 }
1555 expr += ')';
1556 return expr;
1557 }
1558
1559 std::string BuildImageValues(Operation operation) {
1560 constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"};
1561 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
1562
1563 const std::size_t values_count{meta.values.size()};
1564 std::string expr = fmt::format("{}(", constructors.at(values_count - 1));
1565 for (std::size_t i = 0; i < values_count; ++i) {
1566 expr += Visit(meta.values.at(i)).AsUint();
1567 if (i + 1 < values_count) {
1568 expr += ", ";
1569 }
1570 }
1571 expr += ')';
1572 return expr;
1573 }
1574
    // Lowers an assignment operation: operand 0 is the destination, operand 1 the
    // source. Resolves the destination to a writable GLSL lvalue by node kind and
    // emits "target = value;". Always returns an empty Expression (statement only).
    Expression Assign(Operation operation) {
        const Node& dest = operation[0];
        const Node& src = operation[1];

        Expression target;
        if (const auto gpr = std::get_if<GprNode>(&*dest)) {
            if (gpr->GetIndex() == Register::ZeroIndex) {
                // Writing to Register::ZeroIndex is a no op but we still have to visit the source
                // as it might have side effects.
                code.AddLine("{};", Visit(src).GetCode());
                return {};
            }
            target = {GetRegister(gpr->GetIndex()), Type::Float};
        } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
            UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
            // Output attribute writes can be dropped (e.g. unsupported built-ins).
            auto output = GetOutputAttribute(abuf);
            if (!output) {
                return {};
            }
            target = std::move(*output);
        } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
            // Local memory is a word array; byte address becomes a word index.
            target = {
                fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
                Type::Uint};
        } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
            ASSERT(stage == ShaderType::Compute);
            target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
        } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
            // Global memory is addressed relative to the descriptor's base address.
            const std::string real = Visit(gmem->GetRealAddress()).AsUint();
            const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
            const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
            target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
                      Type::Uint};
        } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
            target = {GetCustomVariable(cv->GetIndex()), Type::Float};
        } else {
            UNREACHABLE_MSG("Assign called without a proper target");
        }

        code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType()));
        return {};
    }
1617
    // Arithmetic operation decompilers, parameterized on the operand/result type.

    // Emits "(a + b)".
    template <Type type>
    Expression Add(Operation operation) {
        return GenerateBinaryInfix(operation, "+", type, type, type);
    }

    // Emits "(a * b)".
    template <Type type>
    Expression Mul(Operation operation) {
        return GenerateBinaryInfix(operation, "*", type, type, type);
    }

    // Emits "(a / b)".
    template <Type type>
    Expression Div(Operation operation) {
        return GenerateBinaryInfix(operation, "/", type, type, type);
    }

    // Emits "fma(a, b, c)" (fused multiply-add).
    template <Type type>
    Expression Fma(Operation operation) {
        return GenerateTernary(operation, "fma", type, type, type, type);
    }

    // Emits "-(a)".
    template <Type type>
    Expression Negate(Operation operation) {
        return GenerateUnary(operation, "-", type, type);
    }

    // Emits "abs(a)".
    template <Type type>
    Expression Absolute(Operation operation) {
        return GenerateUnary(operation, "abs", type, type);
    }
1647
    // Emits "clamp(value, min, max)" on floats.
    Expression FClamp(Operation operation) {
        return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float,
                               Type::Float);
    }

    // Extracts the low half of a packed half-float pair as a float.
    Expression FCastHalf0(Operation operation) {
        return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
    }

    // Extracts the high half of a packed half-float pair as a float.
    Expression FCastHalf1(Operation operation) {
        return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
    }
1660
    // Emits "min(a, b)".
    template <Type type>
    Expression Min(Operation operation) {
        return GenerateBinaryCall(operation, "min", type, type, type);
    }

    // Emits "max(a, b)".
    template <Type type>
    Expression Max(Operation operation) {
        return GenerateBinaryCall(operation, "max", type, type, type);
    }
1670
1671 Expression Select(Operation operation) {
1672 const std::string condition = Visit(operation[0]).AsBool();
1673 const std::string true_case = Visit(operation[1]).AsUint();
1674 const std::string false_case = Visit(operation[2]).AsUint();
1675 std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case);
1676
1677 return ApplyPrecise(operation, std::move(op_str), Type::Uint);
1678 }
1679
    // Floating-point unary builtins, each mapping directly to a GLSL function.

    Expression FCos(Operation operation) {
        return GenerateUnary(operation, "cos", Type::Float, Type::Float);
    }

    Expression FSin(Operation operation) {
        return GenerateUnary(operation, "sin", Type::Float, Type::Float);
    }

    Expression FExp2(Operation operation) {
        return GenerateUnary(operation, "exp2", Type::Float, Type::Float);
    }

    Expression FLog2(Operation operation) {
        return GenerateUnary(operation, "log2", Type::Float, Type::Float);
    }

    Expression FInverseSqrt(Operation operation) {
        return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float);
    }

    Expression FSqrt(Operation operation) {
        return GenerateUnary(operation, "sqrt", Type::Float, Type::Float);
    }

    // Rounds to the nearest even integer value (banker's rounding).
    Expression FRoundEven(Operation operation) {
        return GenerateUnary(operation, "roundEven", Type::Float, Type::Float);
    }

    Expression FFloor(Operation operation) {
        return GenerateUnary(operation, "floor", Type::Float, Type::Float);
    }

    Expression FCeil(Operation operation) {
        return GenerateUnary(operation, "ceil", Type::Float, Type::Float);
    }

    // Rounds toward zero.
    Expression FTrunc(Operation operation) {
        return GenerateUnary(operation, "trunc", Type::Float, Type::Float);
    }

    // Converts a signed or unsigned integer operand to float via "float(x)".
    template <Type type>
    Expression FCastInteger(Operation operation) {
        return GenerateUnary(operation, "float", Type::Float, type);
    }
1724
    // Implements FSWZADD: a per-lane add where each operand's sign/selection is
    // chosen by a 2-bit field of the instruction mask, indexed by the lane's
    // position within a quad. Requires shader ballot for the lane index; degrades
    // to a plain addition when the extension is missing.
    Expression FSwizzleAdd(Operation operation) {
        const std::string op_a = VisitOperand(operation, 0).AsFloat();
        const std::string op_b = VisitOperand(operation, 1).AsFloat();

        if (!device.HasShaderBallot()) {
            LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
            return {fmt::format("{} + {}", op_a, op_b), Type::Float};
        }

        // Extract the 2-bit modifier selector for this lane (lane index & 3).
        const std::string instr_mask = VisitOperand(operation, 2).AsUint();
        const std::string mask = code.GenerateTemporary();
        code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask,
                     instr_mask);

        // The modifier tables map the selector to +1/-1/0 factors per operand.
        const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask);
        const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask);
        return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b),
                Type::Float};
    }
1744
    // Converts a float operand to a signed integer via "int(x)".
    Expression ICastFloat(Operation operation) {
        return GenerateUnary(operation, "int", Type::Int, Type::Float);
    }

    // Reinterprets an unsigned operand as signed via "int(x)".
    Expression ICastUnsigned(Operation operation) {
        return GenerateUnary(operation, "int", Type::Int, Type::Uint);
    }
1752
    // Shift operations. Shift amounts are always unsigned.

    template <Type type>
    Expression LogicalShiftLeft(Operation operation) {
        return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint);
    }

    // Logical (zero-filling) right shift: performed on unsigned values, then the
    // result is reinterpreted as a signed integer.
    Expression ILogicalShiftRight(Operation operation) {
        const std::string op_a = VisitOperand(operation, 0).AsUint();
        const std::string op_b = VisitOperand(operation, 1).AsUint();
        std::string op_str = fmt::format("int({} >> {})", op_a, op_b);

        return ApplyPrecise(operation, std::move(op_str), Type::Int);
    }

    // Arithmetic (sign-extending) right shift on signed values.
    Expression IArithmeticShiftRight(Operation operation) {
        return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint);
    }
1769
    // Bitwise operations, parameterized on the operand/result type.

    template <Type type>
    Expression BitwiseAnd(Operation operation) {
        return GenerateBinaryInfix(operation, "&", type, type, type);
    }

    template <Type type>
    Expression BitwiseOr(Operation operation) {
        return GenerateBinaryInfix(operation, "|", type, type, type);
    }

    template <Type type>
    Expression BitwiseXor(Operation operation) {
        return GenerateBinaryInfix(operation, "^", type, type, type);
    }

    template <Type type>
    Expression BitwiseNot(Operation operation) {
        return GenerateUnary(operation, "~", type, type);
    }
1789
    // Converts a float operand to unsigned via "uint(x)".
    Expression UCastFloat(Operation operation) {
        return GenerateUnary(operation, "uint", Type::Uint, Type::Float);
    }

    // Reinterprets a signed operand as unsigned via "uint(x)".
    Expression UCastSigned(Operation operation) {
        return GenerateUnary(operation, "uint", Type::Uint, Type::Int);
    }

    // Unsigned right shift (always zero-filling on uints).
    Expression UShiftRight(Operation operation) {
        return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint);
    }
1801
    // Bitfield manipulation builtins; offset and count arguments are signed ints
    // per the GLSL signatures.

    template <Type type>
    Expression BitfieldInsert(Operation operation) {
        return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int,
                                  Type::Int);
    }

    template <Type type>
    Expression BitfieldExtract(Operation operation) {
        return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int);
    }

    // Population count of set bits.
    template <Type type>
    Expression BitCount(Operation operation) {
        return GenerateUnary(operation, "bitCount", type, type);
    }

    // Index of the most significant set bit.
    template <Type type>
    Expression BitMSB(Operation operation) {
        return GenerateUnary(operation, "findMSB", type, type);
    }
1822
    // Conditionally negates each half of a packed half-float pair: operands 1 and
    // 2 are booleans selecting negation of the low and high halves respectively,
    // implemented by multiplying with a vec2 of +/-1 factors.
    Expression HNegate(Operation operation) {
        const auto GetNegate = [&](std::size_t index) {
            return VisitOperand(operation, index).AsBool() + " ? -1 : 1";
        };
        return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(),
                            GetNegate(1), GetNegate(2)),
                Type::HalfFloat};
    }
1831
1832 Expression HClamp(Operation operation) {
1833 const std::string value = VisitOperand(operation, 0).AsHalfFloat();
1834 const std::string min = VisitOperand(operation, 1).AsFloat();
1835 const std::string max = VisitOperand(operation, 2).AsFloat();
1836 std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max);
1837
1838 return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat);
1839 }
1840
    // Builds a half pair from a single float, zeroing the high half.
    Expression HCastFloat(Operation operation) {
        return {fmt::format("vec2({}, 0.0f)", VisitOperand(operation, 0).AsFloat()),
                Type::HalfFloat};
    }

    // Unpacks/swizzles a half pair according to the instruction's half-type
    // selector: pass-through, splat from a float, or duplicate one half.
    Expression HUnpack(Operation operation) {
        Expression operand = VisitOperand(operation, 0);
        switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
        case Tegra::Shader::HalfType::H0_H1:
            // Already the desired layout.
            return operand;
        case Tegra::Shader::HalfType::F32:
            return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat};
        case Tegra::Shader::HalfType::H0_H0:
            return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat};
        case Tegra::Shader::HalfType::H1_H1:
            return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat};
        }
        UNREACHABLE();
        return {"0", Type::Int};
    }
1861
    // Extracts the low half of a pair as a single float.
    Expression HMergeF32(Operation operation) {
        return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
    }

    // Replaces the low half of 'dest' with the low half of 'src': result is
    // (src.x, dest.y), unpacking both raw u32 operands as half pairs.
    Expression HMergeH0(Operation operation) {
        const std::string dest = VisitOperand(operation, 0).AsUint();
        const std::string src = VisitOperand(operation, 1).AsUint();
        return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest),
                Type::HalfFloat};
    }

    // Replaces the high half of 'dest' with the high half of 'src': result is
    // (dest.x, src.y).
    Expression HMergeH1(Operation operation) {
        const std::string dest = VisitOperand(operation, 0).AsUint();
        const std::string src = VisitOperand(operation, 1).AsUint();
        return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src),
                Type::HalfFloat};
    }

    // Packs two floats into a half pair (vec2).
    Expression HPack2(Operation operation) {
        return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(),
                            VisitOperand(operation, 1).AsFloat()),
                Type::HalfFloat};
    }
1885
    /// Compares two operands of the given type with the infix operator `op`.
    /// Ordered float inequality is patched to reject NaN operands; unordered
    /// comparisons are widened to accept them.
    template <const std::string_view& op, Type type, bool unordered = false>
    Expression Comparison(Operation operation) {
        // Unordered comparisons only exist for floats.
        static_assert(!unordered || type == Type::Float);

        Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);

        if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) {
            // GLSL's operator!=(float, float) doesn't seem be ordered. This happens on both AMD's
            // and Nvidia's proprietary stacks. Manually force an ordered comparison.
            return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(),
                                VisitOperand(operation, 0).AsFloat(),
                                VisitOperand(operation, 1).AsFloat()),
                    Type::Bool};
        }
        if constexpr (!unordered) {
            return expr;
        }
        // Unordered comparisons are always true for NaN operands.
        return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(),
                            VisitOperand(operation, 0).AsFloat(),
                            VisitOperand(operation, 1).AsFloat()),
                Type::Bool};
    }
1909
1910 Expression FOrdered(Operation operation) {
1911 return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(),
1912 VisitOperand(operation, 1).AsFloat()),
1913 Type::Bool};
1914 }
1915
1916 Expression FUnordered(Operation operation) {
1917 return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(),
1918 VisitOperand(operation, 1).AsFloat()),
1919 Type::Bool};
1920 }
1921
1922 Expression LogicalAddCarry(Operation operation) {
1923 const std::string carry = code.GenerateTemporary();
1924 code.AddLine("uint {};", carry);
1925 code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(),
1926 VisitOperand(operation, 1).AsUint(), carry);
1927 return {fmt::format("({} != 0)", carry), Type::Bool};
1928 }
1929
    /// Assigns a boolean expression to a predicate or internal-flag destination.
    /// Writes to the NeverExecute/UnusedIndex predicates are discarded.
    Expression LogicalAssign(Operation operation) {
        const Node& dest = operation[0];
        const Node& src = operation[1];

        std::string target;

        if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
            ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");

            const auto index = pred->GetIndex();
            switch (index) {
            case Tegra::Shader::Pred::NeverExecute:
            case Tegra::Shader::Pred::UnusedIndex:
                // Writing to these predicates is a no-op
                return {};
            }
            target = GetPredicate(index);
        } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) {
            target = GetInternalFlag(flag->GetFlag());
        }

        // NOTE(review): if dest is neither a predicate nor an internal flag,
        // target stays empty and an invalid line is emitted — presumably the IR
        // never produces that case here.
        code.AddLine("{} = {};", target, Visit(src).AsBool());
        return {};
    }
1954
    /// Emits a boolean && between the two operands.
    Expression LogicalAnd(Operation operation) {
        return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool);
    }
1958
    /// Emits a boolean || between the two operands.
    Expression LogicalOr(Operation operation) {
        return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool);
    }
1962
    /// Emits GLSL's logical exclusive-or (^^) between the two operands.
    Expression LogicalXor(Operation operation) {
        return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool);
    }
1966
    /// Emits a boolean negation of the operand.
    Expression LogicalNegate(Operation operation) {
        return GenerateUnary(operation, "!", Type::Bool, Type::Bool);
    }
1970
1971 Expression LogicalPick2(Operation operation) {
1972 return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(),
1973 VisitOperand(operation, 1).AsUint()),
1974 Type::Bool};
1975 }
1976
    /// Reduces a bool pair to a single bool with all().
    Expression LogicalAnd2(Operation operation) {
        return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
    }
1980
    /// Emits a component-wise comparison of two half pairs; when with_nan is set,
    /// the result is post-processed by HalfFloatNanComparison to account for NaNs.
    template <bool with_nan>
    Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) {
        Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2,
                                                   Type::HalfFloat, Type::HalfFloat);
        if constexpr (!with_nan) {
            return comparison;
        }
        return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(),
                            VisitOperand(operation, 0).AsHalfFloat(),
                            VisitOperand(operation, 1).AsHalfFloat()),
                Type::Bool2};
    }
1993
    /// Component-wise half-pair less-than comparison.
    template <bool with_nan>
    Expression Logical2HLessThan(Operation operation) {
        return GenerateHalfComparison<with_nan>(operation, "lessThan");
    }
1998
    /// Component-wise half-pair equality comparison.
    template <bool with_nan>
    Expression Logical2HEqual(Operation operation) {
        return GenerateHalfComparison<with_nan>(operation, "equal");
    }
2003
    /// Component-wise half-pair less-or-equal comparison.
    template <bool with_nan>
    Expression Logical2HLessEqual(Operation operation) {
        return GenerateHalfComparison<with_nan>(operation, "lessThanEqual");
    }
2008
    /// Component-wise half-pair greater-than comparison.
    template <bool with_nan>
    Expression Logical2HGreaterThan(Operation operation) {
        return GenerateHalfComparison<with_nan>(operation, "greaterThan");
    }
2013
    /// Component-wise half-pair inequality comparison.
    template <bool with_nan>
    Expression Logical2HNotEqual(Operation operation) {
        return GenerateHalfComparison<with_nan>(operation, "notEqual");
    }
2018
    /// Component-wise half-pair greater-or-equal comparison.
    template <bool with_nan>
    Expression Logical2HGreaterEqual(Operation operation) {
        return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual");
    }
2023
    /// Samples a texture with optional offset and LOD bias arguments.
    Expression Texture(Operation operation) {
        const auto meta = std::get<MetaTexture>(operation.GetMeta());
        // Cube array shadow lookups pass the depth-compare value as a separate argument.
        const bool separate_dc = meta.sampler.type == TextureType::TextureCube &&
                                 meta.sampler.is_array && meta.sampler.is_shadow;
        // TODO: Replace this with an array and make GenerateTexture use C++20 std::span
        const std::vector<TextureIR> extras{
            TextureOffset{},
            TextureArgument{Type::Float, meta.bias},
        };
        std::string expr = GenerateTexture(operation, "", extras, separate_dc);
        if (meta.sampler.is_shadow) {
            // Shadow lookups return a scalar; widen so the element swizzle below is valid.
            expr = fmt::format("vec4({})", expr);
        }
        return {expr + GetSwizzle(meta.element), Type::Float};
    }
2039
    /// Samples a texture at an explicit level of detail.
    Expression TextureLod(Operation operation) {
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);

        std::string expr{};

        // Shadow lookups of 2D arrays and cubes with explicit LOD require
        // GL_EXT_texture_shadow_lod; fall back when the device lacks it.
        if (!device.HasTextureShadowLod() && meta->sampler.is_shadow &&
            ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
             meta->sampler.type == TextureType::TextureCube)) {
            LOG_ERROR(Render_OpenGL,
                      "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround");
            expr = GenerateTexture(operation, "Lod", {});
        } else {
            expr = GenerateTexture(operation, "Lod",
                                   {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
        }

        if (meta->sampler.is_shadow) {
            // Shadow lookups return a scalar; widen for the element swizzle.
            expr = "vec4(" + expr + ')';
        }
        return {expr + GetSwizzle(meta->element), Type::Float};
    }
2062
    /// Emits a textureGather; shadow gathers pass the depth-compare value
    /// separately and take no component argument.
    Expression TextureGather(Operation operation) {
        const auto& meta = std::get<MetaTexture>(operation.GetMeta());

        const auto type = meta.sampler.is_shadow ? Type::Float : Type::Int;
        const bool separate_dc = meta.sampler.is_shadow;

        std::vector<TextureIR> ir_;
        if (meta.sampler.is_shadow) {
            ir_ = {TextureOffset{}};
        } else {
            ir_ = {TextureOffset{}, TextureArgument{type, meta.component}};
        }
        return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element),
                Type::Float};
    }
2078
    /// Queries texture size (elements 0/1 select width/height via textureSize)
    /// or the mip level count (element 3).
    Expression TextureQueryDimensions(Operation operation) {
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);

        const std::string sampler = GetSampler(meta->sampler);
        const std::string lod = VisitOperand(operation, 0).AsInt();

        switch (meta->element) {
        case 0:
        case 1:
            return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)),
                    Type::Int};
        case 3:
            return {fmt::format("textureQueryLevels({})", sampler), Type::Int};
        }
        // Element 2 is intentionally not handled here and falls through.
        UNREACHABLE();
        return {"0", Type::Int};
    }
2097
    /// Emits textureQueryLod scaled by 256 (presumably to match the hardware's
    /// fixed-point LOD representation — confirm against the Maxwell ISA).
    /// Elements other than 0/1 return zero.
    Expression TextureQueryLod(Operation operation) {
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);

        if (meta->element < 2) {
            return {fmt::format("int(({} * vec2(256)){})",
                                GenerateTexture(operation, "QueryLod", {}),
                                GetSwizzle(meta->element)),
                    Type::Int};
        }
        return {"0", Type::Int};
    }
2110
2111 Expression TexelFetch(Operation operation) {
2112 constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"};
2113 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
2114 ASSERT(meta);
2115 UNIMPLEMENTED_IF(meta->sampler.is_array);
2116 const std::size_t count = operation.GetOperandsCount();
2117
2118 std::string expr = "texelFetch(";
2119 expr += GetSampler(meta->sampler);
2120 expr += ", ";
2121
2122 expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1);
2123 expr += '(';
2124 for (std::size_t i = 0; i < count; ++i) {
2125 if (i > 0) {
2126 expr += ", ";
2127 }
2128 expr += VisitOperand(operation, i).AsInt();
2129 }
2130 if (meta->array) {
2131 expr += ", ";
2132 expr += Visit(meta->array).AsInt();
2133 }
2134 expr += ')';
2135
2136 if (meta->lod && !meta->sampler.is_buffer) {
2137 expr += ", ";
2138 expr += Visit(meta->lod).AsInt();
2139 }
2140 expr += ')';
2141 expr += GetSwizzle(meta->element);
2142
2143 return {std::move(expr), Type::Float};
2144 }
2145
    /// Samples a texture with explicit derivatives (textureGrad).
    Expression TextureGradient(Operation operation) {
        const auto& meta = std::get<MetaTexture>(operation.GetMeta());
        std::string expr =
            GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}});
        return {std::move(expr) + GetSwizzle(meta.element), Type::Float};
    }
2152
    /// Loads a texel from an image; requires GL_EXT_shader_image_load_formatted,
    /// otherwise the load is stubbed to zero.
    Expression ImageLoad(Operation operation) {
        if (!device.HasImageLoadFormatted()) {
            LOG_ERROR(Render_OpenGL,
                      "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load");
            return {"0", Type::Int};
        }

        const auto& meta{std::get<MetaImage>(operation.GetMeta())};
        return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image),
                            BuildIntegerCoordinates(operation), GetSwizzle(meta.element)),
                Type::Uint};
    }
2165
2166 Expression ImageStore(Operation operation) {
2167 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
2168 code.AddLine("imageStore({}, {}, {});", GetImage(meta.image),
2169 BuildIntegerCoordinates(operation), BuildImageValues(operation));
2170 return {};
2171 }
2172
2173 template <const std::string_view& opname>
2174 Expression AtomicImage(Operation operation) {
2175 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
2176 ASSERT(meta.values.size() == 1);
2177
2178 return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image),
2179 BuildIntegerCoordinates(operation), Visit(meta.values[0]).AsUint()),
2180 Type::Uint};
2181 }
2182
    /// Emits an atomic<op> on the memory location named by operand 0 with the
    /// value of operand 1. Signed Min/Max are unimplemented and emit nothing.
    template <const std::string_view& opname, Type type>
    Expression Atomic(Operation operation) {
        if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) {
            UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations");
            return {};
        }
        return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
                            Visit(operation[1]).AsUint()),
                Type::Uint};
    }
2193
2194 template <const std::string_view& opname, Type type>
2195 Expression Reduce(Operation operation) {
2196 code.AddLine("{};", Atomic<opname, type>(operation).GetCode());
2197 return {};
2198 }
2199
2200 Expression Branch(Operation operation) {
2201 const auto target = std::get_if<ImmediateNode>(&*operation[0]);
2202 UNIMPLEMENTED_IF(!target);
2203
2204 code.AddLine("jmp_to = 0x{:X}U;", target->GetValue());
2205 code.AddLine("break;");
2206 return {};
2207 }
2208
2209 Expression BranchIndirect(Operation operation) {
2210 const std::string op_a = VisitOperand(operation, 0).AsUint();
2211
2212 code.AddLine("jmp_to = {};", op_a);
2213 code.AddLine("break;");
2214 return {};
2215 }
2216
2217 Expression PushFlowStack(Operation operation) {
2218 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
2219 const auto target = std::get_if<ImmediateNode>(&*operation[0]);
2220 UNIMPLEMENTED_IF(!target);
2221
2222 code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack),
2223 target->GetValue());
2224 return {};
2225 }
2226
2227 Expression PopFlowStack(Operation operation) {
2228 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
2229 code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack));
2230 code.AddLine("break;");
2231 return {};
2232 }
2233
    /// Flushes fragment shader outputs (enabled color components and depth) from
    /// the shader registers before returning. No-op for non-fragment stages.
    void PreExit() {
        if (stage != ShaderType::Fragment) {
            return;
        }
        const auto& used_registers = ir.GetRegisters();
        // Returns the register expression if the shader ever used it, else 0.0f.
        const auto SafeGetRegister = [&](u32 reg) -> Expression {
            // TODO(Rodrigo): Replace with contains once C++20 releases
            if (used_registers.find(reg) != used_registers.end()) {
                return {GetRegister(reg), Type::Float};
            }
            return {"0.0f", Type::Float};
        };

        UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented");

        // Write the color outputs using the data in the shader registers, disabled
        // rendertargets/components are skipped in the register assignment.
        u32 current_reg = 0;
        for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) {
            // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
            for (u32 component = 0; component < 4; ++component) {
                if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
                    code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component),
                                 SafeGetRegister(current_reg).AsFloat());
                    ++current_reg;
                }
            }
        }
        if (header.ps.omap.depth) {
            // The depth output is always 2 registers after the last color output, and current_reg
            // already contains one past the last color register.
            code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat());
        }
    }
2268
    /// Writes pending stage outputs (see PreExit) and returns from the shader.
    Expression Exit(Operation operation) {
        PreExit();
        code.AddLine("return;");
        return {};
    }
2274
    /// Discards the current fragment.
    Expression Discard(Operation operation) {
        // Enclose "discard" in a conditional, so that GLSL compilation does not complain
        // about unexecuted instructions that may follow this.
        code.AddLine("if (true) {{");
        ++code.scope;
        code.AddLine("discard;");
        --code.scope;
        code.AddLine("}}");
        return {};
    }
2285
    /// Emits a geometry-shader EmitVertex() call.
    Expression EmitVertex(Operation operation) {
        ASSERT_MSG(stage == ShaderType::Geometry,
                   "EmitVertex is expected to be used in a geometry shader.");
        code.AddLine("EmitVertex();");
        return {};
    }
2292
    /// Emits a geometry-shader EndPrimitive() call.
    Expression EndPrimitive(Operation operation) {
        ASSERT_MSG(stage == ShaderType::Geometry,
                   "EndPrimitive is expected to be used in a geometry shader.");
        code.AddLine("EndPrimitive();");
        return {};
    }
2299
    /// Returns the GLSL invocation index builtin.
    Expression InvocationId(Operation operation) {
        return {"gl_InvocationID", Type::Int};
    }
2303
    /// Reads the Y-negate factor smuggled through a fixed-function uniform slot.
    Expression YNegate(Operation operation) {
        // Y_NEGATE is mapped to this uniform value
        return {"gl_FrontMaterial.ambient.a", Type::Float};
    }
2308
    /// Returns one component of gl_LocalInvocationID.
    template <u32 element>
    Expression LocalInvocationId(Operation) {
        return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint};
    }
2313
    /// Returns one component of gl_WorkGroupID.
    template <u32 element>
    Expression WorkGroupId(Operation) {
        return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint};
    }
2318
2319 Expression BallotThread(Operation operation) {
2320 const std::string value = VisitOperand(operation, 0).AsBool();
2321 if (!device.HasWarpIntrinsics()) {
2322 LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
2323 // Stub on non-Nvidia devices by simulating all threads voting the same as the active
2324 // one.
2325 return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint};
2326 }
2327 return {fmt::format("ballotThreadNV({})", value), Type::Uint};
2328 }
2329
2330 Expression Vote(Operation operation, const char* func) {
2331 const std::string value = VisitOperand(operation, 0).AsBool();
2332 if (!device.HasWarpIntrinsics()) {
2333 LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
2334 // Stub with a warp size of one.
2335 return {value, Type::Bool};
2336 }
2337 return {fmt::format("{}({})", func, value), Type::Bool};
2338 }
2339
    /// Vote: true when all active threads pass the predicate.
    Expression VoteAll(Operation operation) {
        return Vote(operation, "allThreadsNV");
    }
2343
    /// Vote: true when any active thread passes the predicate.
    Expression VoteAny(Operation operation) {
        return Vote(operation, "anyThreadNV");
    }
2347
    /// Vote: true when all active threads agree on the predicate value.
    Expression VoteEqual(Operation operation) {
        if (!device.HasWarpIntrinsics()) {
            LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
            // We must return true here since a stub for a theoretical warp size of 1.
            // This will always return an equal result across all votes.
            return {"true", Type::Bool};
        }
        return Vote(operation, "allThreadsEqualNV");
    }
2357
2358 Expression ThreadId(Operation operation) {
2359 if (!device.HasShaderBallot()) {
2360 LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
2361 return {"0U", Type::Uint};
2362 }
2363 return {"gl_SubGroupInvocationARB", Type::Uint};
2364 }
2365
    /// Returns the thread comparison mask (Eq/Ge/Gt/Le/Lt), preferring the
    /// Nvidia builtin, then the ARB ballot builtin, else a zero stub.
    template <const std::string_view& comparison>
    Expression ThreadMask(Operation) {
        if (device.HasWarpIntrinsics()) {
            return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint};
        }
        if (device.HasShaderBallot()) {
            return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint};
        }
        LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader");
        return {"0U", Type::Uint};
    }
2377
2378 Expression ShuffleIndexed(Operation operation) {
2379 std::string value = VisitOperand(operation, 0).AsFloat();
2380
2381 if (!device.HasShaderBallot()) {
2382 LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
2383 return {std::move(value), Type::Float};
2384 }
2385
2386 const std::string index = VisitOperand(operation, 1).AsUint();
2387 return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
2388 }
2389
    /// Emits a barrier(); skipped when the IR was not fully decompiled, since
    /// the control flow could then reach the barrier non-uniformly.
    Expression Barrier(Operation) {
        if (!ir.IsDecompiled()) {
            LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled");
            return {};
        }
        code.AddLine("barrier();");
        return {};
    }
2398
    /// Emits a workgroup-scope memory barrier.
    Expression MemoryBarrierGroup(Operation) {
        code.AddLine("groupMemoryBarrier();");
        return {};
    }
2403
    /// Emits a device-scope memory barrier.
    Expression MemoryBarrierGlobal(Operation) {
        code.AddLine("memoryBarrier();");
        return {};
    }
2408
    /// Non-instantiable holder of string_view constants passed as non-type
    /// template arguments to the comparison, thread-mask and atomic generators.
    struct Func final {
        Func() = delete;
        ~Func() = delete;

        // Infix comparison operators.
        static constexpr std::string_view LessThan = "<";
        static constexpr std::string_view Equal = "==";
        static constexpr std::string_view LessEqual = "<=";
        static constexpr std::string_view GreaterThan = ">";
        static constexpr std::string_view NotEqual = "!=";
        static constexpr std::string_view GreaterEqual = ">=";

        // Thread-mask builtin name fragments (gl_Thread{X}MaskNV / gl_SubGroup{X}MaskARB).
        static constexpr std::string_view Eq = "Eq";
        static constexpr std::string_view Ge = "Ge";
        static constexpr std::string_view Gt = "Gt";
        static constexpr std::string_view Le = "Le";
        static constexpr std::string_view Lt = "Lt";

        // Atomic operation name suffixes (atomic{X} / imageAtomic{X}).
        static constexpr std::string_view Add = "Add";
        static constexpr std::string_view Min = "Min";
        static constexpr std::string_view Max = "Max";
        static constexpr std::string_view And = "And";
        static constexpr std::string_view Or = "Or";
        static constexpr std::string_view Xor = "Xor";
        static constexpr std::string_view Exchange = "Exchange";
    };
2434
    /// Dispatch table of member-function pointers indexed by OperationCode.
    /// The entry order must match the OperationCode enumeration exactly; the
    /// static_assert below pins the table length to OperationCode::Amount.
    static constexpr std::array operation_decompilers = {
        &GLSLDecompiler::Assign,

        &GLSLDecompiler::Select,

        &GLSLDecompiler::Add<Type::Float>,
        &GLSLDecompiler::Mul<Type::Float>,
        &GLSLDecompiler::Div<Type::Float>,
        &GLSLDecompiler::Fma<Type::Float>,
        &GLSLDecompiler::Negate<Type::Float>,
        &GLSLDecompiler::Absolute<Type::Float>,
        &GLSLDecompiler::FClamp,
        &GLSLDecompiler::FCastHalf0,
        &GLSLDecompiler::FCastHalf1,
        &GLSLDecompiler::Min<Type::Float>,
        &GLSLDecompiler::Max<Type::Float>,
        &GLSLDecompiler::FCos,
        &GLSLDecompiler::FSin,
        &GLSLDecompiler::FExp2,
        &GLSLDecompiler::FLog2,
        &GLSLDecompiler::FInverseSqrt,
        &GLSLDecompiler::FSqrt,
        &GLSLDecompiler::FRoundEven,
        &GLSLDecompiler::FFloor,
        &GLSLDecompiler::FCeil,
        &GLSLDecompiler::FTrunc,
        &GLSLDecompiler::FCastInteger<Type::Int>,
        &GLSLDecompiler::FCastInteger<Type::Uint>,
        &GLSLDecompiler::FSwizzleAdd,

        &GLSLDecompiler::Add<Type::Int>,
        &GLSLDecompiler::Mul<Type::Int>,
        &GLSLDecompiler::Div<Type::Int>,
        &GLSLDecompiler::Negate<Type::Int>,
        &GLSLDecompiler::Absolute<Type::Int>,
        &GLSLDecompiler::Min<Type::Int>,
        &GLSLDecompiler::Max<Type::Int>,

        &GLSLDecompiler::ICastFloat,
        &GLSLDecompiler::ICastUnsigned,
        &GLSLDecompiler::LogicalShiftLeft<Type::Int>,
        &GLSLDecompiler::ILogicalShiftRight,
        &GLSLDecompiler::IArithmeticShiftRight,
        &GLSLDecompiler::BitwiseAnd<Type::Int>,
        &GLSLDecompiler::BitwiseOr<Type::Int>,
        &GLSLDecompiler::BitwiseXor<Type::Int>,
        &GLSLDecompiler::BitwiseNot<Type::Int>,
        &GLSLDecompiler::BitfieldInsert<Type::Int>,
        &GLSLDecompiler::BitfieldExtract<Type::Int>,
        &GLSLDecompiler::BitCount<Type::Int>,
        &GLSLDecompiler::BitMSB<Type::Int>,

        &GLSLDecompiler::Add<Type::Uint>,
        &GLSLDecompiler::Mul<Type::Uint>,
        &GLSLDecompiler::Div<Type::Uint>,
        &GLSLDecompiler::Min<Type::Uint>,
        &GLSLDecompiler::Max<Type::Uint>,
        &GLSLDecompiler::UCastFloat,
        &GLSLDecompiler::UCastSigned,
        &GLSLDecompiler::LogicalShiftLeft<Type::Uint>,
        &GLSLDecompiler::UShiftRight,
        // Logical and arithmetic right shift are equivalent for unsigned
        // operands, so both operation codes share the same generator.
        &GLSLDecompiler::UShiftRight,
        &GLSLDecompiler::BitwiseAnd<Type::Uint>,
        &GLSLDecompiler::BitwiseOr<Type::Uint>,
        &GLSLDecompiler::BitwiseXor<Type::Uint>,
        &GLSLDecompiler::BitwiseNot<Type::Uint>,
        &GLSLDecompiler::BitfieldInsert<Type::Uint>,
        &GLSLDecompiler::BitfieldExtract<Type::Uint>,
        &GLSLDecompiler::BitCount<Type::Uint>,
        &GLSLDecompiler::BitMSB<Type::Uint>,

        &GLSLDecompiler::Add<Type::HalfFloat>,
        &GLSLDecompiler::Mul<Type::HalfFloat>,
        &GLSLDecompiler::Fma<Type::HalfFloat>,
        &GLSLDecompiler::Absolute<Type::HalfFloat>,
        &GLSLDecompiler::HNegate,
        &GLSLDecompiler::HClamp,
        &GLSLDecompiler::HCastFloat,
        &GLSLDecompiler::HUnpack,
        &GLSLDecompiler::HMergeF32,
        &GLSLDecompiler::HMergeH0,
        &GLSLDecompiler::HMergeH1,
        &GLSLDecompiler::HPack2,

        &GLSLDecompiler::LogicalAssign,
        &GLSLDecompiler::LogicalAnd,
        &GLSLDecompiler::LogicalOr,
        &GLSLDecompiler::LogicalXor,
        &GLSLDecompiler::LogicalNegate,
        &GLSLDecompiler::LogicalPick2,
        &GLSLDecompiler::LogicalAnd2,

        &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, false>,
        &GLSLDecompiler::Comparison<Func::Equal, Type::Float, false>,
        &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, false>,
        &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, false>,
        &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, false>,
        &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, false>,
        &GLSLDecompiler::FOrdered,
        &GLSLDecompiler::FUnordered,
        &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, true>,
        &GLSLDecompiler::Comparison<Func::Equal, Type::Float, true>,
        &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, true>,
        &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, true>,
        &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, true>,
        &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, true>,

        &GLSLDecompiler::Comparison<Func::LessThan, Type::Int>,
        &GLSLDecompiler::Comparison<Func::Equal, Type::Int>,
        &GLSLDecompiler::Comparison<Func::LessEqual, Type::Int>,
        &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Int>,
        &GLSLDecompiler::Comparison<Func::NotEqual, Type::Int>,
        &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Int>,

        &GLSLDecompiler::Comparison<Func::LessThan, Type::Uint>,
        &GLSLDecompiler::Comparison<Func::Equal, Type::Uint>,
        &GLSLDecompiler::Comparison<Func::LessEqual, Type::Uint>,
        &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Uint>,
        &GLSLDecompiler::Comparison<Func::NotEqual, Type::Uint>,
        &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Uint>,

        &GLSLDecompiler::LogicalAddCarry,

        &GLSLDecompiler::Logical2HLessThan<false>,
        &GLSLDecompiler::Logical2HEqual<false>,
        &GLSLDecompiler::Logical2HLessEqual<false>,
        &GLSLDecompiler::Logical2HGreaterThan<false>,
        &GLSLDecompiler::Logical2HNotEqual<false>,
        &GLSLDecompiler::Logical2HGreaterEqual<false>,
        &GLSLDecompiler::Logical2HLessThan<true>,
        &GLSLDecompiler::Logical2HEqual<true>,
        &GLSLDecompiler::Logical2HLessEqual<true>,
        &GLSLDecompiler::Logical2HGreaterThan<true>,
        &GLSLDecompiler::Logical2HNotEqual<true>,
        &GLSLDecompiler::Logical2HGreaterEqual<true>,

        &GLSLDecompiler::Texture,
        &GLSLDecompiler::TextureLod,
        &GLSLDecompiler::TextureGather,
        &GLSLDecompiler::TextureQueryDimensions,
        &GLSLDecompiler::TextureQueryLod,
        &GLSLDecompiler::TexelFetch,
        &GLSLDecompiler::TextureGradient,

        &GLSLDecompiler::ImageLoad,
        &GLSLDecompiler::ImageStore,

        &GLSLDecompiler::AtomicImage<Func::Add>,
        &GLSLDecompiler::AtomicImage<Func::And>,
        &GLSLDecompiler::AtomicImage<Func::Or>,
        &GLSLDecompiler::AtomicImage<Func::Xor>,
        &GLSLDecompiler::AtomicImage<Func::Exchange>,

        &GLSLDecompiler::Atomic<Func::Exchange, Type::Uint>,
        &GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
        &GLSLDecompiler::Atomic<Func::Min, Type::Uint>,
        &GLSLDecompiler::Atomic<Func::Max, Type::Uint>,
        &GLSLDecompiler::Atomic<Func::And, Type::Uint>,
        &GLSLDecompiler::Atomic<Func::Or, Type::Uint>,
        &GLSLDecompiler::Atomic<Func::Xor, Type::Uint>,

        &GLSLDecompiler::Atomic<Func::Exchange, Type::Int>,
        &GLSLDecompiler::Atomic<Func::Add, Type::Int>,
        &GLSLDecompiler::Atomic<Func::Min, Type::Int>,
        &GLSLDecompiler::Atomic<Func::Max, Type::Int>,
        &GLSLDecompiler::Atomic<Func::And, Type::Int>,
        &GLSLDecompiler::Atomic<Func::Or, Type::Int>,
        &GLSLDecompiler::Atomic<Func::Xor, Type::Int>,

        &GLSLDecompiler::Reduce<Func::Add, Type::Uint>,
        &GLSLDecompiler::Reduce<Func::Min, Type::Uint>,
        &GLSLDecompiler::Reduce<Func::Max, Type::Uint>,
        &GLSLDecompiler::Reduce<Func::And, Type::Uint>,
        &GLSLDecompiler::Reduce<Func::Or, Type::Uint>,
        &GLSLDecompiler::Reduce<Func::Xor, Type::Uint>,

        &GLSLDecompiler::Reduce<Func::Add, Type::Int>,
        &GLSLDecompiler::Reduce<Func::Min, Type::Int>,
        &GLSLDecompiler::Reduce<Func::Max, Type::Int>,
        &GLSLDecompiler::Reduce<Func::And, Type::Int>,
        &GLSLDecompiler::Reduce<Func::Or, Type::Int>,
        &GLSLDecompiler::Reduce<Func::Xor, Type::Int>,

        &GLSLDecompiler::Branch,
        &GLSLDecompiler::BranchIndirect,
        &GLSLDecompiler::PushFlowStack,
        &GLSLDecompiler::PopFlowStack,
        &GLSLDecompiler::Exit,
        &GLSLDecompiler::Discard,

        &GLSLDecompiler::EmitVertex,
        &GLSLDecompiler::EndPrimitive,

        &GLSLDecompiler::InvocationId,
        &GLSLDecompiler::YNegate,
        &GLSLDecompiler::LocalInvocationId<0>,
        &GLSLDecompiler::LocalInvocationId<1>,
        &GLSLDecompiler::LocalInvocationId<2>,
        &GLSLDecompiler::WorkGroupId<0>,
        &GLSLDecompiler::WorkGroupId<1>,
        &GLSLDecompiler::WorkGroupId<2>,

        &GLSLDecompiler::BallotThread,
        &GLSLDecompiler::VoteAll,
        &GLSLDecompiler::VoteAny,
        &GLSLDecompiler::VoteEqual,

        &GLSLDecompiler::ThreadId,
        &GLSLDecompiler::ThreadMask<Func::Eq>,
        &GLSLDecompiler::ThreadMask<Func::Ge>,
        &GLSLDecompiler::ThreadMask<Func::Gt>,
        &GLSLDecompiler::ThreadMask<Func::Le>,
        &GLSLDecompiler::ThreadMask<Func::Lt>,
        &GLSLDecompiler::ShuffleIndexed,

        &GLSLDecompiler::Barrier,
        &GLSLDecompiler::MemoryBarrierGroup,
        &GLSLDecompiler::MemoryBarrierGlobal,
    };
    static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
2655
    /// GLSL name of a general purpose register.
    std::string GetRegister(u32 index) const {
        return AppendSuffix(index, "gpr");
    }
2659
    /// GLSL name of a custom decompiler-introduced variable.
    std::string GetCustomVariable(u32 index) const {
        return AppendSuffix(index, "custom_var");
    }
2663
    /// GLSL name of a predicate register.
    std::string GetPredicate(Tegra::Shader::Pred pred) const {
        return AppendSuffix(static_cast<u32>(pred), "pred");
    }
2667
    /// GLSL name of a generic input attribute.
    std::string GetGenericInputAttribute(Attribute::Index attribute) const {
        return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME);
    }
2671
    // Keyed by varying offset (generic attribute index * 4 + element); see
    // GetGenericOutputAttribute for the lookup.
    std::unordered_map<u8, GenericVaryingDescription> varying_description;
2673
    /// Resolves a generic output attribute (index + element) to its assigned
    /// varying: scalar varyings are used directly, vector varyings are indexed
    /// relative to their first element.
    std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const {
        const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element);
        const auto& description = varying_description.at(offset);
        if (description.is_scalar) {
            return description.name;
        }
        return fmt::format("{}[{}]", description.name, element - description.first_element);
    }
2682
    /// GLSL name of a constant buffer variable.
    std::string GetConstBuffer(u32 index) const {
        return AppendSuffix(index, "cbuf");
    }
2686
    /// GLSL name of a global memory variable for the given descriptor.
    std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
        return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix);
    }
2690
    /// GLSL name of the interface block wrapping a global memory region.
    std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const {
        return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset,
                           suffix);
    }
2695
    /// GLSL name of the interface block wrapping a constant buffer.
    std::string GetConstBufferBlock(u32 index) const {
        return AppendSuffix(index, "cbuf_block");
    }
2699
2700 std::string GetLocalMemory() const {
2701 if (suffix.empty()) {
2702 return "lmem";
2703 } else {
2704 return "lmem_" + std::string{suffix};
2705 }
2706 }
2707
2708 std::string GetInternalFlag(InternalFlag flag) const {
2709 constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
2710 "overflow_flag"};
2711 const auto index = static_cast<u32>(flag);
2712 ASSERT(index < static_cast<u32>(InternalFlag::Amount));
2713
2714 if (suffix.empty()) {
2715 return InternalFlagNames[index];
2716 } else {
2717 return fmt::format("{}_{}", InternalFlagNames[index], suffix);
2718 }
2719 }
2720
    /// GLSL name of a sampler uniform.
    std::string GetSampler(const SamplerEntry& sampler) const {
        return AppendSuffix(sampler.index, "sampler");
    }
2724
    /// GLSL name of an image uniform.
    std::string GetImage(const ImageEntry& image) const {
        return AppendSuffix(image.index, "image");
    }
2728
2729 std::string AppendSuffix(u32 index, std::string_view name) const {
2730 if (suffix.empty()) {
2731 return fmt::format("{}{}", name, index);
2732 } else {
2733 return fmt::format("{}{}_{}", name, index, suffix);
2734 }
2735 }
2736
    /// Number of physical input attributes: vertex attributes for the vertex
    /// stage, varyings otherwise.
    u32 GetNumPhysicalInputAttributes() const {
        return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
    }
2740
2741 u32 GetNumPhysicalAttributes() const {
2742 return std::min<u32>(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes);
2743 }
2744
2745 u32 GetNumPhysicalVaryings() const {
2746 return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
2747 }
2748
2749 const Device& device;
2750 const ShaderIR& ir;
2751 const Registry& registry;
2752 const ShaderType stage;
2753 const std::string_view identifier;
2754 const std::string_view suffix;
2755 const Header header;
2756 std::unordered_map<u8, VaryingTFB> transform_feedback;
2757
2758 ShaderWriter code;
2759
2760 std::optional<u32> max_input_vertices;
2761};
2762
2763std::string GetFlowVariable(u32 index) {
2764 return fmt::format("flow_var{}", index);
2765}
2766
2767class ExprDecompiler {
2768public:
2769 explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
2770
2771 void operator()(const ExprAnd& expr) {
2772 inner += '(';
2773 std::visit(*this, *expr.operand1);
2774 inner += " && ";
2775 std::visit(*this, *expr.operand2);
2776 inner += ')';
2777 }
2778
2779 void operator()(const ExprOr& expr) {
2780 inner += '(';
2781 std::visit(*this, *expr.operand1);
2782 inner += " || ";
2783 std::visit(*this, *expr.operand2);
2784 inner += ')';
2785 }
2786
2787 void operator()(const ExprNot& expr) {
2788 inner += '!';
2789 std::visit(*this, *expr.operand1);
2790 }
2791
2792 void operator()(const ExprPredicate& expr) {
2793 const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
2794 inner += decomp.GetPredicate(pred);
2795 }
2796
2797 void operator()(const ExprCondCode& expr) {
2798 inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool();
2799 }
2800
2801 void operator()(const ExprVar& expr) {
2802 inner += GetFlowVariable(expr.var_index);
2803 }
2804
2805 void operator()(const ExprBoolean& expr) {
2806 inner += expr.value ? "true" : "false";
2807 }
2808
2809 void operator()(VideoCommon::Shader::ExprGprEqual& expr) {
2810 inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value);
2811 }
2812
2813 const std::string& GetResult() const {
2814 return inner;
2815 }
2816
2817private:
2818 GLSLDecompiler& decomp;
2819 std::string inner;
2820};
2821
2822class ASTDecompiler {
2823public:
2824 explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
2825
2826 void operator()(const ASTProgram& ast) {
2827 ASTNode current = ast.nodes.GetFirst();
2828 while (current) {
2829 Visit(current);
2830 current = current->GetNext();
2831 }
2832 }
2833
2834 void operator()(const ASTIfThen& ast) {
2835 ExprDecompiler expr_parser{decomp};
2836 std::visit(expr_parser, *ast.condition);
2837 decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
2838 decomp.code.scope++;
2839 ASTNode current = ast.nodes.GetFirst();
2840 while (current) {
2841 Visit(current);
2842 current = current->GetNext();
2843 }
2844 decomp.code.scope--;
2845 decomp.code.AddLine("}}");
2846 }
2847
2848 void operator()(const ASTIfElse& ast) {
2849 decomp.code.AddLine("else {{");
2850 decomp.code.scope++;
2851 ASTNode current = ast.nodes.GetFirst();
2852 while (current) {
2853 Visit(current);
2854 current = current->GetNext();
2855 }
2856 decomp.code.scope--;
2857 decomp.code.AddLine("}}");
2858 }
2859
2860 void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
2861 UNREACHABLE();
2862 }
2863
2864 void operator()(const ASTBlockDecoded& ast) {
2865 decomp.VisitBlock(ast.nodes);
2866 }
2867
2868 void operator()(const ASTVarSet& ast) {
2869 ExprDecompiler expr_parser{decomp};
2870 std::visit(expr_parser, *ast.condition);
2871 decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult());
2872 }
2873
2874 void operator()(const ASTLabel& ast) {
2875 decomp.code.AddLine("// Label_{}:", ast.index);
2876 }
2877
2878 void operator()([[maybe_unused]] const ASTGoto& ast) {
2879 UNREACHABLE();
2880 }
2881
2882 void operator()(const ASTDoWhile& ast) {
2883 ExprDecompiler expr_parser{decomp};
2884 std::visit(expr_parser, *ast.condition);
2885 decomp.code.AddLine("do {{");
2886 decomp.code.scope++;
2887 ASTNode current = ast.nodes.GetFirst();
2888 while (current) {
2889 Visit(current);
2890 current = current->GetNext();
2891 }
2892 decomp.code.scope--;
2893 decomp.code.AddLine("}} while({});", expr_parser.GetResult());
2894 }
2895
2896 void operator()(const ASTReturn& ast) {
2897 const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
2898 if (!is_true) {
2899 ExprDecompiler expr_parser{decomp};
2900 std::visit(expr_parser, *ast.condition);
2901 decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
2902 decomp.code.scope++;
2903 }
2904 if (ast.kills) {
2905 decomp.code.AddLine("discard;");
2906 } else {
2907 decomp.PreExit();
2908 decomp.code.AddLine("return;");
2909 }
2910 if (!is_true) {
2911 decomp.code.scope--;
2912 decomp.code.AddLine("}}");
2913 }
2914 }
2915
2916 void operator()(const ASTBreak& ast) {
2917 const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
2918 if (!is_true) {
2919 ExprDecompiler expr_parser{decomp};
2920 std::visit(expr_parser, *ast.condition);
2921 decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
2922 decomp.code.scope++;
2923 }
2924 decomp.code.AddLine("break;");
2925 if (!is_true) {
2926 decomp.code.scope--;
2927 decomp.code.AddLine("}}");
2928 }
2929 }
2930
2931 void Visit(const ASTNode& node) {
2932 std::visit(*this, *node->GetInnerData());
2933 }
2934
2935private:
2936 GLSLDecompiler& decomp;
2937};
2938
2939void GLSLDecompiler::DecompileAST() {
2940 const u32 num_flow_variables = ir.GetASTNumVariables();
2941 for (u32 i = 0; i < num_flow_variables; i++) {
2942 code.AddLine("bool {} = false;", GetFlowVariable(i));
2943 }
2944
2945 ASTDecompiler decompiler{*this};
2946 decompiler.Visit(ir.GetASTProgram());
2947}
2948
2949} // Anonymous namespace
2950
2951ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) {
2952 ShaderEntries entries;
2953 for (const auto& cbuf : ir.GetConstantBuffers()) {
2954 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
2955 cbuf.first);
2956 }
2957 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
2958 entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read,
2959 usage.is_written);
2960 }
2961 for (const auto& sampler : ir.GetSamplers()) {
2962 entries.samplers.emplace_back(sampler);
2963 }
2964 for (const auto& image : ir.GetImages()) {
2965 entries.images.emplace_back(image);
2966 }
2967 const auto clip_distances = ir.GetClipDistances();
2968 for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
2969 entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
2970 }
2971 for (const auto& buffer : entries.const_buffers) {
2972 entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
2973 }
2974 entries.shader_length = ir.GetLength();
2975 return entries;
2976}
2977
2978std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry,
2979 ShaderType stage, std::string_view identifier,
2980 std::string_view suffix) {
2981 GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix);
2982 decompiler.Decompile();
2983 return decompiler.GetResult();
2984}
2985
2986} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
deleted file mode 100644
index 0397a000c..000000000
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ /dev/null
@@ -1,69 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <string>
9#include <string_view>
10#include <utility>
11#include <vector>
12#include "common/common_types.h"
13#include "video_core/engines/maxwell_3d.h"
14#include "video_core/engines/shader_type.h"
15#include "video_core/shader/registry.h"
16#include "video_core/shader/shader_ir.h"
17
18namespace OpenGL {
19
20class Device;
21
22using Maxwell = Tegra::Engines::Maxwell3D::Regs;
23using SamplerEntry = VideoCommon::Shader::SamplerEntry;
24using ImageEntry = VideoCommon::Shader::ImageEntry;
25
26class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
27public:
28 explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_)
29 : ConstBuffer{max_offset_, is_indirect_}, index{index_} {}
30
31 u32 GetIndex() const {
32 return index;
33 }
34
35private:
36 u32 index = 0;
37};
38
39struct GlobalMemoryEntry {
40 constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_,
41 bool is_written_)
42 : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{
43 is_written_} {}
44
45 u32 cbuf_index = 0;
46 u32 cbuf_offset = 0;
47 bool is_read = false;
48 bool is_written = false;
49};
50
51struct ShaderEntries {
52 std::vector<ConstBufferEntry> const_buffers;
53 std::vector<GlobalMemoryEntry> global_memory_entries;
54 std::vector<SamplerEntry> samplers;
55 std::vector<ImageEntry> images;
56 std::size_t shader_length{};
57 u32 clip_distances{};
58 u32 enabled_uniform_buffers{};
59};
60
61ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
62 Tegra::Engines::ShaderType stage);
63
64std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
65 const VideoCommon::Shader::Registry& registry,
66 Tegra::Engines::ShaderType stage, std::string_view identifier,
67 std::string_view suffix = {});
68
69} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
deleted file mode 100644
index 0deb86517..000000000
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ /dev/null
@@ -1,482 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "common/fs/file.h"
12#include "common/fs/fs.h"
13#include "common/fs/path_util.h"
14#include "common/logging/log.h"
15#include "common/scm_rev.h"
16#include "common/settings.h"
17#include "common/zstd_compression.h"
18#include "core/core.h"
19#include "core/hle/kernel/k_process.h"
20#include "video_core/engines/shader_type.h"
21#include "video_core/renderer_opengl/gl_shader_cache.h"
22#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
23
24namespace OpenGL {
25
26using Tegra::Engines::ShaderType;
27using VideoCommon::Shader::BindlessSamplerMap;
28using VideoCommon::Shader::BoundSamplerMap;
29using VideoCommon::Shader::KeyMap;
30using VideoCommon::Shader::SeparateSamplerKey;
31using ShaderCacheVersionHash = std::array<u8, 64>;
32
33struct ConstBufferKey {
34 u32 cbuf = 0;
35 u32 offset = 0;
36 u32 value = 0;
37};
38
39struct BoundSamplerEntry {
40 u32 offset = 0;
41 Tegra::Engines::SamplerDescriptor sampler;
42};
43
44struct SeparateSamplerEntry {
45 u32 cbuf1 = 0;
46 u32 cbuf2 = 0;
47 u32 offset1 = 0;
48 u32 offset2 = 0;
49 Tegra::Engines::SamplerDescriptor sampler;
50};
51
52struct BindlessSamplerEntry {
53 u32 cbuf = 0;
54 u32 offset = 0;
55 Tegra::Engines::SamplerDescriptor sampler;
56};
57
58namespace {
59
60constexpr u32 NativeVersion = 21;
61
62ShaderCacheVersionHash GetShaderCacheVersionHash() {
63 ShaderCacheVersionHash hash{};
64 const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size());
65 std::memcpy(hash.data(), Common::g_shader_cache_version, length);
66 return hash;
67}
68
69} // Anonymous namespace
70
71ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
72
73ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
74
75bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) {
76 if (!file.ReadObject(type)) {
77 return false;
78 }
79 u32 code_size;
80 u32 code_size_b;
81 if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) {
82 return false;
83 }
84 code.resize(code_size);
85 code_b.resize(code_size_b);
86 if (file.Read(code) != code_size) {
87 return false;
88 }
89 if (HasProgramA() && file.Read(code_b) != code_size_b) {
90 return false;
91 }
92
93 u8 is_texture_handler_size_known;
94 u32 texture_handler_size_value;
95 u32 num_keys;
96 u32 num_bound_samplers;
97 u32 num_separate_samplers;
98 u32 num_bindless_samplers;
99 if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) ||
100 !file.ReadObject(is_texture_handler_size_known) ||
101 !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) ||
102 !file.ReadObject(compute_info) || !file.ReadObject(num_keys) ||
103 !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) ||
104 !file.ReadObject(num_bindless_samplers)) {
105 return false;
106 }
107 if (is_texture_handler_size_known) {
108 texture_handler_size = texture_handler_size_value;
109 }
110
111 std::vector<ConstBufferKey> flat_keys(num_keys);
112 std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
113 std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
114 std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
115 if (file.Read(flat_keys) != flat_keys.size() ||
116 file.Read(flat_bound_samplers) != flat_bound_samplers.size() ||
117 file.Read(flat_separate_samplers) != flat_separate_samplers.size() ||
118 file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) {
119 return false;
120 }
121 for (const auto& entry : flat_keys) {
122 keys.insert({{entry.cbuf, entry.offset}, entry.value});
123 }
124 for (const auto& entry : flat_bound_samplers) {
125 bound_samplers.emplace(entry.offset, entry.sampler);
126 }
127 for (const auto& entry : flat_separate_samplers) {
128 SeparateSamplerKey key;
129 key.buffers = {entry.cbuf1, entry.cbuf2};
130 key.offsets = {entry.offset1, entry.offset2};
131 separate_samplers.emplace(key, entry.sampler);
132 }
133 for (const auto& entry : flat_bindless_samplers) {
134 bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
135 }
136
137 return true;
138}
139
140bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
141 if (!file.WriteObject(static_cast<u32>(type)) ||
142 !file.WriteObject(static_cast<u32>(code.size())) ||
143 !file.WriteObject(static_cast<u32>(code_b.size()))) {
144 return false;
145 }
146 if (file.Write(code) != code.size()) {
147 return false;
148 }
149 if (HasProgramA() && file.Write(code_b) != code_b.size()) {
150 return false;
151 }
152
153 if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) ||
154 !file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) ||
155 !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) ||
156 !file.WriteObject(compute_info) || !file.WriteObject(static_cast<u32>(keys.size())) ||
157 !file.WriteObject(static_cast<u32>(bound_samplers.size())) ||
158 !file.WriteObject(static_cast<u32>(separate_samplers.size())) ||
159 !file.WriteObject(static_cast<u32>(bindless_samplers.size()))) {
160 return false;
161 }
162
163 std::vector<ConstBufferKey> flat_keys;
164 flat_keys.reserve(keys.size());
165 for (const auto& [address, value] : keys) {
166 flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
167 }
168
169 std::vector<BoundSamplerEntry> flat_bound_samplers;
170 flat_bound_samplers.reserve(bound_samplers.size());
171 for (const auto& [address, sampler] : bound_samplers) {
172 flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
173 }
174
175 std::vector<SeparateSamplerEntry> flat_separate_samplers;
176 flat_separate_samplers.reserve(separate_samplers.size());
177 for (const auto& [key, sampler] : separate_samplers) {
178 SeparateSamplerEntry entry;
179 std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
180 std::tie(entry.offset1, entry.offset2) = key.offsets;
181 entry.sampler = sampler;
182 flat_separate_samplers.push_back(entry);
183 }
184
185 std::vector<BindlessSamplerEntry> flat_bindless_samplers;
186 flat_bindless_samplers.reserve(bindless_samplers.size());
187 for (const auto& [address, sampler] : bindless_samplers) {
188 flat_bindless_samplers.push_back(
189 BindlessSamplerEntry{address.first, address.second, sampler});
190 }
191
192 return file.Write(flat_keys) == flat_keys.size() &&
193 file.Write(flat_bound_samplers) == flat_bound_samplers.size() &&
194 file.Write(flat_separate_samplers) == flat_separate_samplers.size() &&
195 file.Write(flat_bindless_samplers) == flat_bindless_samplers.size();
196}
197
198ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default;
199
200ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
201
202void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) {
203 title_id = title_id_;
204}
205
206std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
207 // Skip games without title id
208 const bool has_title_id = title_id != 0;
209 if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
210 return std::nullopt;
211 }
212
213 Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read,
214 Common::FS::FileType::BinaryFile};
215 if (!file.IsOpen()) {
216 LOG_INFO(Render_OpenGL, "No transferable shader cache found");
217 is_usable = true;
218 return std::nullopt;
219 }
220
221 u32 version{};
222 if (!file.ReadObject(version)) {
223 LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
224 return std::nullopt;
225 }
226
227 if (version < NativeVersion) {
228 LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing");
229 file.Close();
230 InvalidateTransferable();
231 is_usable = true;
232 return std::nullopt;
233 }
234 if (version > NativeVersion) {
235 LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
236 "of the emulator, skipping");
237 return std::nullopt;
238 }
239
240 // Version is valid, load the shaders
241 std::vector<ShaderDiskCacheEntry> entries;
242 while (static_cast<u64>(file.Tell()) < file.GetSize()) {
243 ShaderDiskCacheEntry& entry = entries.emplace_back();
244 if (!entry.Load(file)) {
245 LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
246 return std::nullopt;
247 }
248 }
249
250 is_usable = true;
251 return {std::move(entries)};
252}
253
254std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
255 if (!is_usable) {
256 return {};
257 }
258
259 Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read,
260 Common::FS::FileType::BinaryFile};
261 if (!file.IsOpen()) {
262 LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
263 return {};
264 }
265
266 if (const auto result = LoadPrecompiledFile(file)) {
267 return *result;
268 }
269
270 LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
271 file.Close();
272 InvalidatePrecompiled();
273 return {};
274}
275
276std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
277 Common::FS::IOFile& file) {
278 // Read compressed file from disk and decompress to virtual precompiled cache file
279 std::vector<u8> compressed(file.GetSize());
280 if (file.Read(compressed) != file.GetSize()) {
281 return std::nullopt;
282 }
283 const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed);
284 SaveArrayToPrecompiled(decompressed.data(), decompressed.size());
285 precompiled_cache_virtual_file_offset = 0;
286
287 ShaderCacheVersionHash file_hash{};
288 if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
289 precompiled_cache_virtual_file_offset = 0;
290 return std::nullopt;
291 }
292 if (GetShaderCacheVersionHash() != file_hash) {
293 LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
294 precompiled_cache_virtual_file_offset = 0;
295 return std::nullopt;
296 }
297
298 std::vector<ShaderDiskCachePrecompiled> entries;
299 while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
300 u32 binary_size;
301 auto& entry = entries.emplace_back();
302 if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
303 !LoadObjectFromPrecompiled(entry.binary_format) ||
304 !LoadObjectFromPrecompiled(binary_size)) {
305 return std::nullopt;
306 }
307
308 entry.binary.resize(binary_size);
309 if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
310 return std::nullopt;
311 }
312 }
313 return entries;
314}
315
316void ShaderDiskCacheOpenGL::InvalidateTransferable() {
317 if (!Common::FS::RemoveFile(GetTransferablePath())) {
318 LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
319 Common::FS::PathToUTF8String(GetTransferablePath()));
320 }
321 InvalidatePrecompiled();
322}
323
324void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
325 // Clear virtaul precompiled cache file
326 precompiled_cache_virtual_file.Resize(0);
327
328 if (!Common::FS::RemoveFile(GetPrecompiledPath())) {
329 LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}",
330 Common::FS::PathToUTF8String(GetPrecompiledPath()));
331 }
332}
333
334void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
335 if (!is_usable) {
336 return;
337 }
338
339 const u64 id = entry.unique_identifier;
340 if (stored_transferable.contains(id)) {
341 // The shader already exists
342 return;
343 }
344
345 Common::FS::IOFile file = AppendTransferableFile();
346 if (!file.IsOpen()) {
347 return;
348 }
349 if (!entry.Save(file)) {
350 LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
351 file.Close();
352 InvalidateTransferable();
353 return;
354 }
355
356 stored_transferable.insert(id);
357}
358
359void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
360 if (!is_usable) {
361 return;
362 }
363
364 // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header
365 // when writing the dump. This should be done the moment I get access to write to the virtual
366 // file.
367 if (precompiled_cache_virtual_file.GetSize() == 0) {
368 SavePrecompiledHeaderToVirtualPrecompiledCache();
369 }
370
371 GLint binary_length;
372 glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
373
374 GLenum binary_format;
375 std::vector<u8> binary(binary_length);
376 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
377
378 if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
379 !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
380 !SaveArrayToPrecompiled(binary.data(), binary.size())) {
381 LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
382 unique_identifier);
383 InvalidatePrecompiled();
384 }
385}
386
387Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
388 if (!EnsureDirectories()) {
389 return {};
390 }
391
392 const auto transferable_path{GetTransferablePath()};
393 const bool existed = Common::FS::Exists(transferable_path);
394
395 Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append,
396 Common::FS::FileType::BinaryFile};
397 if (!file.IsOpen()) {
398 LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}",
399 Common::FS::PathToUTF8String(transferable_path));
400 return {};
401 }
402 if (!existed || file.GetSize() == 0) {
403 // If the file didn't exist, write its version
404 if (!file.WriteObject(NativeVersion)) {
405 LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
406 Common::FS::PathToUTF8String(transferable_path));
407 return {};
408 }
409 }
410 return file;
411}
412
413void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
414 const auto hash{GetShaderCacheVersionHash()};
415 if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
416 LOG_ERROR(
417 Render_OpenGL,
418 "Failed to write precompiled cache version hash to virtual precompiled cache file");
419 }
420}
421
422void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
423 precompiled_cache_virtual_file_offset = 0;
424 const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
425 const std::vector<u8> compressed =
426 Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
427
428 const auto precompiled_path = GetPrecompiledPath();
429 Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write,
430 Common::FS::FileType::BinaryFile};
431
432 if (!file.IsOpen()) {
433 LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}",
434 Common::FS::PathToUTF8String(precompiled_path));
435 return;
436 }
437 if (file.Write(compressed) != compressed.size()) {
438 LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
439 Common::FS::PathToUTF8String(precompiled_path));
440 }
441}
442
443bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
444 const auto CreateDir = [](const std::filesystem::path& dir) {
445 if (!Common::FS::CreateDir(dir)) {
446 LOG_ERROR(Render_OpenGL, "Failed to create directory={}",
447 Common::FS::PathToUTF8String(dir));
448 return false;
449 }
450 return true;
451 };
452
453 return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) &&
454 CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
455 CreateDir(GetPrecompiledDir());
456}
457
458std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const {
459 return GetTransferableDir() / fmt::format("{}.bin", GetTitleID());
460}
461
462std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
463 return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID());
464}
465
466std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const {
467 return GetBaseDir() / "transferable";
468}
469
470std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
471 return GetBaseDir() / "precompiled";
472}
473
474std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const {
475 return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl";
476}
477
478std::string ShaderDiskCacheOpenGL::GetTitleID() const {
479 return fmt::format("{:016X}", title_id);
480}
481
482} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
deleted file mode 100644
index f8bc23868..000000000
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ /dev/null
@@ -1,176 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <filesystem>
8#include <optional>
9#include <string>
10#include <tuple>
11#include <type_traits>
12#include <unordered_map>
13#include <unordered_set>
14#include <utility>
15#include <vector>
16
17#include <glad/glad.h>
18
19#include "common/assert.h"
20#include "common/common_types.h"
21#include "core/file_sys/vfs_vector.h"
22#include "video_core/engines/shader_type.h"
23#include "video_core/shader/registry.h"
24
25namespace Common::FS {
26class IOFile;
27}
28
29namespace OpenGL {
30
31using ProgramCode = std::vector<u64>;
32
33/// Describes a shader and how it's used by the guest GPU
34struct ShaderDiskCacheEntry {
35 ShaderDiskCacheEntry();
36 ~ShaderDiskCacheEntry();
37
38 bool Load(Common::FS::IOFile& file);
39
40 bool Save(Common::FS::IOFile& file) const;
41
42 bool HasProgramA() const {
43 return !code.empty() && !code_b.empty();
44 }
45
46 Tegra::Engines::ShaderType type{};
47 ProgramCode code;
48 ProgramCode code_b;
49
50 u64 unique_identifier = 0;
51 std::optional<u32> texture_handler_size;
52 u32 bound_buffer = 0;
53 VideoCommon::Shader::GraphicsInfo graphics_info;
54 VideoCommon::Shader::ComputeInfo compute_info;
55 VideoCommon::Shader::KeyMap keys;
56 VideoCommon::Shader::BoundSamplerMap bound_samplers;
57 VideoCommon::Shader::SeparateSamplerMap separate_samplers;
58 VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
59};
60
/// Contains an OpenGL dumped binary program
struct ShaderDiskCachePrecompiled {
    // NOTE(review): presumably pairs with ShaderDiskCacheEntry::unique_identifier
    // (SavePrecompiled takes the same identifier) — verify against the loader.
    u64 unique_identifier = 0;
    // Driver-reported binary format token accompanying the program binary.
    GLenum binary_format = 0;
    // Raw dumped program binary blob.
    std::vector<u8> binary;
};
67
/// On-disk cache of guest shaders (transferable) and host binaries (precompiled).
class ShaderDiskCacheOpenGL {
public:
    explicit ShaderDiskCacheOpenGL();
    ~ShaderDiskCacheOpenGL();

    /// Binds a title ID for all future operations.
    void BindTitleID(u64 title_id);

    /// Loads transferable cache. If the file has an old version or on failure, it deletes the file.
    std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();

    /// Loads current game's precompiled cache. Invalidates on failure.
    std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();

    /// Removes the transferable (and precompiled) cache file.
    void InvalidateTransferable();

    /// Removes the precompiled cache file and clears virtual precompiled cache file.
    void InvalidatePrecompiled();

    /// Saves a raw dump to the transferable file. Checks for collisions.
    void SaveEntry(const ShaderDiskCacheEntry& entry);

    /// Saves a dump entry to the precompiled file. Does not check for collisions.
    void SavePrecompiled(u64 unique_identifier, GLuint program);

    /// Serializes virtual precompiled shader cache file to real file
    void SaveVirtualPrecompiledFile();

private:
    /// Loads the precompiled cache file. Returns empty on failure.
    std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
        Common::FS::IOFile& file);

    /// Opens current game's transferable file and writes its header if it doesn't exist
    Common::FS::IOFile AppendTransferableFile() const;

    /// Save precompiled header to precompiled_cache_in_memory
    void SavePrecompiledHeaderToVirtualPrecompiledCache();

    /// Create shader disk cache directories. Returns true on success.
    bool EnsureDirectories() const;

    /// Gets current game's transferable file path
    std::filesystem::path GetTransferablePath() const;

    /// Gets current game's precompiled file path
    std::filesystem::path GetPrecompiledPath() const;

    /// Get user's transferable directory path
    std::filesystem::path GetTransferableDir() const;

    /// Get user's precompiled directory path
    std::filesystem::path GetPrecompiledDir() const;

    /// Get user's shader directory path
    std::filesystem::path GetBaseDir() const;

    /// Get current game's title id
    std::string GetTitleID() const;

    /// Writes 'length' elements of 'data' into the virtual precompiled cache file at the
    /// current offset and advances the offset by the amount written. Returns true only
    /// when all sizeof(T) * length bytes were written.
    template <typename T>
    bool SaveArrayToPrecompiled(const T* data, std::size_t length) {
        const std::size_t write_length = precompiled_cache_virtual_file.WriteArray(
            data, length, precompiled_cache_virtual_file_offset);
        precompiled_cache_virtual_file_offset += write_length;
        return write_length == sizeof(T) * length;
    }

    /// Reads 'length' elements into 'data' from the virtual precompiled cache file at the
    /// current offset and advances the offset by the amount read. Returns true only when
    /// all sizeof(T) * length bytes were read.
    template <typename T>
    bool LoadArrayFromPrecompiled(T* data, std::size_t length) {
        const std::size_t read_length = precompiled_cache_virtual_file.ReadArray(
            data, length, precompiled_cache_virtual_file_offset);
        precompiled_cache_virtual_file_offset += read_length;
        return read_length == sizeof(T) * length;
    }

    /// Writes a single object to the virtual precompiled cache file.
    template <typename T>
    bool SaveObjectToPrecompiled(const T& object) {
        return SaveArrayToPrecompiled(&object, 1);
    }

    /// bool overload: stored as a single u8 so the on-disk size is well defined.
    bool SaveObjectToPrecompiled(bool object) {
        const auto value = static_cast<u8>(object);
        return SaveArrayToPrecompiled(&value, 1);
    }

    /// Reads a single object from the virtual precompiled cache file.
    template <typename T>
    bool LoadObjectFromPrecompiled(T& object) {
        return LoadArrayFromPrecompiled(&object, 1);
    }

    // Stores whole precompiled cache which will be read from or saved to the precompiled cache
    // file
    FileSys::VectorVfsFile precompiled_cache_virtual_file;
    // Stores the current offset of the precompiled cache file for IO purposes
    std::size_t precompiled_cache_virtual_file_offset = 0;

    // Stored transferable shaders
    std::unordered_set<u64> stored_transferable;

    /// Title ID to operate on
    u64 title_id = 0;

    // The cache has been loaded at boot
    bool is_usable = false;
};
175
176} // namespace OpenGL