summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2020-06-03 18:07:35 -0300
committerGravatar ReinUsesLisp2020-06-11 22:12:07 -0300
commita63a0daa5e773574019ec521c0a07096efbdcd36 (patch)
treebbf61edb6185f0d5cd7c2bc79d91e39fa47cb45b
parentyuzu/configuration: Show assembly shaders check box (diff)
downloadyuzu-a63a0daa5e773574019ec521c0a07096efbdcd36.tar.gz
yuzu-a63a0daa5e773574019ec521c0a07096efbdcd36.tar.xz
yuzu-a63a0daa5e773574019ec521c0a07096efbdcd36.zip
gl_arb_decompiler: Implement an assembly shader decompiler
Emit code compatible with NV_gpu_program5. This should emit code compatible with Fermi, but it wasn't tested on that architecture. Pascal has some issues not present on Turing GPUs.
Diffstat (limited to '')
-rw-r--r--CMakeModules/GenerateSCMRev.cmake2
-rw-r--r--src/common/CMakeLists.txt2
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.cpp2051
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.h29
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp4
8 files changed, 2095 insertions, 1 deletions
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index 83e4e9df2..311ba1c2e 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -51,6 +51,8 @@ endif()
51# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR) 51# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
52set(VIDEO_CORE "${SRC_DIR}/src/video_core") 52set(VIDEO_CORE "${SRC_DIR}/src/video_core")
53set(HASH_FILES 53set(HASH_FILES
54 "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
55 "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
54 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" 56 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
55 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" 57 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
56 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" 58 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 24b7a083c..0a3e2f4d1 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -32,6 +32,8 @@ add_custom_command(OUTPUT scm_rev.cpp
32 DEPENDS 32 DEPENDS
33 # WARNING! It was too much work to try and make a common location for this list, 33 # WARNING! It was too much work to try and make a common location for this list,
34 # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well 34 # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
35 "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
36 "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
35 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" 37 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
36 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" 38 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
37 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" 39 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 2bf8d68ce..2af713af2 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -54,6 +54,8 @@ add_library(video_core STATIC
54 rasterizer_interface.h 54 rasterizer_interface.h
55 renderer_base.cpp 55 renderer_base.cpp
56 renderer_base.h 56 renderer_base.h
57 renderer_opengl/gl_arb_decompiler.cpp
58 renderer_opengl/gl_arb_decompiler.h
57 renderer_opengl/gl_buffer_cache.cpp 59 renderer_opengl/gl_buffer_cache.cpp
58 renderer_opengl/gl_buffer_cache.h 60 renderer_opengl/gl_buffer_cache.h
59 renderer_opengl/gl_device.cpp 61 renderer_opengl/gl_device.cpp
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
new file mode 100644
index 000000000..6a23221bb
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -0,0 +1,2051 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <cstddef>
8#include <string>
9#include <string_view>
10#include <utility>
11#include <variant>
12
13#include <fmt/format.h>
14
15#include "common/alignment.h"
16#include "common/assert.h"
17#include "common/common_types.h"
18#include "video_core/renderer_opengl/gl_arb_decompiler.h"
19#include "video_core/renderer_opengl/gl_device.h"
20#include "video_core/shader/registry.h"
21#include "video_core/shader/shader_ir.h"
22
23// Predicates in the decompiled code follow the convention that -1 means true and 0 means false.
24// GLASM lacks booleans, so they have to be implemented as integers.
25// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to
26// select between two values, because -1 will be evaluated as true and 0 as false.
27
28namespace OpenGL {
29
30namespace {
31
32using Tegra::Engines::ShaderType;
33using Tegra::Shader::Attribute;
34using Tegra::Shader::PixelImap;
35using Tegra::Shader::Register;
36using namespace VideoCommon::Shader;
37using Operation = const OperationNode&;
38
39constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"};
40
41char Swizzle(std::size_t component) {
42 ASSERT(component < 4);
43 return component["xyzw"];
44}
45
46constexpr bool IsGenericAttribute(Attribute::Index index) {
47 return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
48}
49
50u32 GetGenericAttributeIndex(Attribute::Index index) {
51 ASSERT(IsGenericAttribute(index));
52 return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
53}
54
55std::string_view Modifiers(Operation operation) {
56 const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta());
57 if (meta && meta->precise) {
58 return ".PREC";
59 }
60 return "";
61}
62
63std::string_view GetInputFlags(PixelImap attribute) {
64 switch (attribute) {
65 case PixelImap::Perspective:
66 return "";
67 case PixelImap::Constant:
68 return "FLAT ";
69 case PixelImap::ScreenLinear:
70 return "NOPERSPECTIVE ";
71 case PixelImap::Unused:
72 break;
73 }
74 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute));
75 return {};
76}
77
78std::string_view ImageType(Tegra::Shader::ImageType image_type) {
79 switch (image_type) {
80 case Tegra::Shader::ImageType::Texture1D:
81 return "1D";
82 case Tegra::Shader::ImageType::TextureBuffer:
83 return "BUFFER";
84 case Tegra::Shader::ImageType::Texture1DArray:
85 return "ARRAY1D";
86 case Tegra::Shader::ImageType::Texture2D:
87 return "2D";
88 case Tegra::Shader::ImageType::Texture2DArray:
89 return "ARRAY2D";
90 case Tegra::Shader::ImageType::Texture3D:
91 return "3D";
92 }
93 UNREACHABLE();
94 return {};
95}
96
97std::string_view StackName(MetaStackClass stack) {
98 switch (stack) {
99 case MetaStackClass::Ssy:
100 return "SSY";
101 case MetaStackClass::Pbk:
102 return "PBK";
103 }
104 UNREACHABLE();
105 return "";
106};
107
108std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
109 switch (topology) {
110 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points:
111 return "POINTS";
112 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines:
113 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip:
114 return "LINES";
115 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
116 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
117 return "LINES_ADJACENCY";
118 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles:
119 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
120 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
121 return "TRIANGLES";
122 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
123 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
124 return "TRIANGLES_ADJACENCY";
125 default:
126 UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
127 return "POINTS";
128 }
129}
130
131std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
132 switch (topology) {
133 case Tegra::Shader::OutputTopology::PointList:
134 return "POINTS";
135 case Tegra::Shader::OutputTopology::LineStrip:
136 return "LINE_STRIP";
137 case Tegra::Shader::OutputTopology::TriangleStrip:
138 return "TRIANGLE_STRIP";
139 default:
140 UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology));
141 return "points";
142 }
143}
144
145std::string_view StageInputName(ShaderType stage) {
146 switch (stage) {
147 case ShaderType::Vertex:
148 case ShaderType::Geometry:
149 return "vertex";
150 case ShaderType::Fragment:
151 return "fragment";
152 case ShaderType::Compute:
153 return "invocation";
154 default:
155 UNREACHABLE();
156 return "";
157 }
158}
159
160std::string TextureType(const MetaTexture& meta) {
161 if (meta.sampler.is_buffer) {
162 return "BUFFER";
163 }
164 std::string type;
165 if (meta.sampler.is_shadow) {
166 type += "SHADOW";
167 }
168 if (meta.sampler.is_array) {
169 type += "ARRAY";
170 }
171 type += [&meta] {
172 switch (meta.sampler.type) {
173 case Tegra::Shader::TextureType::Texture1D:
174 return "1D";
175 case Tegra::Shader::TextureType::Texture2D:
176 return "2D";
177 case Tegra::Shader::TextureType::Texture3D:
178 return "3D";
179 case Tegra::Shader::TextureType::TextureCube:
180 return "CUBE";
181 }
182 UNREACHABLE();
183 return "2D";
184 }();
185 return type;
186}
187
188std::string GlobalMemoryName(const GlobalMemoryBase& base) {
189 return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset);
190}
191
192class ARBDecompiler final {
193public:
194 explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
195 ShaderType stage, std::string_view identifier);
196
197 std::string Code() const {
198 return shader_source;
199 }
200
201private:
202 void DeclareHeader();
203 void DeclareVertex();
204 void DeclareGeometry();
205 void DeclareFragment();
206 void DeclareCompute();
207 void DeclareInputAttributes();
208 void DeclareOutputAttributes();
209 void DeclareLocalMemory();
210 void DeclareGlobalMemory();
211 void DeclareConstantBuffers();
212 void DeclareRegisters();
213 void DeclareTemporaries();
214 void DeclarePredicates();
215 void DeclareInternalFlags();
216
217 void InitializeVariables();
218
219 void DecompileAST();
220 void DecompileBranchMode();
221
222 void VisitAST(const ASTNode& node);
223 std::string VisitExpression(const Expr& node);
224
225 void VisitBlock(const NodeBlock& bb);
226
227 std::string Visit(const Node& node);
228
229 std::pair<std::string, std::size_t> BuildCoords(Operation);
230 std::string BuildAoffi(Operation);
231 void Exit();
232
233 std::string Assign(Operation);
234 std::string Select(Operation);
235 std::string FClamp(Operation);
236 std::string FCastHalf0(Operation);
237 std::string FCastHalf1(Operation);
238 std::string FSqrt(Operation);
239 std::string FSwizzleAdd(Operation);
240 std::string HAdd2(Operation);
241 std::string HMul2(Operation);
242 std::string HFma2(Operation);
243 std::string HAbsolute(Operation);
244 std::string HNegate(Operation);
245 std::string HClamp(Operation);
246 std::string HCastFloat(Operation);
247 std::string HUnpack(Operation);
248 std::string HMergeF32(Operation);
249 std::string HMergeH0(Operation);
250 std::string HMergeH1(Operation);
251 std::string HPack2(Operation);
252 std::string LogicalAssign(Operation);
253 std::string LogicalPick2(Operation);
254 std::string LogicalAnd2(Operation);
255 std::string FloatOrdered(Operation);
256 std::string FloatUnordered(Operation);
257 std::string LogicalAddCarry(Operation);
258 std::string Texture(Operation);
259 std::string TextureGather(Operation);
260 std::string TextureQueryDimensions(Operation);
261 std::string TextureQueryLod(Operation);
262 std::string TexelFetch(Operation);
263 std::string TextureGradient(Operation);
264 std::string ImageLoad(Operation);
265 std::string ImageStore(Operation);
266 std::string Branch(Operation);
267 std::string BranchIndirect(Operation);
268 std::string PushFlowStack(Operation);
269 std::string PopFlowStack(Operation);
270 std::string Exit(Operation);
271 std::string Discard(Operation);
272 std::string EmitVertex(Operation);
273 std::string EndPrimitive(Operation);
274 std::string InvocationId(Operation);
275 std::string YNegate(Operation);
276 std::string ThreadId(Operation);
277 std::string ShuffleIndexed(Operation);
278 std::string Barrier(Operation);
279 std::string MemoryBarrierGroup(Operation);
280 std::string MemoryBarrierGlobal(Operation);
281
282 template <const std::string_view& op>
283 std::string Unary(Operation operation) {
284 const std::string temporary = AllocTemporary();
285 AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]));
286 return temporary;
287 }
288
289 template <const std::string_view& op>
290 std::string Binary(Operation operation) {
291 const std::string temporary = AllocTemporary();
292 AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
293 Visit(operation[1]));
294 return temporary;
295 }
296
297 template <const std::string_view& op>
298 std::string Trinary(Operation operation) {
299 const std::string temporary = AllocTemporary();
300 AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
301 Visit(operation[1]), Visit(operation[2]));
302 return temporary;
303 }
304
305 template <const std::string_view& op, bool unordered>
306 std::string FloatComparison(Operation operation) {
307 const std::string temporary = AllocTemporary();
308 AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation));
309 AddLine("MOV.S {}, 0;", temporary);
310 AddLine("MOV.S {} (NE.x), -1;", temporary);
311
312 const std::string op_a = Visit(operation[0]);
313 const std::string op_b = Visit(operation[1]);
314 if constexpr (unordered) {
315 AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
316 AddLine("TRUNC.U.CC RC.x, RC.x;");
317 AddLine("MOV.S {} (NE.x), -1;", temporary);
318 AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
319 AddLine("TRUNC.U.CC RC.x, RC.x;");
320 AddLine("MOV.S {} (NE.x), -1;", temporary);
321 } else if (op == SNE_F) {
322 AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
323 AddLine("TRUNC.U.CC RC.x, RC.x;");
324 AddLine("MOV.S {} (NE.x), 0;", temporary);
325 AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
326 AddLine("TRUNC.U.CC RC.x, RC.x;");
327 AddLine("MOV.S {} (NE.x), 0;", temporary);
328 }
329 return temporary;
330 }
331
332 template <const std::string_view& op, bool is_nan>
333 std::string HalfComparison(Operation operation) {
334 const std::string tmp1 = AllocVectorTemporary();
335 const std::string tmp2 = AllocVectorTemporary();
336 const std::string op_a = Visit(operation[0]);
337 const std::string op_b = Visit(operation[1]);
338 AddLine("UP2H.F {}, {};", tmp1, op_a);
339 AddLine("UP2H.F {}, {};", tmp2, op_b);
340 AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2);
341 AddLine("TRUNC.U.CC RC.xy, {};", tmp1);
342 AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1);
343 AddLine("MOV.S {}.x (NE.x), -1;", tmp1);
344 AddLine("MOV.S {}.y (NE.y), -1;", tmp1);
345 if constexpr (is_nan) {
346 AddLine("MOVC.F RC.x, {};", op_a);
347 AddLine("MOV.S {}.x (NAN.x), -1;", tmp1);
348 AddLine("MOVC.F RC.x, {};", op_b);
349 AddLine("MOV.S {}.y (NAN.x), -1;", tmp1);
350 }
351 return tmp1;
352 }
353
354 template <const std::string_view& op, const std::string_view& type>
355 std::string AtomicImage(Operation operation) {
356 const auto& meta = std::get<MetaImage>(operation.GetMeta());
357 const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
358 const std::size_t num_coords = operation.GetOperandsCount();
359 const std::size_t num_values = meta.values.size();
360
361 const std::string coord = AllocVectorTemporary();
362 const std::string value = AllocVectorTemporary();
363 for (std::size_t i = 0; i < num_coords; ++i) {
364 AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
365 }
366 for (std::size_t i = 0; i < num_values; ++i) {
367 AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
368 }
369
370 const std::string result = coord;
371 AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, result, value, coord,
372 image_id, ImageType(meta.image.type));
373 return fmt::format("{}.x", result);
374 }
375
376 template <const std::string_view& op, const std::string_view& type>
377 std::string Atomic(Operation operation) {
378 const std::string temporary = AllocTemporary();
379 std::string address;
380 std::string_view opname;
381 if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
382 AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
383 Visit(gmem->GetBaseAddress()));
384 address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary);
385 opname = "ATOMB";
386 } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
387 address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
388 opname = "ATOMS";
389 } else {
390 UNREACHABLE();
391 return "{0, 0, 0, 0}";
392 }
393 AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
394 return temporary;
395 }
396
397 template <char type>
398 std::string Negate(Operation operation) {
399 const std::string temporary = AllocTemporary();
400 if constexpr (type == 'F') {
401 AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0]));
402 } else {
403 AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0]));
404 }
405 return temporary;
406 }
407
408 template <char type>
409 std::string Absolute(Operation operation) {
410 const std::string temporary = AllocTemporary();
411 AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0]));
412 return temporary;
413 }
414
415 template <char type>
416 std::string BitfieldInsert(Operation operation) {
417 const std::string temporary = AllocVectorTemporary();
418 AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3]));
419 AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2]));
420 AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]),
421 Visit(operation[0]));
422 return fmt::format("{}.x", temporary);
423 }
424
425 template <char type>
426 std::string BitfieldExtract(Operation operation) {
427 const std::string temporary = AllocVectorTemporary();
428 AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2]));
429 AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1]));
430 AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0]));
431 return fmt::format("{}.x", temporary);
432 }
433
434 template <char swizzle>
435 std::string LocalInvocationId(Operation) {
436 return fmt::format("invocation.localid.{}", swizzle);
437 }
438
439 template <char swizzle>
440 std::string WorkGroupId(Operation) {
441 return fmt::format("invocation.groupid.{}", swizzle);
442 }
443
444 template <char c1, char c2>
445 std::string ThreadMask(Operation) {
446 return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2);
447 }
448
449 template <typename... Args>
450 void AddExpression(std::string_view text, Args&&... args) {
451 shader_source += fmt::format(text, std::forward<Args>(args)...);
452 }
453
454 template <typename... Args>
455 void AddLine(std::string_view text, Args&&... args) {
456 AddExpression(text, std::forward<Args>(args)...);
457 shader_source += '\n';
458 }
459
460 std::string AllocTemporary() {
461 max_temporaries = std::max(max_temporaries, num_temporaries + 1);
462 return fmt::format("T{}.x", num_temporaries++);
463 }
464
465 std::string AllocVectorTemporary() {
466 max_temporaries = std::max(max_temporaries, num_temporaries + 1);
467 return fmt::format("T{}", num_temporaries++);
468 }
469
470 void ResetTemporaries() noexcept {
471 num_temporaries = 0;
472 }
473
474 const Device& device;
475 const ShaderIR& ir;
476 const Registry& registry;
477 const ShaderType stage;
478
479 std::size_t num_temporaries = 0;
480 std::size_t max_temporaries = 0;
481
482 std::string shader_source;
483
484 static constexpr std::string_view ADD_F32 = "ADD.F32";
485 static constexpr std::string_view ADD_S = "ADD.S";
486 static constexpr std::string_view ADD_U = "ADD.U";
487 static constexpr std::string_view MUL_F32 = "MUL.F32";
488 static constexpr std::string_view MUL_S = "MUL.S";
489 static constexpr std::string_view MUL_U = "MUL.U";
490 static constexpr std::string_view DIV_F32 = "DIV.F32";
491 static constexpr std::string_view DIV_S = "DIV.S";
492 static constexpr std::string_view DIV_U = "DIV.U";
493 static constexpr std::string_view MAD_F32 = "MAD.F32";
494 static constexpr std::string_view RSQ_F32 = "RSQ.F32";
495 static constexpr std::string_view COS_F32 = "COS.F32";
496 static constexpr std::string_view SIN_F32 = "SIN.F32";
497 static constexpr std::string_view EX2_F32 = "EX2.F32";
498 static constexpr std::string_view LG2_F32 = "LG2.F32";
499 static constexpr std::string_view SLT_F = "SLT.F32";
500 static constexpr std::string_view SLT_S = "SLT.S";
501 static constexpr std::string_view SLT_U = "SLT.U";
502 static constexpr std::string_view SEQ_F = "SEQ.F32";
503 static constexpr std::string_view SEQ_S = "SEQ.S";
504 static constexpr std::string_view SEQ_U = "SEQ.U";
505 static constexpr std::string_view SLE_F = "SLE.F32";
506 static constexpr std::string_view SLE_S = "SLE.S";
507 static constexpr std::string_view SLE_U = "SLE.U";
508 static constexpr std::string_view SGT_F = "SGT.F32";
509 static constexpr std::string_view SGT_S = "SGT.S";
510 static constexpr std::string_view SGT_U = "SGT.U";
511 static constexpr std::string_view SNE_F = "SNE.F32";
512 static constexpr std::string_view SNE_S = "SNE.S";
513 static constexpr std::string_view SNE_U = "SNE.U";
514 static constexpr std::string_view SGE_F = "SGE.F32";
515 static constexpr std::string_view SGE_S = "SGE.S";
516 static constexpr std::string_view SGE_U = "SGE.U";
517 static constexpr std::string_view AND_S = "AND.S";
518 static constexpr std::string_view AND_U = "AND.U";
519 static constexpr std::string_view TRUNC_F = "TRUNC.F";
520 static constexpr std::string_view TRUNC_S = "TRUNC.S";
521 static constexpr std::string_view TRUNC_U = "TRUNC.U";
522 static constexpr std::string_view SHL_S = "SHL.S";
523 static constexpr std::string_view SHL_U = "SHL.U";
524 static constexpr std::string_view SHR_S = "SHR.S";
525 static constexpr std::string_view SHR_U = "SHR.U";
526 static constexpr std::string_view OR_S = "OR.S";
527 static constexpr std::string_view OR_U = "OR.U";
528 static constexpr std::string_view XOR_S = "XOR.S";
529 static constexpr std::string_view XOR_U = "XOR.U";
530 static constexpr std::string_view NOT_S = "NOT.S";
531 static constexpr std::string_view NOT_U = "NOT.U";
532 static constexpr std::string_view BTC_S = "BTC.S";
533 static constexpr std::string_view BTC_U = "BTC.U";
534 static constexpr std::string_view BTFM_S = "BTFM.S";
535 static constexpr std::string_view BTFM_U = "BTFM.U";
536 static constexpr std::string_view ROUND_F = "ROUND.F";
537 static constexpr std::string_view CEIL_F = "CEIL.F";
538 static constexpr std::string_view FLR_F = "FLR.F";
539 static constexpr std::string_view I2F_S = "I2F.S";
540 static constexpr std::string_view I2F_U = "I2F.U";
541 static constexpr std::string_view MIN_F = "MIN.F";
542 static constexpr std::string_view MIN_S = "MIN.S";
543 static constexpr std::string_view MIN_U = "MIN.U";
544 static constexpr std::string_view MAX_F = "MAX.F";
545 static constexpr std::string_view MAX_S = "MAX.S";
546 static constexpr std::string_view MAX_U = "MAX.U";
547 static constexpr std::string_view MOV_U = "MOV.U";
548 static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U";
549 static constexpr std::string_view TGALL_U = "TGALL.U";
550 static constexpr std::string_view TGANY_U = "TGANY.U";
551 static constexpr std::string_view TGEQ_U = "TGEQ.U";
552 static constexpr std::string_view EXCH = "EXCH";
553 static constexpr std::string_view ADD = "ADD";
554 static constexpr std::string_view MIN = "MIN";
555 static constexpr std::string_view MAX = "MAX";
556 static constexpr std::string_view AND = "AND";
557 static constexpr std::string_view OR = "OR";
558 static constexpr std::string_view XOR = "XOR";
559 static constexpr std::string_view U32 = "U32";
560 static constexpr std::string_view S32 = "S32";
561
562 static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount);
563 using DecompilerType = std::string (ARBDecompiler::*)(Operation);
564 static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = {
565 &ARBDecompiler::Assign,
566
567 &ARBDecompiler::Select,
568
569 &ARBDecompiler::Binary<ADD_F32>,
570 &ARBDecompiler::Binary<MUL_F32>,
571 &ARBDecompiler::Binary<DIV_F32>,
572 &ARBDecompiler::Trinary<MAD_F32>,
573 &ARBDecompiler::Negate<'F'>,
574 &ARBDecompiler::Absolute<'F'>,
575 &ARBDecompiler::FClamp,
576 &ARBDecompiler::FCastHalf0,
577 &ARBDecompiler::FCastHalf1,
578 &ARBDecompiler::Binary<MIN_F>,
579 &ARBDecompiler::Binary<MAX_F>,
580 &ARBDecompiler::Unary<COS_F32>,
581 &ARBDecompiler::Unary<SIN_F32>,
582 &ARBDecompiler::Unary<EX2_F32>,
583 &ARBDecompiler::Unary<LG2_F32>,
584 &ARBDecompiler::Unary<RSQ_F32>,
585 &ARBDecompiler::FSqrt,
586 &ARBDecompiler::Unary<ROUND_F>,
587 &ARBDecompiler::Unary<FLR_F>,
588 &ARBDecompiler::Unary<CEIL_F>,
589 &ARBDecompiler::Unary<TRUNC_F>,
590 &ARBDecompiler::Unary<I2F_S>,
591 &ARBDecompiler::Unary<I2F_U>,
592 &ARBDecompiler::FSwizzleAdd,
593
594 &ARBDecompiler::Binary<ADD_S>,
595 &ARBDecompiler::Binary<MUL_S>,
596 &ARBDecompiler::Binary<DIV_S>,
597 &ARBDecompiler::Negate<'S'>,
598 &ARBDecompiler::Absolute<'S'>,
599 &ARBDecompiler::Binary<MIN_S>,
600 &ARBDecompiler::Binary<MAX_S>,
601
602 &ARBDecompiler::Unary<TRUNC_S>,
603 &ARBDecompiler::Unary<MOV_U>,
604 &ARBDecompiler::Binary<SHL_S>,
605 &ARBDecompiler::Binary<SHR_U>,
606 &ARBDecompiler::Binary<SHR_S>,
607 &ARBDecompiler::Binary<AND_S>,
608 &ARBDecompiler::Binary<OR_S>,
609 &ARBDecompiler::Binary<XOR_S>,
610 &ARBDecompiler::Unary<NOT_S>,
611 &ARBDecompiler::BitfieldInsert<'S'>,
612 &ARBDecompiler::BitfieldExtract<'S'>,
613 &ARBDecompiler::Unary<BTC_S>,
614 &ARBDecompiler::Unary<BTFM_S>,
615
616 &ARBDecompiler::Binary<ADD_U>,
617 &ARBDecompiler::Binary<MUL_U>,
618 &ARBDecompiler::Binary<DIV_U>,
619 &ARBDecompiler::Binary<MIN_U>,
620 &ARBDecompiler::Binary<MAX_U>,
621 &ARBDecompiler::Unary<TRUNC_U>,
622 &ARBDecompiler::Unary<MOV_U>,
623 &ARBDecompiler::Binary<SHL_U>,
624 &ARBDecompiler::Binary<SHR_U>,
625 &ARBDecompiler::Binary<SHR_U>,
626 &ARBDecompiler::Binary<AND_U>,
627 &ARBDecompiler::Binary<OR_U>,
628 &ARBDecompiler::Binary<XOR_U>,
629 &ARBDecompiler::Unary<NOT_U>,
630 &ARBDecompiler::BitfieldInsert<'U'>,
631 &ARBDecompiler::BitfieldExtract<'U'>,
632 &ARBDecompiler::Unary<BTC_U>,
633 &ARBDecompiler::Unary<BTFM_U>,
634
635 &ARBDecompiler::HAdd2,
636 &ARBDecompiler::HMul2,
637 &ARBDecompiler::HFma2,
638 &ARBDecompiler::HAbsolute,
639 &ARBDecompiler::HNegate,
640 &ARBDecompiler::HClamp,
641 &ARBDecompiler::HCastFloat,
642 &ARBDecompiler::HUnpack,
643 &ARBDecompiler::HMergeF32,
644 &ARBDecompiler::HMergeH0,
645 &ARBDecompiler::HMergeH1,
646 &ARBDecompiler::HPack2,
647
648 &ARBDecompiler::LogicalAssign,
649 &ARBDecompiler::Binary<AND_U>,
650 &ARBDecompiler::Binary<OR_U>,
651 &ARBDecompiler::Binary<XOR_U>,
652 &ARBDecompiler::Unary<NOT_U>,
653 &ARBDecompiler::LogicalPick2,
654 &ARBDecompiler::LogicalAnd2,
655
656 &ARBDecompiler::FloatComparison<SLT_F, false>,
657 &ARBDecompiler::FloatComparison<SEQ_F, false>,
658 &ARBDecompiler::FloatComparison<SLE_F, false>,
659 &ARBDecompiler::FloatComparison<SGT_F, false>,
660 &ARBDecompiler::FloatComparison<SNE_F, false>,
661 &ARBDecompiler::FloatComparison<SGE_F, false>,
662 &ARBDecompiler::FloatOrdered,
663 &ARBDecompiler::FloatUnordered,
664 &ARBDecompiler::FloatComparison<SLT_F, true>,
665 &ARBDecompiler::FloatComparison<SEQ_F, true>,
666 &ARBDecompiler::FloatComparison<SLE_F, true>,
667 &ARBDecompiler::FloatComparison<SGT_F, true>,
668 &ARBDecompiler::FloatComparison<SNE_F, true>,
669 &ARBDecompiler::FloatComparison<SGE_F, true>,
670
671 &ARBDecompiler::Binary<SLT_S>,
672 &ARBDecompiler::Binary<SEQ_S>,
673 &ARBDecompiler::Binary<SLE_S>,
674 &ARBDecompiler::Binary<SGT_S>,
675 &ARBDecompiler::Binary<SNE_S>,
676 &ARBDecompiler::Binary<SGE_S>,
677
678 &ARBDecompiler::Binary<SLT_U>,
679 &ARBDecompiler::Binary<SEQ_U>,
680 &ARBDecompiler::Binary<SLE_U>,
681 &ARBDecompiler::Binary<SGT_U>,
682 &ARBDecompiler::Binary<SNE_U>,
683 &ARBDecompiler::Binary<SGE_U>,
684
685 &ARBDecompiler::LogicalAddCarry,
686
687 &ARBDecompiler::HalfComparison<SLT_F, false>,
688 &ARBDecompiler::HalfComparison<SEQ_F, false>,
689 &ARBDecompiler::HalfComparison<SLE_F, false>,
690 &ARBDecompiler::HalfComparison<SGT_F, false>,
691 &ARBDecompiler::HalfComparison<SNE_F, false>,
692 &ARBDecompiler::HalfComparison<SGE_F, false>,
693 &ARBDecompiler::HalfComparison<SLT_F, true>,
694 &ARBDecompiler::HalfComparison<SEQ_F, true>,
695 &ARBDecompiler::HalfComparison<SLE_F, true>,
696 &ARBDecompiler::HalfComparison<SGT_F, true>,
697 &ARBDecompiler::HalfComparison<SNE_F, true>,
698 &ARBDecompiler::HalfComparison<SGE_F, true>,
699
700 &ARBDecompiler::Texture,
701 &ARBDecompiler::Texture,
702 &ARBDecompiler::TextureGather,
703 &ARBDecompiler::TextureQueryDimensions,
704 &ARBDecompiler::TextureQueryLod,
705 &ARBDecompiler::TexelFetch,
706 &ARBDecompiler::TextureGradient,
707
708 &ARBDecompiler::ImageLoad,
709 &ARBDecompiler::ImageStore,
710
711 &ARBDecompiler::AtomicImage<ADD, U32>,
712 &ARBDecompiler::AtomicImage<AND, U32>,
713 &ARBDecompiler::AtomicImage<OR, U32>,
714 &ARBDecompiler::AtomicImage<XOR, U32>,
715 &ARBDecompiler::AtomicImage<EXCH, U32>,
716
717 &ARBDecompiler::Atomic<EXCH, U32>,
718 &ARBDecompiler::Atomic<ADD, U32>,
719 &ARBDecompiler::Atomic<MIN, U32>,
720 &ARBDecompiler::Atomic<MAX, U32>,
721 &ARBDecompiler::Atomic<AND, U32>,
722 &ARBDecompiler::Atomic<OR, U32>,
723 &ARBDecompiler::Atomic<XOR, U32>,
724
725 &ARBDecompiler::Atomic<EXCH, S32>,
726 &ARBDecompiler::Atomic<ADD, S32>,
727 &ARBDecompiler::Atomic<MIN, S32>,
728 &ARBDecompiler::Atomic<MAX, S32>,
729 &ARBDecompiler::Atomic<AND, S32>,
730 &ARBDecompiler::Atomic<OR, S32>,
731 &ARBDecompiler::Atomic<XOR, S32>,
732
733 &ARBDecompiler::Atomic<ADD, U32>,
734 &ARBDecompiler::Atomic<MIN, U32>,
735 &ARBDecompiler::Atomic<MAX, U32>,
736 &ARBDecompiler::Atomic<AND, U32>,
737 &ARBDecompiler::Atomic<OR, U32>,
738 &ARBDecompiler::Atomic<XOR, U32>,
739
740 &ARBDecompiler::Atomic<ADD, S32>,
741 &ARBDecompiler::Atomic<MIN, S32>,
742 &ARBDecompiler::Atomic<MAX, S32>,
743 &ARBDecompiler::Atomic<AND, S32>,
744 &ARBDecompiler::Atomic<OR, S32>,
745 &ARBDecompiler::Atomic<XOR, S32>,
746
747 &ARBDecompiler::Branch,
748 &ARBDecompiler::BranchIndirect,
749 &ARBDecompiler::PushFlowStack,
750 &ARBDecompiler::PopFlowStack,
751 &ARBDecompiler::Exit,
752 &ARBDecompiler::Discard,
753
754 &ARBDecompiler::EmitVertex,
755 &ARBDecompiler::EndPrimitive,
756
757 &ARBDecompiler::InvocationId,
758 &ARBDecompiler::YNegate,
759 &ARBDecompiler::LocalInvocationId<'x'>,
760 &ARBDecompiler::LocalInvocationId<'y'>,
761 &ARBDecompiler::LocalInvocationId<'z'>,
762 &ARBDecompiler::WorkGroupId<'x'>,
763 &ARBDecompiler::WorkGroupId<'y'>,
764 &ARBDecompiler::WorkGroupId<'z'>,
765
766 &ARBDecompiler::Unary<TGBALLOT_U>,
767 &ARBDecompiler::Unary<TGALL_U>,
768 &ARBDecompiler::Unary<TGANY_U>,
769 &ARBDecompiler::Unary<TGEQ_U>,
770
771 &ARBDecompiler::ThreadId,
772 &ARBDecompiler::ThreadMask<'e', 'q'>,
773 &ARBDecompiler::ThreadMask<'g', 'e'>,
774 &ARBDecompiler::ThreadMask<'g', 't'>,
775 &ARBDecompiler::ThreadMask<'l', 'e'>,
776 &ARBDecompiler::ThreadMask<'l', 't'>,
777 &ARBDecompiler::ShuffleIndexed,
778
779 &ARBDecompiler::Barrier,
780 &ARBDecompiler::MemoryBarrierGroup,
781 &ARBDecompiler::MemoryBarrierGlobal,
782 };
783};
784
785ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
786 ShaderType stage, std::string_view identifier)
787 : device{device}, ir{ir}, registry{registry}, stage{stage} {
788 AddLine("TEMP RC;");
789 if (ir.IsDecompiled()) {
790 DecompileAST();
791 } else {
792 DecompileBranchMode();
793 }
794 AddLine("END");
795
796 const std::string code = std::move(shader_source);
797 DeclareHeader();
798 DeclareVertex();
799 DeclareGeometry();
800 DeclareFragment();
801 DeclareCompute();
802 DeclareInputAttributes();
803 DeclareOutputAttributes();
804 DeclareLocalMemory();
805 DeclareGlobalMemory();
806 DeclareConstantBuffers();
807 DeclareRegisters();
808 DeclareTemporaries();
809 DeclarePredicates();
810 DeclareInternalFlags();
811
812 shader_source += code;
813}
814
815std::string_view HeaderStageName(ShaderType stage) {
816 switch (stage) {
817 case ShaderType::Vertex:
818 return "vp";
819 case ShaderType::Geometry:
820 return "gp";
821 case ShaderType::Fragment:
822 return "fp";
823 case ShaderType::Compute:
824 return "cp";
825 default:
826 UNREACHABLE();
827 return "";
828 }
829}
830
831void ARBDecompiler::DeclareHeader() {
832 AddLine("!!NV{}5.0", HeaderStageName(stage));
833 // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
834 AddLine("OPTION NV_internal;");
835 AddLine("OPTION NV_gpu_program_fp64;");
836 AddLine("OPTION NV_shader_storage_buffer;");
837 AddLine("OPTION NV_shader_thread_group;");
838 if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
839 AddLine("OPTION NV_shader_thread_shuffle;");
840 }
841 if (stage == ShaderType::Vertex) {
842 if (device.HasNvViewportArray2()) {
843 AddLine("OPTION NV_viewport_array2;");
844 }
845 }
846 if (stage == ShaderType::Fragment) {
847 AddLine("OPTION ARB_draw_buffers;");
848 }
849 if (device.HasImageLoadFormatted()) {
850 AddLine("OPTION EXT_shader_image_load_formatted;");
851 }
852}
853
854void ARBDecompiler::DeclareVertex() {
855 if (stage != ShaderType::Vertex) {
856 return;
857 }
858 AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};");
859}
860
861void ARBDecompiler::DeclareGeometry() {
862 if (stage != ShaderType::Geometry) {
863 return;
864 }
865 const auto& info = registry.GetGraphicsInfo();
866 const auto& header = ir.GetHeader();
867 AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology));
868 AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology));
869 AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value());
870 AddLine("ATTRIB vertex_position = vertex.position;");
871}
872
873void ARBDecompiler::DeclareFragment() {
874 if (stage != ShaderType::Fragment) {
875 return;
876 }
877 AddLine("OUTPUT result_color7 = result.color[7];");
878 AddLine("OUTPUT result_color6 = result.color[6];");
879 AddLine("OUTPUT result_color5 = result.color[5];");
880 AddLine("OUTPUT result_color4 = result.color[4];");
881 AddLine("OUTPUT result_color3 = result.color[3];");
882 AddLine("OUTPUT result_color2 = result.color[2];");
883 AddLine("OUTPUT result_color1 = result.color[1];");
884 AddLine("OUTPUT result_color0 = result.color;");
885}
886
887void ARBDecompiler::DeclareCompute() {
888 if (stage != ShaderType::Compute) {
889 return;
890 }
891 const ComputeInfo& info = registry.GetComputeInfo();
892 AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
893 info.workgroup_size[2]);
894 if (info.shared_memory_size_in_words > 0) {
895 const u32 size_in_bytes = info.shared_memory_size_in_words * 4;
896 AddLine("SHARED_MEMORY {};", size_in_bytes);
897 AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
898 }
899}
900
901void ARBDecompiler::DeclareInputAttributes() {
902 if (stage == ShaderType::Compute) {
903 return;
904 }
905 const std::string_view stage_name = StageInputName(stage);
906 for (const auto attribute : ir.GetInputAttributes()) {
907 if (!IsGenericAttribute(attribute)) {
908 continue;
909 }
910 const u32 index = GetGenericAttributeIndex(attribute);
911
912 std::string_view suffix;
913 if (stage == ShaderType::Fragment) {
914 const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)};
915 if (input_mode == PixelImap::Unused) {
916 return;
917 }
918 suffix = GetInputFlags(input_mode);
919 }
920 AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index,
921 index);
922 }
923}
924
925void ARBDecompiler::DeclareOutputAttributes() {
926 if (stage == ShaderType::Compute) {
927 return;
928 }
929 for (const auto attribute : ir.GetOutputAttributes()) {
930 if (!IsGenericAttribute(attribute)) {
931 continue;
932 }
933 const u32 index = GetGenericAttributeIndex(attribute);
934 AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index);
935 }
936}
937
938void ARBDecompiler::DeclareLocalMemory() {
939 u64 size = 0;
940 if (stage == ShaderType::Compute) {
941 size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
942 } else {
943 size = ir.GetHeader().GetLocalMemorySize();
944 }
945 if (size == 0) {
946 return;
947 }
948 const u64 element_count = Common::AlignUp(size, 4) / 4;
949 AddLine("TEMP lmem[{}];", element_count);
950}
951
952void ARBDecompiler::DeclareGlobalMemory() {
953 u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer;
954 for (const auto& pair : ir.GetGlobalMemory()) {
955 const auto& base = pair.first;
956 AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding);
957 ++binding;
958 }
959}
960
961void ARBDecompiler::DeclareConstantBuffers() {
962 u32 binding = 0;
963 for (const auto& cbuf : ir.GetConstantBuffers()) {
964 AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding);
965 ++binding;
966 }
967}
968
969void ARBDecompiler::DeclareRegisters() {
970 for (const u32 gpr : ir.GetRegisters()) {
971 AddLine("TEMP R{};", gpr);
972 }
973}
974
975void ARBDecompiler::DeclareTemporaries() {
976 for (std::size_t i = 0; i < max_temporaries; ++i) {
977 AddLine("TEMP T{};", i);
978 }
979}
980
981void ARBDecompiler::DeclarePredicates() {
982 for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
983 AddLine("TEMP P{};", static_cast<u64>(pred));
984 }
985}
986
987void ARBDecompiler::DeclareInternalFlags() {
988 for (const char* name : INTERNAL_FLAG_NAMES) {
989 AddLine("TEMP {};", name);
990 }
991}
992
993void ARBDecompiler::InitializeVariables() {
994 if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) {
995 AddLine("MOV.F result.position, {{0, 0, 0, 1}};");
996 }
997 for (const auto attribute : ir.GetOutputAttributes()) {
998 if (!IsGenericAttribute(attribute)) {
999 continue;
1000 }
1001 const u32 index = GetGenericAttributeIndex(attribute);
1002 AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index);
1003 }
1004 for (const u32 gpr : ir.GetRegisters()) {
1005 AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr);
1006 }
1007 for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
1008 AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred));
1009 }
1010}
1011
1012void ARBDecompiler::DecompileAST() {
1013 const u32 num_flow_variables = ir.GetASTNumVariables();
1014 for (u32 i = 0; i < num_flow_variables; ++i) {
1015 AddLine("TEMP F{};", i);
1016 }
1017 for (u32 i = 0; i < num_flow_variables; ++i) {
1018 AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i);
1019 }
1020
1021 InitializeVariables();
1022
1023 VisitAST(ir.GetASTProgram());
1024}
1025
1026void ARBDecompiler::DecompileBranchMode() {
1027 static constexpr u32 FLOW_STACK_SIZE = 20;
1028 if (!ir.IsFlowStackDisabled()) {
1029 AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
1030 AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
1031 AddLine("TEMP SSY_TOP;");
1032 AddLine("TEMP PBK_TOP;");
1033 }
1034
1035 AddLine("TEMP PC;");
1036
1037 if (!ir.IsFlowStackDisabled()) {
1038 AddLine("MOV.U SSY_TOP.x, 0;");
1039 AddLine("MOV.U PBK_TOP.x, 0;");
1040 }
1041
1042 InitializeVariables();
1043
1044 const auto basic_block_end = ir.GetBasicBlocks().end();
1045 auto basic_block_it = ir.GetBasicBlocks().begin();
1046 const u32 first_address = basic_block_it->first;
1047 AddLine("MOV.U PC.x, {};", first_address);
1048
1049 AddLine("REP;");
1050
1051 std::size_t num_blocks = 0;
1052 while (basic_block_it != basic_block_end) {
1053 const auto& [address, bb] = *basic_block_it;
1054 ++num_blocks;
1055
1056 AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
1057 AddLine("IF NE.x;");
1058
1059 VisitBlock(bb);
1060
1061 ++basic_block_it;
1062
1063 if (basic_block_it != basic_block_end) {
1064 const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
1065 if (!op || op->GetCode() != OperationCode::Branch) {
1066 const u32 next_address = basic_block_it->first;
1067 AddLine("MOV.U PC.x, {};", next_address);
1068 AddLine("CONT;");
1069 }
1070 }
1071
1072 AddLine("ELSE;");
1073 }
1074 AddLine("RET;");
1075 while (num_blocks--) {
1076 AddLine("ENDIF;");
1077 }
1078
1079 AddLine("ENDREP;");
1080}
1081
1082void ARBDecompiler::VisitAST(const ASTNode& node) {
1083 if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) {
1084 for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
1085 VisitAST(current);
1086 }
1087 } else if (const auto ast = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
1088 const std::string condition = VisitExpression(ast->condition);
1089 ResetTemporaries();
1090
1091 AddLine("MOVC.U RC.x, {};", condition);
1092 AddLine("IF NE.x;");
1093 for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
1094 VisitAST(current);
1095 }
1096 AddLine("ENDIF;");
1097 } else if (const auto ast = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
1098 AddLine("ELSE;");
1099 for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
1100 VisitAST(current);
1101 }
1102 } else if (const auto ast = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
1103 VisitBlock(ast->nodes);
1104 } else if (const auto ast = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
1105 AddLine("MOV.U F{}, {};", ast->index, VisitExpression(ast->condition));
1106 ResetTemporaries();
1107 } else if (const auto ast = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
1108 const std::string condition = VisitExpression(ast->condition);
1109 ResetTemporaries();
1110 AddLine("REP;");
1111 for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
1112 VisitAST(current);
1113 }
1114 AddLine("MOVC.U RC.x, {};", condition);
1115 AddLine("BRK (NE.x);");
1116 AddLine("ENDREP;");
1117 } else if (const auto ast = std::get_if<ASTReturn>(&*node->GetInnerData())) {
1118 const bool is_true = ExprIsTrue(ast->condition);
1119 if (!is_true) {
1120 AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
1121 AddLine("IF NE.x;");
1122 ResetTemporaries();
1123 }
1124 if (ast->kills) {
1125 AddLine("KIL TR;");
1126 } else {
1127 Exit();
1128 }
1129 if (!is_true) {
1130 AddLine("ENDIF;");
1131 }
1132 } else if (const auto ast = std::get_if<ASTBreak>(&*node->GetInnerData())) {
1133 if (ExprIsTrue(ast->condition)) {
1134 AddLine("BRK;");
1135 } else {
1136 AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
1137 AddLine("BRK (NE.x);");
1138 ResetTemporaries();
1139 }
1140 } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) {
1141 // Nothing to do
1142 } else {
1143 UNREACHABLE();
1144 }
1145}
1146
1147std::string ARBDecompiler::VisitExpression(const Expr& node) {
1148 const std::string result = AllocTemporary();
1149 if (const auto expr = std::get_if<ExprAnd>(&*node)) {
1150 AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1),
1151 VisitExpression(expr->operand2));
1152 return result;
1153 }
1154 if (const auto expr = std::get_if<ExprOr>(&*node)) {
1155 const std::string result = AllocTemporary();
1156 AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1),
1157 VisitExpression(expr->operand2));
1158 return result;
1159 }
1160 if (const auto expr = std::get_if<ExprNot>(&*node)) {
1161 const std::string result = AllocTemporary();
1162 AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1));
1163 return result;
1164 }
1165 if (const auto expr = std::get_if<ExprPredicate>(&*node)) {
1166 return fmt::format("P{}.x", static_cast<u64>(expr->predicate));
1167 }
1168 if (const auto expr = std::get_if<ExprCondCode>(&*node)) {
1169 return Visit(ir.GetConditionCode(expr->cc));
1170 }
1171 if (const auto expr = std::get_if<ExprVar>(&*node)) {
1172 return fmt::format("F{}.x", expr->var_index);
1173 }
1174 if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
1175 return expr->value ? "0xffffffff" : "0";
1176 }
1177 if (const auto expr = std::get_if<ExprGprEqual>(&*node)) {
1178 const std::string result = AllocTemporary();
1179 AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value);
1180 return result;
1181 }
1182 UNREACHABLE();
1183 return "0";
1184}
1185
1186void ARBDecompiler::VisitBlock(const NodeBlock& bb) {
1187 for (const auto& node : bb) {
1188 Visit(node);
1189 }
1190}
1191
1192std::string ARBDecompiler::Visit(const Node& node) {
1193 if (const auto operation = std::get_if<OperationNode>(&*node)) {
1194 if (const auto amend_index = operation->GetAmendIndex()) {
1195 Visit(ir.GetAmendNode(*amend_index));
1196 }
1197 const std::size_t index = static_cast<std::size_t>(operation->GetCode());
1198 if (index >= OPERATION_DECOMPILERS.size()) {
1199 UNREACHABLE_MSG("Out of bounds operation: {}", index);
1200 return {};
1201 }
1202 const auto decompiler = OPERATION_DECOMPILERS[index];
1203 if (decompiler == nullptr) {
1204 UNREACHABLE_MSG("Undefined operation: {}", index);
1205 return {};
1206 }
1207 return (this->*decompiler)(*operation);
1208 }
1209
1210 if (const auto gpr = std::get_if<GprNode>(&*node)) {
1211 const u32 index = gpr->GetIndex();
1212 if (index == Register::ZeroIndex) {
1213 return "{0, 0, 0, 0}.x";
1214 }
1215 return fmt::format("R{}.x", index);
1216 }
1217
1218 if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
1219 return fmt::format("CV{}.x", cv->GetIndex());
1220 }
1221
1222 if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
1223 const std::string temporary = AllocTemporary();
1224 AddLine("MOV.U {}, {};", temporary, immediate->GetValue());
1225 return temporary;
1226 }
1227
1228 if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
1229 const std::string temporary = AllocTemporary();
1230 switch (const auto index = predicate->GetIndex(); index) {
1231 case Tegra::Shader::Pred::UnusedIndex:
1232 AddLine("MOV.S {}, -1;", temporary);
1233 break;
1234 case Tegra::Shader::Pred::NeverExecute:
1235 AddLine("MOV.S {}, 0;", temporary);
1236 break;
1237 default:
1238 AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index));
1239 break;
1240 }
1241 if (predicate->IsNegated()) {
1242 AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary);
1243 }
1244 return temporary;
1245 }
1246
1247 if (const auto abuf = std::get_if<AbufNode>(&*node)) {
1248 if (abuf->IsPhysicalBuffer()) {
1249 UNIMPLEMENTED_MSG("Physical buffers are not implemented");
1250 return "{0, 0, 0, 0}.x";
1251 }
1252
1253 const auto buffer_index = [this, &abuf]() -> std::string {
1254 if (stage != ShaderType::Geometry) {
1255 return "";
1256 }
1257 return fmt::format("[{}]", Visit(abuf->GetBuffer()));
1258 };
1259
1260 const Attribute::Index index = abuf->GetIndex();
1261 const u32 element = abuf->GetElement();
1262 const char swizzle = Swizzle(element);
1263 switch (index) {
1264 case Attribute::Index::Position: {
1265 if (stage == ShaderType::Geometry) {
1266 return fmt::format("{}_position[{}].{}", StageInputName(stage),
1267 Visit(abuf->GetBuffer()), swizzle);
1268 } else {
1269 return fmt::format("{}.position.{}", StageInputName(stage), swizzle);
1270 }
1271 }
1272 case Attribute::Index::TessCoordInstanceIDVertexID:
1273 ASSERT(stage == ShaderType::Vertex);
1274 switch (element) {
1275 case 2:
1276 return "vertex.instance";
1277 case 3:
1278 return "vertex.id";
1279 }
1280 UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
1281 break;
1282 case Attribute::Index::PointCoord:
1283 switch (element) {
1284 case 0:
1285 return "fragment.pointcoord.x";
1286 case 1:
1287 return "fragment.pointcoord.y";
1288 }
1289 UNIMPLEMENTED();
1290 break;
1291 case Attribute::Index::FrontFacing: {
1292 ASSERT(stage == ShaderType::Fragment);
1293 ASSERT(element == 3);
1294 const std::string temporary = AllocVectorTemporary();
1295 AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};");
1296 AddLine("MOV.U.CC RC.x, -RC;");
1297 AddLine("MOV.S {}.x, 0;", temporary);
1298 AddLine("MOV.S {}.x (NE.x), -1;", temporary);
1299 return fmt::format("{}.x", temporary);
1300 }
1301 default:
1302 if (IsGenericAttribute(index)) {
1303 if (stage == ShaderType::Geometry) {
1304 return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index),
1305 Visit(abuf->GetBuffer()), swizzle);
1306 } else {
1307 return fmt::format("{}.attrib[{}].{}", StageInputName(stage),
1308 GetGenericAttributeIndex(index), swizzle);
1309 }
1310 }
1311 UNIMPLEMENTED_MSG("Unimplemented input attribute={}", static_cast<int>(index));
1312 break;
1313 }
1314 return "{0, 0, 0, 0}.x";
1315 }
1316
1317 if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
1318 std::string offset_string;
1319 const auto& offset = cbuf->GetOffset();
1320 if (const auto imm = std::get_if<ImmediateNode>(&*offset)) {
1321 offset_string = std::to_string(imm->GetValue());
1322 } else {
1323 offset_string = Visit(offset);
1324 }
1325 const std::string temporary = AllocTemporary();
1326 AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string);
1327 return temporary;
1328 }
1329
1330 if (const auto gmem = std::get_if<GmemNode>(&*node)) {
1331 const std::string temporary = AllocTemporary();
1332 AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
1333 Visit(gmem->GetBaseAddress()));
1334 AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()),
1335 temporary);
1336 return temporary;
1337 }
1338
1339 if (const auto lmem = std::get_if<LmemNode>(&*node)) {
1340 const std::string temporary = Visit(lmem->GetAddress());
1341 AddLine("SHR.U {}, {}, 2;", temporary, temporary);
1342 AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary);
1343 return temporary;
1344 }
1345
1346 if (const auto smem = std::get_if<SmemNode>(&*node)) {
1347 const std::string temporary = Visit(smem->GetAddress());
1348 AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary);
1349 return temporary;
1350 }
1351
1352 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
1353 const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
1354 return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
1355 }
1356
1357 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
1358 if (const auto amend_index = conditional->GetAmendIndex()) {
1359 Visit(ir.GetAmendNode(*amend_index));
1360 }
1361 AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition()));
1362 AddLine("IF NE.x;");
1363 VisitBlock(conditional->GetCode());
1364 AddLine("ENDIF;");
1365 return {};
1366 }
1367
1368 if (const auto cmt = std::get_if<CommentNode>(&*node)) {
1369 // Uncommenting this will generate invalid code. GLASM lacks comments.
1370 // AddLine("// {}", cmt->GetText());
1371 return {};
1372 }
1373
1374 UNIMPLEMENTED();
1375 return {};
1376}
1377
1378std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
1379 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1380 UNIMPLEMENTED_IF(meta.sampler.is_indexed);
1381 UNIMPLEMENTED_IF(meta.sampler.is_shadow && meta.sampler.is_array &&
1382 meta.sampler.type == Tegra::Shader::TextureType::TextureCube);
1383
1384 const std::size_t count = operation.GetOperandsCount();
1385 std::string temporary = AllocVectorTemporary();
1386 std::size_t i = 0;
1387 for (; i < count; ++i) {
1388 AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
1389 }
1390 if (meta.sampler.is_array) {
1391 AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i++), Visit(meta.array));
1392 }
1393 if (meta.sampler.is_shadow) {
1394 AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i++), Visit(meta.depth_compare));
1395 }
1396 return {std::move(temporary), i};
1397}
1398
1399std::string ARBDecompiler::BuildAoffi(Operation operation) {
1400 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1401 if (meta.aoffi.empty()) {
1402 return {};
1403 }
1404 const std::string temporary = AllocVectorTemporary();
1405 std::size_t i = 0;
1406 for (auto& node : meta.aoffi) {
1407 AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node));
1408 }
1409 return fmt::format(", offset({})", temporary);
1410}
1411
1412void ARBDecompiler::Exit() {
1413 if (stage != ShaderType::Fragment) {
1414 AddLine("RET;");
1415 return;
1416 }
1417
1418 const auto safe_get_register = [this](u32 reg) -> std::string {
1419 // TODO(Rodrigo): Replace with contains once C++20 releases
1420 const auto& used_registers = ir.GetRegisters();
1421 if (used_registers.find(reg) != used_registers.end()) {
1422 return fmt::format("R{}.x", reg);
1423 }
1424 return "{0, 0, 0, 0}.x";
1425 };
1426
1427 const auto& header = ir.GetHeader();
1428 u32 current_reg = 0;
1429 for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) {
1430 for (u32 component = 0; component < 4; ++component) {
1431 if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
1432 continue;
1433 }
1434 AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component),
1435 safe_get_register(current_reg));
1436 ++current_reg;
1437 }
1438 }
1439 if (header.ps.omap.depth) {
1440 AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1));
1441 }
1442
1443 AddLine("RET;");
1444}
1445
1446std::string ARBDecompiler::Assign(Operation operation) {
1447 const Node& dest = operation[0];
1448 const Node& src = operation[1];
1449
1450 std::string dest_name;
1451 if (const auto gpr = std::get_if<GprNode>(&*dest)) {
1452 if (gpr->GetIndex() == Register::ZeroIndex) {
1453 // Writing to Register::ZeroIndex is a no op
1454 return {};
1455 }
1456 dest_name = fmt::format("R{}.x", gpr->GetIndex());
1457 } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
1458 const u32 element = abuf->GetElement();
1459 const char swizzle = Swizzle(element);
1460 switch (const Attribute::Index index = abuf->GetIndex()) {
1461 case Attribute::Index::Position:
1462 dest_name = fmt::format("result.position.{}", swizzle);
1463 break;
1464 case Attribute::Index::LayerViewportPointSize:
1465 switch (element) {
1466 case 0:
1467 UNIMPLEMENTED();
1468 return {};
1469 case 1:
1470 case 2:
1471 if (!device.HasNvViewportArray2()) {
1472 LOG_ERROR(
1473 Render_OpenGL,
1474 "NV_viewport_array2 is missing. Maxwell gen 2 or better is required.");
1475 return {};
1476 }
1477 dest_name = element == 1 ? "result.layer.x" : "result.viewport.x";
1478 break;
1479 case 3:
1480 dest_name = "result.pointsize.x";
1481 break;
1482 }
1483 break;
1484 case Attribute::Index::ClipDistances0123:
1485 dest_name = fmt::format("result.clip[{}].x", element);
1486 break;
1487 case Attribute::Index::ClipDistances4567:
1488 dest_name = fmt::format("result.clip[{}].x", element + 4);
1489 break;
1490 default:
1491 if (!IsGenericAttribute(index)) {
1492 UNREACHABLE();
1493 return {};
1494 }
1495 dest_name =
1496 fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle);
1497 break;
1498 }
1499 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
1500 const std::string address = Visit(lmem->GetAddress());
1501 AddLine("SHR.U {}, {}, 2;", address, address);
1502 dest_name = fmt::format("lmem[{}].x", address);
1503 } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
1504 AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress()));
1505 ResetTemporaries();
1506 return {};
1507 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1508 const std::string temporary = AllocTemporary();
1509 AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
1510 Visit(gmem->GetBaseAddress()));
1511 AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()),
1512 temporary);
1513 ResetTemporaries();
1514 return {};
1515 } else {
1516 UNREACHABLE();
1517 ResetTemporaries();
1518 return {};
1519 }
1520
1521 AddLine("MOV.U {}, {};", dest_name, Visit(src));
1522 ResetTemporaries();
1523 return {};
1524}
1525
1526std::string ARBDecompiler::Select(Operation operation) {
1527 const std::string temporary = AllocTemporary();
1528 AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]),
1529 Visit(operation[2]));
1530 return temporary;
1531}
1532
1533std::string ARBDecompiler::FClamp(Operation operation) {
1534 // 1.0f in hex, replace with std::bit_cast on C++20
1535 static constexpr u32 POSITIVE_ONE = 0x3f800000;
1536
1537 const std::string temporary = AllocTemporary();
1538 const Node& value = operation[0];
1539 const Node& low = operation[1];
1540 const Node& high = operation[2];
1541 const auto imm_low = std::get_if<ImmediateNode>(&*low);
1542 const auto imm_high = std::get_if<ImmediateNode>(&*high);
1543 if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) {
1544 AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value));
1545 } else {
1546 AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high));
1547 AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low));
1548 }
1549 return temporary;
1550}
1551
1552std::string ARBDecompiler::FCastHalf0(Operation operation) {
1553 const std::string temporary = AllocVectorTemporary();
1554 AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0]));
1555 return fmt::format("{}.x", temporary);
1556}
1557
1558std::string ARBDecompiler::FCastHalf1(Operation operation) {
1559 const std::string temporary = AllocVectorTemporary();
1560 AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0]));
1561 AddLine("MOV {}.x, {}.y;", temporary, temporary);
1562 return fmt::format("{}.x", temporary);
1563}
1564
1565std::string ARBDecompiler::FSqrt(Operation operation) {
1566 const std::string temporary = AllocTemporary();
1567 AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0]));
1568 AddLine("RCP.F32 {}, {};", temporary, temporary);
1569 return temporary;
1570}
1571
1572std::string ARBDecompiler::FSwizzleAdd(Operation operation) {
1573 LOG_WARNING(Render_OpenGL, "(STUBBED)");
1574 const std::string temporary = AllocTemporary();
1575 AddLine("ADD.F {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]));
1576 return temporary;
1577}
1578
1579std::string ARBDecompiler::HAdd2(Operation operation) {
1580 const std::string tmp1 = AllocVectorTemporary();
1581 const std::string tmp2 = AllocVectorTemporary();
1582 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1583 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1584 AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2);
1585 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1586 return fmt::format("{}.x", tmp1);
1587}
1588
1589std::string ARBDecompiler::HMul2(Operation operation) {
1590 const std::string tmp1 = AllocVectorTemporary();
1591 const std::string tmp2 = AllocVectorTemporary();
1592 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1593 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1594 AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2);
1595 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1596 return fmt::format("{}.x", tmp1);
1597}
1598
1599std::string ARBDecompiler::HFma2(Operation operation) {
1600 const std::string tmp1 = AllocVectorTemporary();
1601 const std::string tmp2 = AllocVectorTemporary();
1602 const std::string tmp3 = AllocVectorTemporary();
1603 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1604 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1605 AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2]));
1606 AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3);
1607 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1608 return fmt::format("{}.x", tmp1);
1609}
1610
1611std::string ARBDecompiler::HAbsolute(Operation operation) {
1612 const std::string temporary = AllocVectorTemporary();
1613 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1614 AddLine("PK2H.F {}.x, |{}|;", temporary, temporary);
1615 return fmt::format("{}.x", temporary);
1616}
1617
1618std::string ARBDecompiler::HNegate(Operation operation) {
1619 const std::string temporary = AllocVectorTemporary();
1620 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1621 AddLine("MOVC.S RC.x, {};", Visit(operation[1]));
1622 AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary);
1623 AddLine("MOVC.S RC.x, {};", Visit(operation[2]));
1624 AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary);
1625 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1626 return fmt::format("{}.x", temporary);
1627}
1628
1629std::string ARBDecompiler::HClamp(Operation operation) {
1630 const std::string tmp1 = AllocVectorTemporary();
1631 const std::string tmp2 = AllocVectorTemporary();
1632 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1633 AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1]));
1634 AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
1635 AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2);
1636 AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2]));
1637 AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
1638 AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2);
1639 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1640 return fmt::format("{}.x", tmp1);
1641}
1642
1643std::string ARBDecompiler::HCastFloat(Operation operation) {
1644 const std::string temporary = AllocVectorTemporary();
1645 AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary);
1646 AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0]));
1647 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1648 return fmt::format("{}.x", temporary);
1649}
1650
1651std::string ARBDecompiler::HUnpack(Operation operation) {
1652 const std::string operand = Visit(operation[0]);
1653 switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
1654 case Tegra::Shader::HalfType::H0_H1:
1655 return operand;
1656 case Tegra::Shader::HalfType::F32: {
1657 const std::string temporary = AllocVectorTemporary();
1658 AddLine("MOV.U {}.x, {};", temporary, operand);
1659 AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
1660 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1661 return fmt::format("{}.x", temporary);
1662 }
1663 case Tegra::Shader::HalfType::H0_H0: {
1664 const std::string temporary = AllocVectorTemporary();
1665 AddLine("UP2H.F {}.xy, {};", temporary, operand);
1666 AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
1667 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1668 return fmt::format("{}.x", temporary);
1669 }
1670 case Tegra::Shader::HalfType::H1_H1: {
1671 const std::string temporary = AllocVectorTemporary();
1672 AddLine("UP2H.F {}.xy, {};", temporary, operand);
1673 AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
1674 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1675 return fmt::format("{}.x", temporary);
1676 }
1677 }
1678 UNREACHABLE();
1679 return "{0, 0, 0, 0}.x";
1680}
1681
1682std::string ARBDecompiler::HMergeF32(Operation operation) {
1683 const std::string temporary = AllocVectorTemporary();
1684 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1685 return fmt::format("{}.x", temporary);
1686}
1687
1688std::string ARBDecompiler::HMergeH0(Operation operation) {
1689 const std::string temporary = AllocVectorTemporary();
1690 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1691 AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
1692 AddLine("MOV.U {}.x, {}.z;", temporary, temporary);
1693 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1694 return fmt::format("{}.x", temporary);
1695}
1696
1697std::string ARBDecompiler::HMergeH1(Operation operation) {
1698 const std::string temporary = AllocVectorTemporary();
1699 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1700 AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
1701 AddLine("MOV.U {}.y, {}.w;", temporary, temporary);
1702 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1703 return fmt::format("{}.x", temporary);
1704}
1705
1706std::string ARBDecompiler::HPack2(Operation operation) {
1707 const std::string temporary = AllocVectorTemporary();
1708 AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0]));
1709 AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1]));
1710 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1711 return fmt::format("{}.x", temporary);
1712}
1713
1714std::string ARBDecompiler::LogicalAssign(Operation operation) {
1715 const Node& dest = operation[0];
1716 const Node& src = operation[1];
1717
1718 std::string target;
1719
1720 if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
1721 ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
1722
1723 const Tegra::Shader::Pred index = pred->GetIndex();
1724 switch (index) {
1725 case Tegra::Shader::Pred::NeverExecute:
1726 case Tegra::Shader::Pred::UnusedIndex:
1727 // Writing to these predicates is a no-op
1728 return {};
1729 }
1730 target = fmt::format("P{}.x", static_cast<u64>(index));
1731 } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) {
1732 const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
1733 target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
1734 } else {
1735 UNREACHABLE();
1736 ResetTemporaries();
1737 return {};
1738 }
1739
1740 AddLine("MOV.U {}, {};", target, Visit(src));
1741 ResetTemporaries();
1742 return {};
1743}
1744
1745std::string ARBDecompiler::LogicalPick2(Operation operation) {
1746 const std::string temporary = AllocTemporary();
1747 const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue();
1748 AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index));
1749 return temporary;
1750}
1751
1752std::string ARBDecompiler::LogicalAnd2(Operation operation) {
1753 const std::string temporary = AllocTemporary();
1754 const std::string op = Visit(operation[0]);
1755 AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op);
1756 return temporary;
1757}
1758
1759std::string ARBDecompiler::FloatOrdered(Operation operation) {
1760 const std::string temporary = AllocTemporary();
1761 AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
1762 AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
1763 AddLine("MOV.S {}, -1;", temporary);
1764 AddLine("MOV.S {} (NAN.x), 0;", temporary);
1765 AddLine("MOV.S {} (NAN.y), 0;", temporary);
1766 return temporary;
1767}
1768
1769std::string ARBDecompiler::FloatUnordered(Operation operation) {
1770 const std::string temporary = AllocTemporary();
1771 AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
1772 AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
1773 AddLine("MOV.S {}, 0;", temporary);
1774 AddLine("MOV.S {} (NAN.x), -1;", temporary);
1775 AddLine("MOV.S {} (NAN.y), -1;", temporary);
1776 return temporary;
1777}
1778
1779std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
1780 const std::string temporary = AllocTemporary();
1781 AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1]));
1782 AddLine("MOV.S {}, 0;", temporary);
1783 AddLine("IF CF.x;");
1784 AddLine("MOV.S {}, -1;", temporary);
1785 AddLine("ENDIF;");
1786 return temporary;
1787}
1788
1789std::string ARBDecompiler::Texture(Operation operation) {
1790 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1791 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1792 const auto [temporary, swizzle] = BuildCoords(operation);
1793
1794 std::string_view opcode = "TEX";
1795 std::string extra;
1796 if (meta.bias) {
1797 ASSERT(!meta.lod);
1798 opcode = "TXB";
1799
1800 if (swizzle < 4) {
1801 AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias));
1802 } else {
1803 const std::string bias = AllocTemporary();
1804 AddLine("MOV.F {}, {};", bias, Visit(meta.bias));
1805 extra = fmt::format(" {},", bias);
1806 }
1807 }
1808 if (meta.lod) {
1809 ASSERT(!meta.bias);
1810 opcode = "TXL";
1811
1812 if (swizzle < 4) {
1813 AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
1814 } else {
1815 const std::string lod = AllocTemporary();
1816 AddLine("MOV.F {}, {};", lod, Visit(meta.lod));
1817 extra = fmt::format(" {},", lod);
1818 }
1819 }
1820
1821 AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, temporary, extra, sampler_id,
1822 TextureType(meta), BuildAoffi(operation));
1823 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1824 return fmt::format("{}.x", temporary);
1825}
1826
1827std::string ARBDecompiler::TextureGather(Operation operation) {
1828 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1829 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1830 const auto [temporary, swizzle] = BuildCoords(operation);
1831
1832 std::string comp;
1833 if (!meta.sampler.is_shadow) {
1834 const auto& immediate = std::get<ImmediateNode>(*meta.component);
1835 comp = fmt::format(".{}", Swizzle(immediate.GetValue()));
1836 }
1837
1838 AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp,
1839 TextureType(meta), BuildAoffi(operation));
1840 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1841 return fmt::format("{}.x", temporary);
1842}
1843
1844std::string ARBDecompiler::TextureQueryDimensions(Operation operation) {
1845 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1846 const std::string temporary = AllocVectorTemporary();
1847 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1848
1849 ASSERT(!meta.sampler.is_array);
1850
1851 const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0";
1852 AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta));
1853 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1854 return fmt::format("{}.x", temporary);
1855}
1856
1857std::string ARBDecompiler::TextureQueryLod(Operation operation) {
1858 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1859 const std::string temporary = AllocVectorTemporary();
1860 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1861
1862 ASSERT(!meta.sampler.is_array);
1863
1864 const std::size_t count = operation.GetOperandsCount();
1865 for (std::size_t i = 0; i < count; ++i) {
1866 AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
1867 }
1868 AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta));
1869 AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary);
1870 AddLine("TRUNC.S {}, {};", temporary, temporary);
1871 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1872 return fmt::format("{}.x", temporary);
1873}
1874
1875std::string ARBDecompiler::TexelFetch(Operation operation) {
1876 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1877 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1878 const auto [temporary, swizzle] = BuildCoords(operation);
1879
1880 if (!meta.sampler.is_buffer) {
1881 ASSERT(swizzle < 4);
1882 AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
1883 }
1884 AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, temporary, sampler_id, TextureType(meta),
1885 BuildAoffi(operation));
1886 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1887 return fmt::format("{}.x", temporary);
1888}
1889
1890std::string ARBDecompiler::TextureGradient(Operation operation) {
1891 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1892 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1893 const std::string ddx = AllocVectorTemporary();
1894 const std::string ddy = AllocVectorTemporary();
1895 const std::string coord = BuildCoords(operation).first;
1896
1897 const std::size_t num_components = meta.derivates.size() / 2;
1898 for (std::size_t index = 0; index < num_components; ++index) {
1899 const char swizzle = Swizzle(index);
1900 AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2]));
1901 AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1]));
1902 }
1903
1904 const std::string_view result = coord;
1905 AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id,
1906 TextureType(meta), BuildAoffi(operation));
1907 AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element));
1908 return fmt::format("{}.x", result);
1909}
1910
1911std::string ARBDecompiler::ImageLoad(Operation operation) {
1912 const auto& meta = std::get<MetaImage>(operation.GetMeta());
1913 const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
1914 const std::size_t count = operation.GetOperandsCount();
1915 const std::string_view type = ImageType(meta.image.type);
1916
1917 const std::string temporary = AllocVectorTemporary();
1918 for (std::size_t i = 0; i < count; ++i) {
1919 AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
1920 }
1921 AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type);
1922 AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1923 return fmt::format("{}.x", temporary);
1924}
1925
1926std::string ARBDecompiler::ImageStore(Operation operation) {
1927 const auto& meta = std::get<MetaImage>(operation.GetMeta());
1928 const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
1929 const std::size_t num_coords = operation.GetOperandsCount();
1930 const std::size_t num_values = meta.values.size();
1931 const std::string_view type = ImageType(meta.image.type);
1932
1933 const std::string coord = AllocVectorTemporary();
1934 const std::string value = AllocVectorTemporary();
1935 for (std::size_t i = 0; i < num_coords; ++i) {
1936 AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
1937 }
1938 for (std::size_t i = 0; i < num_values; ++i) {
1939 AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
1940 }
1941 AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type);
1942 return {};
1943}
1944
1945std::string ARBDecompiler::Branch(Operation operation) {
1946 const auto target = std::get<ImmediateNode>(*operation[0]);
1947 AddLine("MOV.U PC.x, {};", target.GetValue());
1948 AddLine("CONT;");
1949 return {};
1950}
1951
1952std::string ARBDecompiler::BranchIndirect(Operation operation) {
1953 AddLine("MOV.U PC.x, {};", Visit(operation[0]));
1954 AddLine("CONT;");
1955 return {};
1956}
1957
1958std::string ARBDecompiler::PushFlowStack(Operation operation) {
1959 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
1960 const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue();
1961 const std::string_view stack_name = StackName(stack);
1962 AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target);
1963 AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
1964 return {};
1965}
1966
1967std::string ARBDecompiler::PopFlowStack(Operation operation) {
1968 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
1969 const std::string_view stack_name = StackName(stack);
1970 AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
1971 AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
1972 AddLine("CONT;");
1973 return {};
1974}
1975
1976std::string ARBDecompiler::Exit(Operation) {
1977 Exit();
1978 return {};
1979}
1980
1981std::string ARBDecompiler::Discard(Operation) {
1982 AddLine("KIL TR;");
1983 return {};
1984}
1985
1986std::string ARBDecompiler::EmitVertex(Operation) {
1987 AddLine("EMIT;");
1988 return {};
1989}
1990
1991std::string ARBDecompiler::EndPrimitive(Operation) {
1992 AddLine("ENDPRIM;");
1993 return {};
1994}
1995
1996std::string ARBDecompiler::InvocationId(Operation) {
1997 return "primitive.invocation";
1998}
1999
2000std::string ARBDecompiler::YNegate(Operation) {
2001 LOG_WARNING(Render_OpenGL, "(STUBBED)");
2002 const std::string temporary = AllocTemporary();
2003 AddLine("MOV.F {}, 1;", temporary);
2004 return temporary;
2005}
2006
2007std::string ARBDecompiler::ThreadId(Operation) {
2008 return fmt::format("{}.threadid", StageInputName(stage));
2009}
2010
2011std::string ARBDecompiler::ShuffleIndexed(Operation operation) {
2012 if (!device.HasWarpIntrinsics()) {
2013 LOG_ERROR(Render_OpenGL,
2014 "NV_shader_thread_shuffle is missing. Kepler or better is required.");
2015 return Visit(operation[0]);
2016 }
2017 const std::string temporary = AllocVectorTemporary();
2018 AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]),
2019 Visit(operation[1]));
2020 AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
2021 return fmt::format("{}.x", temporary);
2022}
2023
2024std::string ARBDecompiler::Barrier(Operation) {
2025 if (!ir.IsDecompiled()) {
2026 LOG_ERROR(Render_OpenGL, "BAR used but shader is not decompiled");
2027 return {};
2028 }
2029 AddLine("BAR;");
2030 return {};
2031}
2032
2033std::string ARBDecompiler::MemoryBarrierGroup(Operation) {
2034 AddLine("MEMBAR.CTA;");
2035 return {};
2036}
2037
2038std::string ARBDecompiler::MemoryBarrierGlobal(Operation) {
2039 AddLine("MEMBAR;");
2040 return {};
2041}
2042
2043} // Anonymous namespace
2044
2045std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
2046 const VideoCommon::Shader::Registry& registry,
2047 Tegra::Engines::ShaderType stage, std::string_view identifier) {
2048 return ARBDecompiler(device, ir, registry, stage, identifier).Code();
2049}
2050
2051} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h
new file mode 100644
index 000000000..6afc87220
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.h
@@ -0,0 +1,29 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include <string_view>
9
10#include "common/common_types.h"
11
12namespace Tegra::Engines {
13enum class ShaderType : u32;
14}
15
16namespace VideoCommon::Shader {
17class ShaderIR;
18class Registry;
19} // namespace VideoCommon::Shader
20
21namespace OpenGL {
22
23class Device;
24
25std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
26 const VideoCommon::Shader::Registry& registry,
27 Tegra::Engines::ShaderType stage, std::string_view identifier);
28
29} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 890fc6c63..e245e27ec 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -213,6 +213,7 @@ Device::Device()
213 has_component_indexing_bug = is_amd; 213 has_component_indexing_bug = is_amd;
214 has_precise_bug = TestPreciseBug(); 214 has_precise_bug = TestPreciseBug();
215 has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; 215 has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
216 has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
216 use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && 217 use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
217 GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && 218 GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback &&
218 GLAD_GL_NV_transform_feedback2; 219 GLAD_GL_NV_transform_feedback2;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 98cca0254..145347943 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -88,6 +88,10 @@ public:
88 return has_fast_buffer_sub_data; 88 return has_fast_buffer_sub_data;
89 } 89 }
90 90
91 bool HasNvViewportArray2() const {
92 return has_nv_viewport_array2;
93 }
94
91 bool UseAssemblyShaders() const { 95 bool UseAssemblyShaders() const {
92 return use_assembly_shaders; 96 return use_assembly_shaders;
93 } 97 }
@@ -111,6 +115,7 @@ private:
111 bool has_component_indexing_bug{}; 115 bool has_component_indexing_bug{};
112 bool has_precise_bug{}; 116 bool has_precise_bug{};
113 bool has_fast_buffer_sub_data{}; 117 bool has_fast_buffer_sub_data{};
118 bool has_nv_viewport_array2{};
114 bool use_assembly_shaders{}; 119 bool use_assembly_shaders{};
115}; 120};
116 121
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index a991ca64a..f539a05e1 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -20,6 +20,7 @@
20#include "video_core/engines/maxwell_3d.h" 20#include "video_core/engines/maxwell_3d.h"
21#include "video_core/engines/shader_type.h" 21#include "video_core/engines/shader_type.h"
22#include "video_core/memory_manager.h" 22#include "video_core/memory_manager.h"
23#include "video_core/renderer_opengl/gl_arb_decompiler.h"
23#include "video_core/renderer_opengl/gl_rasterizer.h" 24#include "video_core/renderer_opengl/gl_rasterizer.h"
24#include "video_core/renderer_opengl/gl_shader_cache.h" 25#include "video_core/renderer_opengl/gl_shader_cache.h"
25#include "video_core/renderer_opengl/gl_shader_decompiler.h" 26#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -147,7 +148,8 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u
147 auto program = std::make_shared<ProgramHandle>(); 148 auto program = std::make_shared<ProgramHandle>();
148 149
149 if (device.UseAssemblyShaders()) { 150 if (device.UseAssemblyShaders()) {
150 const std::string arb = "Not implemented"; 151 const std::string arb =
152 DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
151 153
152 GLuint& arb_prog = program->assembly_program.handle; 154 GLuint& arb_prog = program->assembly_program.handle;
153 155