author     ReinUsesLisp    2018-12-20 22:29:15 -0300
committer  ReinUsesLisp    2019-01-15 17:54:50 -0300
commit     a4f052f6b3ea689539d3ccc11bde273986728d2e (patch)
tree       14d068354dcb3dc696c863b0199b8549ae1b7d88 /src
parent     glsl_decompiler: Implementation (diff)
download   yuzu-a4f052f6b3ea689539d3ccc11bde273986728d2e.tar.gz
           yuzu-a4f052f6b3ea689539d3ccc11bde273986728d2e.tar.xz
           yuzu-a4f052f6b3ea689539d3ccc11bde273986728d2e.zip
video_core: Replace gl_shader_decompiler
Diffstat (limited to 'src')
-rw-r--r--  src/video_core/CMakeLists.txt                               2
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp            4
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp          8
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.h            1
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp  3950
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.h      25
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_gen.cpp           94
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_gen.h            158
8 files changed, 57 insertions(+), 4185 deletions(-)
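For orientation, the rasterizer hunks below switch to the renamed ShaderEntries fields: const_buffer_entries becomes const_buffers and texture_samplers becomes samplers. The following self-contained C++ sketch only mirrors those member names; the entry structs and the main() driver are illustrative assumptions, not the commit's actual definitions.

#include <cstddef>
#include <iostream>
#include <vector>

// Hypothetical stand-ins for the real entry types; only the member names
// const_buffers and samplers follow the diff below.
struct ConstBufferEntry { std::size_t index; };
struct SamplerEntry { std::size_t offset; };

struct ShaderEntries {
    std::vector<ConstBufferEntry> const_buffers; // previously const_buffer_entries
    std::vector<SamplerEntry> samplers;          // previously texture_samplers
};

int main() {
    const ShaderEntries entries{{{0}, {1}}, {{0}}};
    std::cout << entries.const_buffers.size() << " const buffers, "
              << entries.samplers.size() << " samplers\n";
    return 0;
}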
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 86b06487d..b68f3273d 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -42,8 +42,6 @@ add_library(video_core STATIC
     renderer_opengl/gl_resource_manager.h
     renderer_opengl/gl_shader_cache.cpp
     renderer_opengl/gl_shader_cache.h
-    renderer_opengl/gl_shader_decompiler.cpp
-    renderer_opengl/gl_shader_decompiler.h
     renderer_opengl/gl_shader_gen.cpp
     renderer_opengl/gl_shader_gen.h
     renderer_opengl/gl_shader_manager.cpp
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 73567eb8c..97412590b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -925,7 +925,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
     const auto& gpu = Core::System::GetInstance().GPU();
     const auto& maxwell3d = gpu.Maxwell3D();
     const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
-    const auto& entries = shader->GetShaderEntries().const_buffer_entries;
+    const auto& entries = shader->GetShaderEntries().const_buffers;

     constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers;
     std::array<GLuint, max_binds> bind_buffers;
@@ -993,7 +993,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
     MICROPROFILE_SCOPE(OpenGL_Texture);
     const auto& gpu = Core::System::GetInstance().GPU();
     const auto& maxwell3d = gpu.Maxwell3D();
-    const auto& entries = shader->GetShaderEntries().texture_samplers;
+    const auto& entries = shader->GetShaderEntries().samplers;

     ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units),
                "Exceeded the number of active textures.");
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index c785fffa3..e5435d733 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -12,9 +12,13 @@
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/utils.h"
+#include "video_core/shader/glsl_decompiler.h"
+#include "video_core/shader/shader_ir.h"

 namespace OpenGL {

+using VideoCommon::Shader::ProgramCode;
+
 /// Gets the address for the specified shader stage program
 static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
     const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
@@ -24,8 +28,8 @@ static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
 }

 /// Gets the shader program code from memory for the specified address
-static GLShader::ProgramCode GetShaderCode(VAddr addr) {
-    GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH);
+static ProgramCode GetShaderCode(VAddr addr) {
+    ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
     Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64));
     return program_code;
 }
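The replacement GetShaderCode above sizes a VideoCommon::Shader::ProgramCode buffer to MAX_PROGRAM_LENGTH 64-bit words and fills it with Memory::ReadBlock. Below is a standalone sketch of the same copy pattern, using std::memcpy in place of the emulator's memory interface; the kMaxProgramLength constant and the ReadProgramCode helper are assumptions for illustration, not names from the tree.

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

using u64 = std::uint64_t;
using ProgramCode = std::vector<u64>; // stand-in for VideoCommon::Shader::ProgramCode

// Assumed fixed-size code window; the real bound is VideoCommon::Shader::MAX_PROGRAM_LENGTH.
constexpr std::size_t kMaxProgramLength = 0x1000;

ProgramCode ReadProgramCode(const u64* guest_memory) {
    ProgramCode program_code(kMaxProgramLength);
    // Copy the whole window into the vector, mirroring the Memory::ReadBlock call above.
    std::memcpy(program_code.data(), guest_memory, program_code.size() * sizeof(u64));
    return program_code;
}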
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 768747968..aad1cf6be 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -13,6 +13,7 @@
 #include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_gen.h"
+#include "video_core/shader/glsl_decompiler.h"

 namespace OpenGL {

diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
deleted file mode 100644
index 1bb09e61b..000000000
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ /dev/null
@@ -1,3950 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <map>
6#include <optional>
7#include <set>
8#include <string>
9#include <string_view>
10#include <unordered_set>
11
12#include <fmt/format.h>
13
14#include "common/assert.h"
15#include "common/common_types.h"
16#include "video_core/engines/shader_bytecode.h"
17#include "video_core/engines/shader_header.h"
18#include "video_core/renderer_opengl/gl_rasterizer.h"
19#include "video_core/renderer_opengl/gl_shader_decompiler.h"
20
21namespace OpenGL::GLShader::Decompiler {
22
23using Tegra::Shader::Attribute;
24using Tegra::Shader::Instruction;
25using Tegra::Shader::LogicOperation;
26using Tegra::Shader::OpCode;
27using Tegra::Shader::Register;
28using Tegra::Shader::Sampler;
29using Tegra::Shader::SubOp;
30
31constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
32constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header);
33
34constexpr u32 MAX_GEOMETRY_BUFFERS = 6;
35constexpr u32 MAX_ATTRIBUTES = 0x100; // Size in vec4s, this value is untested
36
37static const char* INTERNAL_FLAG_NAMES[] = {"zero_flag", "sign_flag", "carry_flag",
38 "overflow_flag"};
39
40enum class InternalFlag : u64 {
41 ZeroFlag = 0,
42 SignFlag = 1,
43 CarryFlag = 2,
44 OverflowFlag = 3,
45 Amount
46};
47
48class DecompileFail : public std::runtime_error {
49public:
50 using std::runtime_error::runtime_error;
51};
52
53/// Generates code to use for a swizzle operation.
54static std::string GetSwizzle(u64 elem) {
55 ASSERT(elem <= 3);
56 std::string swizzle = ".";
57 swizzle += "xyzw"[elem];
58 return swizzle;
59}
60
61/// Translate topology
62static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
63 switch (topology) {
64 case Tegra::Shader::OutputTopology::PointList:
65 return "points";
66 case Tegra::Shader::OutputTopology::LineStrip:
67 return "line_strip";
68 case Tegra::Shader::OutputTopology::TriangleStrip:
69 return "triangle_strip";
70 default:
71 UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology));
72 return "points";
73 }
74}
75
76/// Describes the behaviour of code path of a given entry point and a return point.
77enum class ExitMethod {
78    Undetermined, ///< Internal value. Only occurs when analyzing a JMP loop.
79 AlwaysReturn, ///< All code paths reach the return point.
80 Conditional, ///< Code path reaches the return point or an END instruction conditionally.
81    AlwaysEnd,    ///< All code paths reach an END instruction.
82};
83
84/// A subroutine is a range of code referenced by a CALL, IF or LOOP instruction.
85struct Subroutine {
86 /// Generates a name suitable for GLSL source code.
87 std::string GetName() const {
88 return "sub_" + std::to_string(begin) + '_' + std::to_string(end) + '_' + suffix;
89 }
90
91 u32 begin; ///< Entry point of the subroutine.
92 u32 end; ///< Return point of the subroutine.
93 const std::string& suffix; ///< Suffix of the shader, used to make a unique subroutine name
94 ExitMethod exit_method; ///< Exit method of the subroutine.
95    std::set<u32> labels;      ///< Addresses referenced by JMP instructions.
96
97 bool operator<(const Subroutine& rhs) const {
98 return std::tie(begin, end) < std::tie(rhs.begin, rhs.end);
99 }
100};
101
102/// Analyzes shader code and produces a set of subroutines.
103class ControlFlowAnalyzer {
104public:
105 ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset, const std::string& suffix)
106 : program_code(program_code), shader_coverage_begin(main_offset),
107 shader_coverage_end(main_offset + 1) {
108
109 // Recursively finds all subroutines.
110 const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END, suffix);
111 if (program_main.exit_method != ExitMethod::AlwaysEnd)
112 throw DecompileFail("Program does not always end");
113 }
114
115 std::set<Subroutine> GetSubroutines() {
116 return std::move(subroutines);
117 }
118
119 std::size_t GetShaderLength() const {
120 return shader_coverage_end * sizeof(u64);
121 }
122
123private:
124 const ProgramCode& program_code;
125 std::set<Subroutine> subroutines;
126 std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
127 u32 shader_coverage_begin;
128 u32 shader_coverage_end;
129
130 /// Adds and analyzes a new subroutine if it is not added yet.
131 const Subroutine& AddSubroutine(u32 begin, u32 end, const std::string& suffix) {
132 Subroutine subroutine{begin, end, suffix, ExitMethod::Undetermined, {}};
133
134 const auto iter = subroutines.find(subroutine);
135 if (iter != subroutines.end()) {
136 return *iter;
137 }
138
139 subroutine.exit_method = Scan(begin, end, subroutine.labels);
140 if (subroutine.exit_method == ExitMethod::Undetermined) {
141 throw DecompileFail("Recursive function detected");
142 }
143
144 return *subroutines.insert(std::move(subroutine)).first;
145 }
146
147 /// Merges exit method of two parallel branches.
148 static ExitMethod ParallelExit(ExitMethod a, ExitMethod b) {
149 if (a == ExitMethod::Undetermined) {
150 return b;
151 }
152 if (b == ExitMethod::Undetermined) {
153 return a;
154 }
155 if (a == b) {
156 return a;
157 }
158 return ExitMethod::Conditional;
159 }
160
161 /// Scans a range of code for labels and determines the exit method.
162 ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) {
163 const auto [iter, inserted] =
164 exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
165 ExitMethod& exit_method = iter->second;
166 if (!inserted)
167 return exit_method;
168
169 for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) {
170 shader_coverage_begin = std::min(shader_coverage_begin, offset);
171 shader_coverage_end = std::max(shader_coverage_end, offset + 1);
172
173 const Instruction instr = {program_code[offset]};
174 if (const auto opcode = OpCode::Decode(instr)) {
175 switch (opcode->get().GetId()) {
176 case OpCode::Id::EXIT: {
177 // The EXIT instruction can be predicated, which means that the shader can
178 // conditionally end on this instruction. We have to consider the case where the
179 // condition is not met and check the exit method of that other basic block.
180 using Tegra::Shader::Pred;
181 if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
182 return exit_method = ExitMethod::AlwaysEnd;
183 } else {
184 const ExitMethod not_met = Scan(offset + 1, end, labels);
185 return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
186 }
187 }
188 case OpCode::Id::BRA: {
189 const u32 target = offset + instr.bra.GetBranchTarget();
190 labels.insert(target);
191 const ExitMethod no_jmp = Scan(offset + 1, end, labels);
192 const ExitMethod jmp = Scan(target, end, labels);
193 return exit_method = ParallelExit(no_jmp, jmp);
194 }
195 case OpCode::Id::SSY:
196 case OpCode::Id::PBK: {
197 // The SSY and PBK use a similar encoding as the BRA instruction.
198 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
199 "Constant buffer branching is not supported");
200 const u32 target = offset + instr.bra.GetBranchTarget();
201 labels.insert(target);
202 // Continue scanning for an exit method.
203 break;
204 }
205 }
206 }
207 }
208 return exit_method = ExitMethod::AlwaysReturn;
209 }
210};
211
212template <typename T>
213class ShaderScopedScope {
214public:
215 explicit ShaderScopedScope(T& writer, std::string_view begin_expr, std::string end_expr)
216 : writer(writer), end_expr(std::move(end_expr)) {
217
218 if (begin_expr.empty()) {
219 writer.AddLine('{');
220 } else {
221 writer.AddExpression(begin_expr);
222 writer.AddLine(" {");
223 }
224 ++writer.scope;
225 }
226
227 ShaderScopedScope(const ShaderScopedScope&) = delete;
228
229 ~ShaderScopedScope() {
230 --writer.scope;
231 if (end_expr.empty()) {
232 writer.AddLine('}');
233 } else {
234 writer.AddExpression("} ");
235 writer.AddExpression(end_expr);
236 writer.AddLine(';');
237 }
238 }
239
240 ShaderScopedScope& operator=(const ShaderScopedScope&) = delete;
241
242private:
243 T& writer;
244 std::string end_expr;
245};
246
247class ShaderWriter {
248public:
249 void AddExpression(std::string_view text) {
250 DEBUG_ASSERT(scope >= 0);
251 if (!text.empty()) {
252 AppendIndentation();
253 }
254 shader_source += text;
255 }
256
257 void AddLine(std::string_view text) {
258 AddExpression(text);
259 AddNewLine();
260 }
261
262 void AddLine(char character) {
263 DEBUG_ASSERT(scope >= 0);
264 AppendIndentation();
265 shader_source += character;
266 AddNewLine();
267 }
268
269 void AddNewLine() {
270 DEBUG_ASSERT(scope >= 0);
271 shader_source += '\n';
272 }
273
274 std::string GetResult() {
275 return std::move(shader_source);
276 }
277
278 ShaderScopedScope<ShaderWriter> Scope(std::string_view begin_expr = {},
279 std::string end_expr = {}) {
280 return ShaderScopedScope(*this, begin_expr, end_expr);
281 }
282
283 int scope = 0;
284
285private:
286 void AppendIndentation() {
287 shader_source.append(static_cast<std::size_t>(scope) * 4, ' ');
288 }
289
290 std::string shader_source;
291};
292
293/**
294 * Represents an emulated shader register, used to track the state of that register for emulation
295 * with GLSL. At this time, a register can be used as a float or an integer. This class is used for
296 * bookkeeping within the GLSL program.
297 */
298class GLSLRegister {
299public:
300 enum class Type {
301 Float,
302 Integer,
303 UnsignedInteger,
304 };
305
306 GLSLRegister(std::size_t index, const std::string& suffix) : index{index}, suffix{suffix} {}
307
308 /// Gets the GLSL type string for a register
309 static std::string GetTypeString() {
310 return "float";
311 }
312
313 /// Gets the GLSL register prefix string, used for declarations and referencing
314 static std::string GetPrefixString() {
315 return "reg_";
316 }
317
318 /// Returns a GLSL string representing the current state of the register
319 std::string GetString() const {
320 return GetPrefixString() + std::to_string(index) + '_' + suffix;
321 }
322
323 /// Returns the index of the register
324 std::size_t GetIndex() const {
325 return index;
326 }
327
328private:
329 const std::size_t index;
330 const std::string& suffix;
331};
332
333/**
334 * Used to manage shader registers that are emulated with GLSL. This class keeps track of the state
335 * of all registers (e.g. whether they are currently being used as Floats or Integers), and
336 * generates the necessary GLSL code to perform conversions as needed. This class is used for
337 * bookkeeping within the GLSL program.
338 */
339class GLSLRegisterManager {
340public:
341 GLSLRegisterManager(ShaderWriter& shader, ShaderWriter& declarations,
342 const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix,
343 const Tegra::Shader::Header& header)
344 : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header},
345 fixed_pipeline_output_attributes_used{}, local_memory_size{0} {
346 BuildRegisterList();
347 BuildInputList();
348 }
349
350 void SetConditionalCodesFromExpression(const std::string& expresion) {
351 SetInternalFlag(InternalFlag::ZeroFlag, "(" + expresion + ") == 0");
352 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete.");
353 }
354
355 void SetConditionalCodesFromRegister(const Register& reg, u64 dest_elem = 0) {
356 SetConditionalCodesFromExpression(GetRegister(reg, static_cast<u32>(dest_elem)));
357 }
358
359 /**
360 * Returns code that does an integer size conversion for the specified size.
361 * @param value Value to perform integer size conversion on.
362 * @param size Register size to use for conversion instructions.
363 * @returns GLSL string corresponding to the value converted to the specified size.
364 */
365 static std::string ConvertIntegerSize(const std::string& value, Register::Size size) {
366 switch (size) {
367 case Register::Size::Byte:
368 return "((" + value + " << 24) >> 24)";
369 case Register::Size::Short:
370 return "((" + value + " << 16) >> 16)";
371 case Register::Size::Word:
372 // Default - do nothing
373 return value;
374 default:
375 UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size));
376 return value;
377 }
378 }
379
380 /**
381     * Gets a register as a float.
382 * @param reg The register to get.
383 * @param elem The element to use for the operation.
384 * @returns GLSL string corresponding to the register as a float.
385 */
386 std::string GetRegisterAsFloat(const Register& reg, unsigned elem = 0) {
387 return GetRegister(reg, elem);
388 }
389
390 /**
391 * Gets a register as an integer.
392 * @param reg The register to get.
393 * @param elem The element to use for the operation.
394 * @param is_signed Whether to get the register as a signed (or unsigned) integer.
395 * @param size Register size to use for conversion instructions.
396 * @returns GLSL string corresponding to the register as an integer.
397 */
398 std::string GetRegisterAsInteger(const Register& reg, unsigned elem = 0, bool is_signed = true,
399 Register::Size size = Register::Size::Word) {
400 const std::string func{is_signed ? "floatBitsToInt" : "floatBitsToUint"};
401 const std::string value{func + '(' + GetRegister(reg, elem) + ')'};
402 return ConvertIntegerSize(value, size);
403 }
404
405 /**
406 * Writes code that does a register assignment to float value operation.
407 * @param reg The destination register to use.
408 * @param elem The element to use for the operation.
409 * @param value The code representing the value to assign.
410 * @param dest_num_components Number of components in the destination.
411 * @param value_num_components Number of components in the value.
412 * @param is_saturated Optional, when True, saturates the provided value.
413 * @param sets_cc Optional, when True, sets the corresponding values to the implemented
414 * condition flags.
415 * @param dest_elem Optional, the destination element to use for the operation.
416 */
417 void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value,
418 u64 dest_num_components, u64 value_num_components,
419 bool is_saturated = false, bool sets_cc = false, u64 dest_elem = 0,
420 bool precise = false) {
421 const std::string clamped_value = is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value;
422 SetRegister(reg, elem, clamped_value, dest_num_components, value_num_components, dest_elem,
423 precise);
424 if (sets_cc) {
425 if (reg == Register::ZeroIndex) {
426 SetConditionalCodesFromExpression(clamped_value);
427 } else {
428 SetConditionalCodesFromRegister(reg, dest_elem);
429 }
430 }
431 }
432
433 /**
434 * Writes code that does a register assignment to integer value operation.
435 * @param reg The destination register to use.
436 * @param elem The element to use for the operation.
437 * @param value The code representing the value to assign.
438 * @param dest_num_components Number of components in the destination.
439 * @param value_num_components Number of components in the value.
440 * @param is_saturated Optional, when True, saturates the provided value.
441 * @param sets_cc Optional, when True, sets the corresponding values to the implemented
442 * condition flags.
443 * @param dest_elem Optional, the destination element to use for the operation.
444 * @param size Register size to use for conversion instructions.
445 */
446 void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem,
447 const std::string& value, u64 dest_num_components,
448 u64 value_num_components, bool is_saturated = false,
449 bool sets_cc = false, u64 dest_elem = 0,
450 Register::Size size = Register::Size::Word) {
451 UNIMPLEMENTED_IF(is_saturated);
452 const std::string final_value = ConvertIntegerSize(value, size);
453 const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
454
455 SetRegister(reg, elem, func + '(' + final_value + ')', dest_num_components,
456 value_num_components, dest_elem, false);
457
458 if (sets_cc) {
459 if (reg == Register::ZeroIndex) {
460 SetConditionalCodesFromExpression(final_value);
461 } else {
462 SetConditionalCodesFromRegister(reg, dest_elem);
463 }
464 }
465 }
466
467 /**
468 * Writes code that does a register assignment to a half float value operation.
469 * @param reg The destination register to use.
470 * @param elem The element to use for the operation.
471 * @param value The code representing the value to assign. Type has to be half float.
472 * @param merge Half float kind of assignment.
473 * @param dest_num_components Number of components in the destination.
474 * @param value_num_components Number of components in the value.
475 * @param is_saturated Optional, when True, saturates the provided value.
476 * @param dest_elem Optional, the destination element to use for the operation.
477 */
478 void SetRegisterToHalfFloat(const Register& reg, u64 elem, const std::string& value,
479 Tegra::Shader::HalfMerge merge, u64 dest_num_components,
480 u64 value_num_components, bool is_saturated = false,
481 u64 dest_elem = 0) {
482 UNIMPLEMENTED_IF(is_saturated);
483
484 const std::string result = [&]() {
485 switch (merge) {
486 case Tegra::Shader::HalfMerge::H0_H1:
487 return "uintBitsToFloat(packHalf2x16(" + value + "))";
488 case Tegra::Shader::HalfMerge::F32:
489 // Half float instructions take the first component when doing a float cast.
490 return "float(" + value + ".x)";
491 case Tegra::Shader::HalfMerge::Mrg_H0:
492 // TODO(Rodrigo): I guess Mrg_H0 and Mrg_H1 take their respective component from the
493 // pack. I couldn't test this on hardware but it shouldn't really matter since most
494 // of the time when a Mrg_* flag is used both components will be mirrored. That
495 // being said, it deserves a test.
496 return "uintBitsToFloat((" + GetRegisterAsInteger(reg, 0, false) +
497 " & 0xffff0000) | (packHalf2x16(" + value + ") & 0x0000ffff))";
498 case Tegra::Shader::HalfMerge::Mrg_H1:
499 return "uintBitsToFloat((" + GetRegisterAsInteger(reg, 0, false) +
500 " & 0x0000ffff) | (packHalf2x16(" + value + ") & 0xffff0000))";
501 default:
502 UNREACHABLE();
503 return std::string("0");
504 }
505 }();
506
507 SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem, false);
508 }
509
510 /**
511 * Writes code that does a register assignment to input attribute operation. Input attributes
512 * are stored as floats, so this may require conversion.
513 * @param reg The destination register to use.
514 * @param elem The element to use for the operation.
515 * @param attribute The input attribute to use as the source value.
516 * @param input_mode The input mode.
517 * @param vertex The register that decides which vertex to read from (used in GS).
518 */
519 void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute,
520 const Tegra::Shader::IpaMode& input_mode,
521 std::optional<Register> vertex = {}) {
522 const std::string dest = GetRegisterAsFloat(reg);
523 const std::string src = GetInputAttribute(attribute, input_mode, vertex) + GetSwizzle(elem);
524 shader.AddLine(dest + " = " + src + ';');
525 }
526
527 std::string GetLocalMemoryAsFloat(const std::string& index) {
528 return "lmem[" + index + ']';
529 }
530
531 std::string GetLocalMemoryAsInteger(const std::string& index, bool is_signed = false) {
532 const std::string func{is_signed ? "floatToIntBits" : "floatBitsToUint"};
533 return func + "(lmem[" + index + "])";
534 }
535
536 void SetLocalMemoryAsFloat(const std::string& index, const std::string& value) {
537 shader.AddLine("lmem[" + index + "] = " + value + ';');
538 }
539
540 void SetLocalMemoryAsInteger(const std::string& index, const std::string& value,
541 bool is_signed = false) {
542 const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
543 shader.AddLine("lmem[" + index + "] = " + func + '(' + value + ");");
544 }
545
546 std::string GetConditionCode(const Tegra::Shader::ConditionCode cc) const {
547 switch (cc) {
548 case Tegra::Shader::ConditionCode::NEU:
549 return "!(" + GetInternalFlag(InternalFlag::ZeroFlag) + ')';
550 default:
551 UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
552 return "false";
553 }
554 }
555
556 std::string GetInternalFlag(const InternalFlag flag) const {
557 const auto index = static_cast<u32>(flag);
558 ASSERT(index < static_cast<u32>(InternalFlag::Amount));
559
560 return std::string(INTERNAL_FLAG_NAMES[index]) + '_' + suffix;
561 }
562
563 void SetInternalFlag(const InternalFlag flag, const std::string& value) const {
564 shader.AddLine(GetInternalFlag(flag) + " = " + value + ';');
565 }
566
567 /**
568     * Writes code that does an output attribute assignment to register operation. Output attributes
569 * are stored as floats, so this may require conversion.
570 * @param attribute The destination output attribute.
571 * @param elem The element to use for the operation.
572 * @param val_reg The register to use as the source value.
573 * @param buf_reg The register that tells which buffer to write to (used in geometry shaders).
574 */
575 void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& val_reg,
576 const Register& buf_reg) {
577 const std::string dest = GetOutputAttribute(attribute);
578 const std::string src = GetRegisterAsFloat(val_reg);
579 if (dest.empty())
580 return;
581
582 // Can happen with unknown/unimplemented output attributes, in which case we ignore the
583 // instruction for now.
584 if (stage == Maxwell3D::Regs::ShaderStage::Geometry) {
585            // TODO(Rodrigo): nouveau sets some attributes after emitting a geometry
586            // shader. These instructions use a dirty register as the buffer index; to keep
587            // some drivers from complaining about out-of-bounds writes, guard them.
588 const std::string buf_index{"((" + GetRegisterAsInteger(buf_reg) + ") % " +
589 std::to_string(MAX_GEOMETRY_BUFFERS) + ')'};
590 shader.AddLine("amem[" + buf_index + "][" +
591 std::to_string(static_cast<u32>(attribute)) + ']' + GetSwizzle(elem) +
592 " = " + src + ';');
593 return;
594 }
595
596 switch (attribute) {
597 case Attribute::Index::ClipDistances0123:
598 case Attribute::Index::ClipDistances4567: {
599 const u64 index = (attribute == Attribute::Index::ClipDistances4567 ? 4 : 0) + elem;
600 UNIMPLEMENTED_IF_MSG(
601 ((header.vtg.clip_distances >> index) & 1) == 0,
602 "Shader is setting gl_ClipDistance{} without enabling it in the header", index);
603
604 clip_distances[index] = true;
605 fixed_pipeline_output_attributes_used.insert(attribute);
606 shader.AddLine(dest + '[' + std::to_string(index) + "] = " + src + ';');
607 break;
608 }
609 case Attribute::Index::PointSize:
610 fixed_pipeline_output_attributes_used.insert(attribute);
611 shader.AddLine(dest + " = " + src + ';');
612 break;
613 default:
614 shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';');
615 break;
616 }
617 }
618
619 /// Generates code representing a uniform (C buffer) register, interpreted as the input type.
620 std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type,
621 Register::Size size = Register::Size::Word) {
622 declr_const_buffers[index].MarkAsUsed(index, offset, stage);
623 std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset / 4) + "][" +
624 std::to_string(offset % 4) + ']';
625
626 if (type == GLSLRegister::Type::Float) {
627 // Do nothing, default
628 } else if (type == GLSLRegister::Type::Integer) {
629 value = "floatBitsToInt(" + value + ')';
630 } else if (type == GLSLRegister::Type::UnsignedInteger) {
631 value = "floatBitsToUint(" + value + ')';
632 } else {
633 UNREACHABLE();
634 }
635
636 return ConvertIntegerSize(value, size);
637 }
638
639 std::string GetUniformIndirect(u64 cbuf_index, s64 offset, const std::string& index_str,
640 GLSLRegister::Type type) {
641 declr_const_buffers[cbuf_index].MarkAsUsedIndirect(cbuf_index, stage);
642
643 const std::string final_offset = fmt::format("({} + {})", index_str, offset / 4);
644 const std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" +
645 final_offset + " % 4]";
646
647 if (type == GLSLRegister::Type::Float) {
648 return value;
649 } else if (type == GLSLRegister::Type::Integer) {
650 return "floatBitsToInt(" + value + ')';
651 } else {
652 UNREACHABLE();
653 return value;
654 }
655 }
656
657 /// Add declarations.
658 void GenerateDeclarations(const std::string& suffix) {
659 GenerateVertex();
660 GenerateRegisters(suffix);
661 GenerateLocalMemory();
662 GenerateInternalFlags();
663 GenerateInputAttrs();
664 GenerateOutputAttrs();
665 GenerateConstBuffers();
666 GenerateSamplers();
667 GenerateGeometry();
668 }
669
670 /// Returns a list of constant buffer declarations.
671 std::vector<ConstBufferEntry> GetConstBuffersDeclarations() const {
672 std::vector<ConstBufferEntry> result;
673 std::copy_if(declr_const_buffers.begin(), declr_const_buffers.end(),
674 std::back_inserter(result), [](const auto& entry) { return entry.IsUsed(); });
675 return result;
676 }
677
678 /// Returns a list of samplers used in the shader.
679 const std::vector<SamplerEntry>& GetSamplers() const {
680 return used_samplers;
681 }
682
683 /// Returns an array of the used clip distances.
684 const std::array<bool, Maxwell::NumClipDistances>& GetClipDistances() const {
685 return clip_distances;
686 }
687
688 /// Returns the GLSL sampler used for the input shader sampler, and creates a new one if
689 /// necessary.
690 std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type,
691 bool is_array, bool is_shadow) {
692 const auto offset = static_cast<std::size_t>(sampler.index.Value());
693
694 // If this sampler has already been used, return the existing mapping.
695 const auto itr =
696 std::find_if(used_samplers.begin(), used_samplers.end(),
697 [&](const SamplerEntry& entry) { return entry.GetOffset() == offset; });
698
699 if (itr != used_samplers.end()) {
700 ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
701 itr->IsShadow() == is_shadow);
702 return itr->GetName();
703 }
704
705 // Otherwise create a new mapping for this sampler
706 const std::size_t next_index = used_samplers.size();
707 const SamplerEntry entry{stage, offset, next_index, type, is_array, is_shadow};
708 used_samplers.emplace_back(entry);
709 return entry.GetName();
710 }
711
712 void SetLocalMemory(u64 lmem) {
713 local_memory_size = lmem;
714 }
715
716private:
717 /// Generates declarations for registers.
718 void GenerateRegisters(const std::string& suffix) {
719 for (const auto& reg : regs) {
720 declarations.AddLine(GLSLRegister::GetTypeString() + ' ' + reg.GetPrefixString() +
721 std::to_string(reg.GetIndex()) + '_' + suffix + " = 0;");
722 }
723 declarations.AddNewLine();
724 }
725
726 /// Generates declarations for local memory.
727 void GenerateLocalMemory() {
728 if (local_memory_size > 0) {
729 declarations.AddLine("float lmem[" + std::to_string((local_memory_size - 1 + 4) / 4) +
730 "];");
731 declarations.AddNewLine();
732 }
733 }
734
735 /// Generates declarations for internal flags.
736 void GenerateInternalFlags() {
737 for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) {
738 const InternalFlag code = static_cast<InternalFlag>(flag);
739 declarations.AddLine("bool " + GetInternalFlag(code) + " = false;");
740 }
741 declarations.AddNewLine();
742 }
743
744 /// Generates declarations for input attributes.
745 void GenerateInputAttrs() {
746 for (const auto element : declr_input_attribute) {
747 // TODO(bunnei): Use proper number of elements for these
748 u32 idx =
749 static_cast<u32>(element.first) - static_cast<u32>(Attribute::Index::Attribute_0);
750 if (stage != Maxwell3D::Regs::ShaderStage::Vertex) {
751 // If inputs are varyings, add an offset
752 idx += GENERIC_VARYING_START_LOCATION;
753 }
754
755 std::string attr{GetInputAttribute(element.first, element.second)};
756 if (stage == Maxwell3D::Regs::ShaderStage::Geometry) {
757 attr = "gs_" + attr + "[]";
758 }
759 declarations.AddLine("layout (location = " + std::to_string(idx) + ") " +
760 GetInputFlags(element.first) + "in vec4 " + attr + ';');
761 }
762
763 declarations.AddNewLine();
764 }
765
766 /// Generates declarations for output attributes.
767 void GenerateOutputAttrs() {
768 for (const auto& index : declr_output_attribute) {
769 // TODO(bunnei): Use proper number of elements for these
770 const u32 idx = static_cast<u32>(index) -
771 static_cast<u32>(Attribute::Index::Attribute_0) +
772 GENERIC_VARYING_START_LOCATION;
773 declarations.AddLine("layout (location = " + std::to_string(idx) + ") out vec4 " +
774 GetOutputAttribute(index) + ';');
775 }
776 declarations.AddNewLine();
777 }
778
779 /// Generates declarations for constant buffers.
780 void GenerateConstBuffers() {
781 for (const auto& entry : GetConstBuffersDeclarations()) {
782 declarations.AddLine("layout (std140) uniform " + entry.GetName());
783 declarations.AddLine('{');
784 declarations.AddLine(" vec4 c" + std::to_string(entry.GetIndex()) +
785 "[MAX_CONSTBUFFER_ELEMENTS];");
786 declarations.AddLine("};");
787 declarations.AddNewLine();
788 }
789 declarations.AddNewLine();
790 }
791
792 /// Generates declarations for samplers.
793 void GenerateSamplers() {
794 const auto& samplers = GetSamplers();
795 for (const auto& sampler : samplers) {
796 declarations.AddLine("uniform " + sampler.GetTypeString() + ' ' + sampler.GetName() +
797 ';');
798 }
799 declarations.AddNewLine();
800 }
801
802 /// Generates declarations used for geometry shaders.
803 void GenerateGeometry() {
804 if (stage != Maxwell3D::Regs::ShaderStage::Geometry)
805 return;
806
807 declarations.AddLine(
808 "layout (" + GetTopologyName(header.common3.output_topology) +
809 ", max_vertices = " + std::to_string(header.common4.max_output_vertices) + ") out;");
810 declarations.AddNewLine();
811
812 declarations.AddLine("vec4 amem[" + std::to_string(MAX_GEOMETRY_BUFFERS) + "][" +
813 std::to_string(MAX_ATTRIBUTES) + "];");
814 declarations.AddNewLine();
815
816 constexpr char buffer[] = "amem[output_buffer]";
817 declarations.AddLine("void emit_vertex(uint output_buffer) {");
818 ++declarations.scope;
819 for (const auto element : declr_output_attribute) {
820 declarations.AddLine(GetOutputAttribute(element) + " = " + buffer + '[' +
821 std::to_string(static_cast<u32>(element)) + "];");
822 }
823
824 declarations.AddLine("position = " + std::string(buffer) + '[' +
825 std::to_string(static_cast<u32>(Attribute::Index::Position)) + "];");
826
827 // If a geometry shader is attached, it will always flip (it's the last stage before
828 // fragment). For more info about flipping, refer to gl_shader_gen.cpp.
829 declarations.AddLine("position.xy *= viewport_flip.xy;");
830 declarations.AddLine("gl_Position = position;");
831 declarations.AddLine("position.w = 1.0;");
832 declarations.AddLine("EmitVertex();");
833 --declarations.scope;
834 declarations.AddLine('}');
835 declarations.AddNewLine();
836 }
837
838 void GenerateVertex() {
839 if (stage != Maxwell3D::Regs::ShaderStage::Vertex)
840 return;
841 bool clip_distances_declared = false;
842
843 declarations.AddLine("out gl_PerVertex {");
844 ++declarations.scope;
845 declarations.AddLine("vec4 gl_Position;");
846 for (auto& o : fixed_pipeline_output_attributes_used) {
847 if (o == Attribute::Index::PointSize)
848 declarations.AddLine("float gl_PointSize;");
849 if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 ||
850 o == Attribute::Index::ClipDistances4567)) {
851 declarations.AddLine("float gl_ClipDistance[];");
852 clip_distances_declared = true;
853 }
854 }
855 --declarations.scope;
856 declarations.AddLine("};");
857 }
858
859 /// Generates code representing a temporary (GPR) register.
860 std::string GetRegister(const Register& reg, unsigned elem) {
861 if (reg == Register::ZeroIndex) {
862 return "0";
863 }
864
865 return regs[reg.GetSwizzledIndex(elem)].GetString();
866 }
867
868 /**
869 * Writes code that does a register assignment to value operation.
870 * @param reg The destination register to use.
871 * @param elem The element to use for the operation.
872 * @param value The code representing the value to assign.
873 * @param dest_num_components Number of components in the destination.
874 * @param value_num_components Number of components in the value.
875 * @param dest_elem Optional, the destination element to use for the operation.
876 */
877 void SetRegister(const Register& reg, u64 elem, const std::string& value,
878 u64 dest_num_components, u64 value_num_components, u64 dest_elem,
879 bool precise) {
880 if (reg == Register::ZeroIndex) {
881 // Setting RZ is a nop in hardware.
882 return;
883 }
884
885 std::string dest = GetRegister(reg, static_cast<u32>(dest_elem));
886 if (dest_num_components > 1) {
887 dest += GetSwizzle(elem);
888 }
889
890 std::string src = '(' + value + ')';
891 if (value_num_components > 1) {
892 src += GetSwizzle(elem);
893 }
894
895 if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) {
896 const auto scope = shader.Scope();
897
898 // This avoids optimizations of constant propagation and keeps the code as the original
899 // Sadly using the precise keyword causes "linking" errors on fragment shaders.
900 shader.AddLine("precise float tmp = " + src + ';');
901 shader.AddLine(dest + " = tmp;");
902 } else {
903 shader.AddLine(dest + " = " + src + ';');
904 }
905 }
906
907 /// Build the GLSL register list.
908 void BuildRegisterList() {
909 regs.reserve(Register::NumRegisters);
910
911 for (std::size_t index = 0; index < Register::NumRegisters; ++index) {
912 regs.emplace_back(index, suffix);
913 }
914 }
915
916 void BuildInputList() {
917 const u32 size = static_cast<u32>(Attribute::Index::Attribute_31) -
918 static_cast<u32>(Attribute::Index::Attribute_0) + 1;
919 declr_input_attribute.reserve(size);
920 }
921
922 /// Generates code representing an input attribute register.
923 std::string GetInputAttribute(Attribute::Index attribute,
924 const Tegra::Shader::IpaMode& input_mode,
925 std::optional<Register> vertex = {}) {
926 auto GeometryPass = [&](const std::string& name) {
927 if (stage == Maxwell3D::Regs::ShaderStage::Geometry && vertex) {
928 // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games set
929 // an 0x80000000 index for those and the shader fails to build. Find out why this
930 // happens and what's its intent.
931 return "gs_" + name + '[' + GetRegisterAsInteger(*vertex, 0, false) +
932 " % MAX_VERTEX_INPUT]";
933 }
934 return name;
935 };
936
937 switch (attribute) {
938 case Attribute::Index::Position:
939 if (stage != Maxwell3D::Regs::ShaderStage::Fragment) {
940 return GeometryPass("position");
941 } else {
942 return "vec4(gl_FragCoord.x, gl_FragCoord.y, gl_FragCoord.z, 1.0)";
943 }
944 case Attribute::Index::PointCoord:
945 return "vec4(gl_PointCoord.x, gl_PointCoord.y, 0, 0)";
946 case Attribute::Index::TessCoordInstanceIDVertexID:
947 // TODO(Subv): Find out what the values are for the first two elements when inside a
948 // vertex shader, and what's the value of the fourth element when inside a Tess Eval
949 // shader.
950 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
951 // Config pack's first value is instance_id.
952 return "vec4(0, 0, uintBitsToFloat(config_pack[0]), uintBitsToFloat(gl_VertexID))";
953 case Attribute::Index::FrontFacing:
954 // TODO(Subv): Find out what the values are for the other elements.
955 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
956 return "vec4(0, 0, 0, intBitsToFloat(gl_FrontFacing ? -1 : 0))";
957 default:
958 const u32 index{static_cast<u32>(attribute) -
959 static_cast<u32>(Attribute::Index::Attribute_0)};
960 if (attribute >= Attribute::Index::Attribute_0 &&
961 attribute <= Attribute::Index::Attribute_31) {
962 if (declr_input_attribute.count(attribute) == 0) {
963 declr_input_attribute[attribute] = input_mode;
964 } else {
965 UNIMPLEMENTED_IF_MSG(declr_input_attribute[attribute] != input_mode,
966 "Multiple input modes for the same attribute");
967 }
968 return GeometryPass("input_attribute_" + std::to_string(index));
969 }
970
971 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
972 }
973
974 return "vec4(0, 0, 0, 0)";
975 }
976
977 std::string GetInputFlags(const Attribute::Index attribute) {
978 const Tegra::Shader::IpaSampleMode sample_mode =
979 declr_input_attribute[attribute].sampling_mode;
980 const Tegra::Shader::IpaInterpMode interp_mode =
981 declr_input_attribute[attribute].interpolation_mode;
982 std::string out;
983 switch (interp_mode) {
984 case Tegra::Shader::IpaInterpMode::Flat: {
985 out += "flat ";
986 break;
987 }
988 case Tegra::Shader::IpaInterpMode::Linear: {
989 out += "noperspective ";
990 break;
991 }
992 case Tegra::Shader::IpaInterpMode::Perspective: {
993 // Default, Smooth
994 break;
995 }
996 default: {
997 UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode));
998 }
999 }
1000 switch (sample_mode) {
1001 case Tegra::Shader::IpaSampleMode::Centroid:
1002 // It can be implemented with the "centroid " keyword in glsl
1003 UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
1004 break;
1005 case Tegra::Shader::IpaSampleMode::Default:
1006 // Default, n/a
1007 break;
1008 default: {
1009 UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
1010 break;
1011 }
1012 }
1013 return out;
1014 }
1015
1016 /// Generates code representing the declaration name of an output attribute register.
1017 std::string GetOutputAttribute(Attribute::Index attribute) {
1018 switch (attribute) {
1019 case Attribute::Index::PointSize:
1020 return "gl_PointSize";
1021 case Attribute::Index::Position:
1022 return "position";
1023 case Attribute::Index::ClipDistances0123:
1024 case Attribute::Index::ClipDistances4567: {
1025 return "gl_ClipDistance";
1026 }
1027 default:
1028 const u32 index{static_cast<u32>(attribute) -
1029 static_cast<u32>(Attribute::Index::Attribute_0)};
1030 if (attribute >= Attribute::Index::Attribute_0) {
1031 declr_output_attribute.insert(attribute);
1032 return "output_attribute_" + std::to_string(index);
1033 }
1034
1035 UNIMPLEMENTED_MSG("Unhandled output attribute={}", index);
1036 return {};
1037 }
1038 }
1039
1040 ShaderWriter& shader;
1041 ShaderWriter& declarations;
1042 std::vector<GLSLRegister> regs;
1043 std::unordered_map<Attribute::Index, Tegra::Shader::IpaMode> declr_input_attribute;
1044 std::set<Attribute::Index> declr_output_attribute;
1045 std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;
1046 std::vector<SamplerEntry> used_samplers;
1047 const Maxwell3D::Regs::ShaderStage& stage;
1048 const std::string& suffix;
1049 const Tegra::Shader::Header& header;
1050 std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used;
1051 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
1052 u64 local_memory_size;
1053};
1054
1055class GLSLGenerator {
1056public:
1057 GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code,
1058 u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix,
1059 std::size_t shader_length)
1060 : subroutines(subroutines), program_code(program_code), main_offset(main_offset),
1061 stage(stage), suffix(suffix), shader_length(shader_length) {
1062 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
1063 local_memory_size = header.GetLocalMemorySize();
1064 regs.SetLocalMemory(local_memory_size);
1065 Generate(suffix);
1066 }
1067
1068 std::string GetShaderCode() {
1069 return declarations.GetResult() + shader.GetResult();
1070 }
1071
1072 /// Returns entries in the shader that are useful for external functions
1073 ShaderEntries GetEntries() const {
1074 return {regs.GetConstBuffersDeclarations(), regs.GetSamplers(), regs.GetClipDistances(),
1075 shader_length};
1076 }
1077
1078private:
1079 /// Gets the Subroutine object corresponding to the specified address.
1080 const Subroutine& GetSubroutine(u32 begin, u32 end) const {
1081 const auto iter = subroutines.find(Subroutine{begin, end, suffix});
1082 ASSERT(iter != subroutines.end());
1083 return *iter;
1084 }
1085
1086 /// Generates code representing a 19-bit immediate value
1087 static std::string GetImmediate19(const Instruction& instr) {
1088 return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_19());
1089 }
1090
1091 /// Generates code representing a 32-bit immediate value
1092 static std::string GetImmediate32(const Instruction& instr) {
1093 return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_32());
1094 }
1095
1096 /// Generates code representing a vec2 pair unpacked from a half float immediate
1097 static std::string UnpackHalfImmediate(const Instruction& instr, bool negate) {
1098 const std::string immediate = GetHalfFloat(std::to_string(instr.half_imm.PackImmediates()));
1099 if (!negate) {
1100 return immediate;
1101 }
1102 const std::string negate_first = instr.half_imm.first_negate != 0 ? "-" : "";
1103 const std::string negate_second = instr.half_imm.second_negate != 0 ? "-" : "";
1104 const std::string negate_vec = "vec2(" + negate_first + "1, " + negate_second + "1)";
1105
1106 return '(' + immediate + " * " + negate_vec + ')';
1107 }
1108
1109 /// Generates code representing a texture sampler.
1110 std::string GetSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array,
1111 bool is_shadow) {
1112 return regs.AccessSampler(sampler, type, is_array, is_shadow);
1113 }
1114
1115 /**
1116 * Adds code that calls a subroutine.
1117 * @param subroutine the subroutine to call.
1118 */
1119 void CallSubroutine(const Subroutine& subroutine) {
1120 if (subroutine.exit_method == ExitMethod::AlwaysEnd) {
1121 shader.AddLine(subroutine.GetName() + "();");
1122 shader.AddLine("return true;");
1123 } else if (subroutine.exit_method == ExitMethod::Conditional) {
1124 shader.AddLine("if (" + subroutine.GetName() + "()) { return true; }");
1125 } else {
1126 shader.AddLine(subroutine.GetName() + "();");
1127 }
1128 }
1129
1130 /*
1131 * Writes code that assigns a predicate boolean variable.
1132 * @param pred The id of the predicate to write to.
1133 * @param value The expression value to assign to the predicate.
1134 */
1135 void SetPredicate(u64 pred, const std::string& value) {
1136 using Tegra::Shader::Pred;
1137 // Can't assign to the constant predicate.
1138 ASSERT(pred != static_cast<u64>(Pred::UnusedIndex));
1139
1140 std::string variable = 'p' + std::to_string(pred) + '_' + suffix;
1141 shader.AddLine(variable + " = " + value + ';');
1142 declr_predicates.insert(std::move(variable));
1143 }
1144
1145 /*
1146 * Returns the condition to use in the 'if' for a predicated instruction.
1147 * @param instr Instruction to generate the if condition for.
1148 * @returns string containing the predicate condition.
1149 */
1150 std::string GetPredicateCondition(u64 index, bool negate) {
1151 using Tegra::Shader::Pred;
1152 std::string variable;
1153
1154 // Index 7 is used as an 'Always True' condition.
1155 if (index == static_cast<u64>(Pred::UnusedIndex)) {
1156 variable = "true";
1157 } else {
1158 variable = 'p' + std::to_string(index) + '_' + suffix;
1159 declr_predicates.insert(variable);
1160 }
1161 if (negate) {
1162 return "!(" + variable + ')';
1163 }
1164
1165 return variable;
1166 }
1167
1168 /**
1169 * Returns the comparison string to use to compare two values in the 'set' family of
1170 * instructions.
1171 * @param condition The condition used in the 'set'-family instruction.
1172 * @param op_a First operand to use for the comparison.
1173 * @param op_b Second operand to use for the comparison.
1174 * @returns String corresponding to the GLSL operator that matches the desired comparison.
1175 */
1176 std::string GetPredicateComparison(Tegra::Shader::PredCondition condition,
1177 const std::string& op_a, const std::string& op_b) const {
1178 using Tegra::Shader::PredCondition;
1179 static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = {
1180 {PredCondition::LessThan, "<"},
1181 {PredCondition::Equal, "=="},
1182 {PredCondition::LessEqual, "<="},
1183 {PredCondition::GreaterThan, ">"},
1184 {PredCondition::NotEqual, "!="},
1185 {PredCondition::GreaterEqual, ">="},
1186 {PredCondition::LessThanWithNan, "<"},
1187 {PredCondition::NotEqualWithNan, "!="},
1188 {PredCondition::LessEqualWithNan, "<="},
1189 {PredCondition::GreaterThanWithNan, ">"},
1190 {PredCondition::GreaterEqualWithNan, ">="}};
1191
1192 const auto& comparison{PredicateComparisonStrings.find(condition)};
1193 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonStrings.end(),
1194 "Unknown predicate comparison operation");
1195
1196 std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'};
1197 if (condition == PredCondition::LessThanWithNan ||
1198 condition == PredCondition::NotEqualWithNan ||
1199 condition == PredCondition::LessEqualWithNan ||
1200 condition == PredCondition::GreaterThanWithNan ||
1201 condition == PredCondition::GreaterEqualWithNan) {
1202 predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')';
1203 }
1204
1205 return predicate;
1206 }
1207
1208 /**
1209 * Returns the operator string to use to combine two predicates in the 'setp' family of
1210 * instructions.
1211     * @param operation The operator used in the 'setp'-family instruction.
1212 * @returns String corresponding to the GLSL operator that matches the desired operator.
1213 */
1214 std::string GetPredicateCombiner(Tegra::Shader::PredOperation operation) const {
1215 using Tegra::Shader::PredOperation;
1216 static const std::unordered_map<PredOperation, const char*> PredicateOperationStrings = {
1217 {PredOperation::And, "&&"},
1218 {PredOperation::Or, "||"},
1219 {PredOperation::Xor, "^^"},
1220 };
1221
1222 auto op = PredicateOperationStrings.find(operation);
1223 UNIMPLEMENTED_IF_MSG(op == PredicateOperationStrings.end(), "Unknown predicate operation");
1224 return op->second;
1225 }
1226
1227 /**
1228 * Transforms the input string GLSL operand into one that applies the abs() function and negates
1229 * the output if necessary. When both abs and neg are true, the negation will be applied after
1230 * taking the absolute value.
1231 * @param operand The input operand to take the abs() of, negate, or both.
1232 * @param abs Whether to apply the abs() function to the input operand.
1233 * @param neg Whether to negate the input operand.
1234 * @returns String corresponding to the operand after being transformed by the abs() and
1235 * negation operations.
1236 */
1237 static std::string GetOperandAbsNeg(const std::string& operand, bool abs, bool neg) {
1238 std::string result = operand;
1239
1240 if (abs) {
1241 result = "abs(" + result + ')';
1242 }
1243
1244 if (neg) {
1245 result = "-(" + result + ')';
1246 }
1247
1248 return result;
1249 }
1250
1251 /*
1252 * Transforms the input string GLSL operand into an unpacked half float pair.
1253 * @note This function returns a float type pair instead of a half float pair. This is because
1254 * real half floats are not standardized in GLSL but unpackHalf2x16 (which returns a vec2) is.
1255 * @param operand Input operand. It has to be an unsigned integer.
1256 * @param type How to unpack the unsigned integer to a half float pair.
1257 * @param abs Get the absolute value of unpacked half floats.
1258 * @param neg Get the negative value of unpacked half floats.
1259 * @returns String corresponding to a half float pair.
1260 */
1261 static std::string GetHalfFloat(const std::string& operand,
1262 Tegra::Shader::HalfType type = Tegra::Shader::HalfType::H0_H1,
1263 bool abs = false, bool neg = false) {
1264 // "vec2" calls emitted in this function are intended to alias components.
1265 const std::string value = [&]() {
1266 switch (type) {
1267 case Tegra::Shader::HalfType::H0_H1:
1268 return "unpackHalf2x16(" + operand + ')';
1269 case Tegra::Shader::HalfType::F32:
1270 return "vec2(uintBitsToFloat(" + operand + "))";
1271 case Tegra::Shader::HalfType::H0_H0:
1272 case Tegra::Shader::HalfType::H1_H1: {
1273 const bool high = type == Tegra::Shader::HalfType::H1_H1;
1274 const char unpack_index = "xy"[high ? 1 : 0];
1275 return "vec2(unpackHalf2x16(" + operand + ")." + unpack_index + ')';
1276 }
1277 default:
1278 UNREACHABLE();
1279 return std::string("vec2(0)");
1280 }
1281 }();
1282
1283 return GetOperandAbsNeg(value, abs, neg);
1284 }
1285
1286 /*
1287 * Returns whether the instruction at the specified offset is a 'sched' instruction.
1288 * Sched instructions always appear before a sequence of 3 instructions.
1289 */
1290 bool IsSchedInstruction(u32 offset) const {
1291 // sched instructions appear once every 4 instructions.
1292 static constexpr std::size_t SchedPeriod = 4;
1293 u32 absolute_offset = offset - main_offset;
1294
1295 return (absolute_offset % SchedPeriod) == 0;
1296 }
1297
1298 void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a,
1299 const std::string& op_b,
1300 Tegra::Shader::PredicateResultMode predicate_mode,
1301 Tegra::Shader::Pred predicate, const bool set_cc) {
1302 std::string result{};
1303 switch (logic_op) {
1304 case LogicOperation::And: {
1305 result = '(' + op_a + " & " + op_b + ')';
1306 break;
1307 }
1308 case LogicOperation::Or: {
1309 result = '(' + op_a + " | " + op_b + ')';
1310 break;
1311 }
1312 case LogicOperation::Xor: {
1313 result = '(' + op_a + " ^ " + op_b + ')';
1314 break;
1315 }
1316 case LogicOperation::PassB: {
1317 result = op_b;
1318 break;
1319 }
1320 default:
1321 UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op));
1322 }
1323
1324 if (dest != Tegra::Shader::Register::ZeroIndex) {
1325 regs.SetRegisterToInteger(dest, true, 0, result, 1, 1, false, set_cc);
1326 }
1327
1328 using Tegra::Shader::PredicateResultMode;
1329 // Write the predicate value depending on the predicate mode.
1330 switch (predicate_mode) {
1331 case PredicateResultMode::None:
1332 // Do nothing.
1333 return;
1334 case PredicateResultMode::NotZero:
1335 // Set the predicate to true if the result is not zero.
1336 SetPredicate(static_cast<u64>(predicate), '(' + result + ") != 0");
1337 break;
1338 default:
1339 UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}",
1340 static_cast<u32>(predicate_mode));
1341 }
1342 }
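    // Example of the generated GLSL (operand strings are placeholders): for LogicOperation::And
    // with op_a == "R5" and op_b == "imm", the result expression is "(R5 & imm)"; with
    // PredicateResultMode::NotZero the selected predicate is additionally set to
    // "((R5 & imm)) != 0".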
1343
1344 void WriteLop3Instruction(Register dest, const std::string& op_a, const std::string& op_b,
1345 const std::string& op_c, const std::string& imm_lut,
1346 const bool set_cc) {
1347 if (dest == Tegra::Shader::Register::ZeroIndex) {
1348 return;
1349 }
1350
1351 static constexpr std::array<const char*, 32> shift_amounts = {
1352 "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
1353 "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21",
1354 "22", "23", "24", "25", "26", "27", "28", "29", "30", "31"};
1355
1356 std::string result;
1357 result += '(';
1358
1359 for (std::size_t i = 0; i < shift_amounts.size(); ++i) {
1360 if (i)
1361 result += '|';
1362 result += "(((" + imm_lut + " >> (((" + op_c + " >> " + shift_amounts[i] +
1363 ") & 1) | ((" + op_b + " >> " + shift_amounts[i] + ") & 1) << 1 | ((" + op_a +
1364 " >> " + shift_amounts[i] + ") & 1) << 2)) & 1) << " + shift_amounts[i] + ")";
1365 }
1366
1367 result += ')';
1368
1369 regs.SetRegisterToInteger(dest, true, 0, result, 1, 1, false, set_cc);
1370 }
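    // The loop above evaluates the LOP3 look-up table bit by bit: for every bit i, the index
    // into imm_lut is built as (a_bit << 2) | (b_bit << 1) | c_bit. As an illustrative example,
    // an imm_lut of 0x96 (bits 1, 2, 4 and 7 set) computes op_a ^ op_b ^ op_c, while 0x80
    // computes op_a & op_b & op_c.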
1371
1372 void WriteTexsInstructionFloat(const Instruction& instr, const std::string& texture) {
1373 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
1374         // go into gpr0+0 and gpr0+1, and the rest go into gpr28+0 and gpr28+1.
1375
1376 std::size_t written_components = 0;
1377 for (u32 component = 0; component < 4; ++component) {
1378 if (!instr.texs.IsComponentEnabled(component)) {
1379 continue;
1380 }
1381
1382 if (written_components < 2) {
1383 // Write the first two swizzle components to gpr0 and gpr0+1
1384 regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false, false,
1385 written_components % 2);
1386 } else {
1387 ASSERT(instr.texs.HasTwoDestinations());
1388 // Write the rest of the swizzle components to gpr28 and gpr28+1
1389 regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false, false,
1390 written_components % 2);
1391 }
1392
1393 ++written_components;
1394 }
1395 }
1396
1397 void WriteTexsInstructionHalfFloat(const Instruction& instr, const std::string& texture) {
1398         // TEXS.F16 destination registers are packed as pairs of half floats in two registers (just
1399         // like any other half float instruction).
1400
1401 std::array<std::string, 4> components;
1402 u32 written_components = 0;
1403
1404 for (u32 component = 0; component < 4; ++component) {
1405 if (!instr.texs.IsComponentEnabled(component))
1406 continue;
1407 components[written_components++] = texture + GetSwizzle(component);
1408 }
1409 if (written_components == 0)
1410 return;
1411
1412 const auto BuildComponent = [&](std::string low, std::string high, bool high_enabled) {
1413 return "vec2(" + low + ", " + (high_enabled ? high : "0") + ')';
1414 };
1415
1416 regs.SetRegisterToHalfFloat(
1417 instr.gpr0, 0, BuildComponent(components[0], components[1], written_components > 1),
1418 Tegra::Shader::HalfMerge::H0_H1, 1, 1);
1419
1420 if (written_components > 2) {
1421 ASSERT(instr.texs.HasTwoDestinations());
1422 regs.SetRegisterToHalfFloat(
1423 instr.gpr28, 0,
1424 BuildComponent(components[2], components[3], written_components > 3),
1425 Tegra::Shader::HalfMerge::H0_H1, 1, 1);
1426 }
1427 }
1428
1429 static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) {
1430 switch (texture_type) {
1431 case Tegra::Shader::TextureType::Texture1D:
1432 return 1;
1433 case Tegra::Shader::TextureType::Texture2D:
1434 return 2;
1435 case Tegra::Shader::TextureType::Texture3D:
1436 case Tegra::Shader::TextureType::TextureCube:
1437 return 3;
1438 default:
1439 UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
1440 return 0;
1441 }
1442 }
1443
1444 /*
1445 * Emits code to push the input target address to the flow address stack, incrementing the stack
1446 * top.
1447 */
1448 void EmitPushToFlowStack(u32 target) {
1449 const auto scope = shader.Scope();
1450
1451 shader.AddLine("flow_stack[flow_stack_top] = " + std::to_string(target) + "u;");
1452 shader.AddLine("flow_stack_top++;");
1453 }
1454
1455 /*
1456 * Emits code to pop an address from the flow address stack, setting the jump address to the
1457 * popped address and decrementing the stack top.
1458 */
1459 void EmitPopFromFlowStack() {
1460 const auto scope = shader.Scope();
1461
1462 shader.AddLine("flow_stack_top--;");
1463 shader.AddLine("jmp_to = flow_stack[flow_stack_top];");
1464 shader.AddLine("break;");
1465 }
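    // Together these helpers implement the SSY-style control flow used by the dispatch loop: for
    // an illustrative target of 16, the push emits "flow_stack[flow_stack_top] = 16u;" followed
    // by "flow_stack_top++;", and the matching pop restores that address into jmp_to and breaks
    // out of the dispatch switch so execution resumes at the pushed target.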
1466
1467 /// Writes the output values from a fragment shader to the corresponding GLSL output variables.
1468 void EmitFragmentOutputsWrite() {
1469 ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
1470
1471 UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Samplemask write is unimplemented");
1472
1473 shader.AddLine("if (alpha_test[0] != 0) {");
1474 ++shader.scope;
1475 // We start on the register containing the alpha value in the first RT.
1476 u32 current_reg = 3;
1477 for (u32 render_target = 0; render_target < Maxwell3D::Regs::NumRenderTargets;
1478 ++render_target) {
1479 // TODO(Blinkhawk): verify the behavior of alpha testing on hardware when
1480 // multiple render targets are used.
1481 if (header.ps.IsColorComponentOutputEnabled(render_target, 0) ||
1482 header.ps.IsColorComponentOutputEnabled(render_target, 1) ||
1483 header.ps.IsColorComponentOutputEnabled(render_target, 2) ||
1484 header.ps.IsColorComponentOutputEnabled(render_target, 3)) {
1485 shader.AddLine(fmt::format("if (!AlphaFunc({})) discard;",
1486 regs.GetRegisterAsFloat(current_reg)));
1487 current_reg += 4;
1488 }
1489 }
1490 --shader.scope;
1491 shader.AddLine('}');
1492
1493         // Write the color outputs using the data in the shader registers; disabled
1494         // render targets/components are skipped in the register assignment.
1495 current_reg = 0;
1496 for (u32 render_target = 0; render_target < Maxwell3D::Regs::NumRenderTargets;
1497 ++render_target) {
1498 // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
1499 for (u32 component = 0; component < 4; ++component) {
1500 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
1501 shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
1502 regs.GetRegisterAsFloat(current_reg)));
1503 ++current_reg;
1504 }
1505 }
1506 }
1507
1508 if (header.ps.omap.depth) {
1509 // The depth output is always 2 registers after the last color output, and current_reg
1510 // already contains one past the last color register.
1511
1512 shader.AddLine(
1513 "gl_FragDepth = " +
1514 regs.GetRegisterAsFloat(static_cast<Tegra::Shader::Register>(current_reg) + 1) +
1515 ';');
1516 }
1517 }
1518
1519 /// Unpacks a video instruction operand (e.g. VMAD).
1520 std::string GetVideoOperand(const std::string& op, bool is_chunk, bool is_signed,
1521 Tegra::Shader::VideoType type, u64 byte_height) {
1522 const std::string value = [&]() {
1523 if (!is_chunk) {
1524 const auto offset = static_cast<u32>(byte_height * 8);
1525 return "((" + op + " >> " + std::to_string(offset) + ") & 0xff)";
1526 }
1527 const std::string zero = "0";
1528
1529 switch (type) {
1530 case Tegra::Shader::VideoType::Size16_Low:
1531 return '(' + op + " & 0xffff)";
1532 case Tegra::Shader::VideoType::Size16_High:
1533 return '(' + op + " >> 16)";
1534 case Tegra::Shader::VideoType::Size32:
1535 // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when
1536 // this type is used (1 * 1 + 0 == 0x5b800000). Until a better
1537 // explanation is found: abort.
1538 UNIMPLEMENTED();
1539 return zero;
1540 case Tegra::Shader::VideoType::Invalid:
1541 UNREACHABLE_MSG("Invalid instruction encoding");
1542 return zero;
1543 default:
1544 UNREACHABLE();
1545 return zero;
1546 }
1547 }();
1548
1549 if (is_signed) {
1550 return "int(" + value + ')';
1551 }
1552 return value;
1553     }
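    // Illustrative outputs (the operand string "op" is a placeholder): with is_chunk == false
    // and byte_height == 2 the helper yields "((op >> 16) & 0xff)", selecting the third byte of
    // the operand; with is_chunk == true and VideoType::Size16_High it yields "(op >> 16)".
    // When is_signed is set, either expression is additionally wrapped in "int(...)".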
1554
1555 /// Gets the A operand for a video instruction.
1556 std::string GetVideoOperandA(Instruction instr) {
1557 return GetVideoOperand(regs.GetRegisterAsInteger(instr.gpr8, 0, false),
1558 instr.video.is_byte_chunk_a != 0, instr.video.signed_a,
1559 instr.video.type_a, instr.video.byte_height_a);
1560 }
1561
1562 /// Gets the B operand for a video instruction.
1563 std::string GetVideoOperandB(Instruction instr) {
1564 if (instr.video.use_register_b) {
1565 return GetVideoOperand(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
1566 instr.video.is_byte_chunk_b != 0, instr.video.signed_b,
1567 instr.video.type_b, instr.video.byte_height_b);
1568 } else {
1569 return '(' +
1570 std::to_string(instr.video.signed_b ? static_cast<s16>(instr.alu.GetImm20_16())
1571 : instr.alu.GetImm20_16()) +
1572 ')';
1573 }
1574 }
1575
1576 std::pair<size_t, std::string> ValidateAndGetCoordinateElement(
1577 const Tegra::Shader::TextureType texture_type, const bool depth_compare,
1578 const bool is_array, const bool lod_bias_enabled, size_t max_coords, size_t max_inputs) {
1579 const size_t coord_count = TextureCoordinates(texture_type);
1580
1581 size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
1582 const size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
1583 if (total_coord_count > max_coords || total_reg_count > max_inputs) {
1584             UNIMPLEMENTED_MSG("Unsupported texture operation");
1585 total_coord_count = std::min(total_coord_count, max_coords);
1586 }
1587         // For 1D.DC, OpenGL uses a vec3 coordinate, but the 2nd component is ignored later.
1588 total_coord_count +=
1589 (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D)
1590 ? 1
1591 : 0;
1592
1593 constexpr std::array<const char*, 5> coord_container{
1594 {"", "float coord = (", "vec2 coord = vec2(", "vec3 coord = vec3(",
1595 "vec4 coord = vec4("}};
1596
1597 return std::pair<size_t, std::string>(coord_count, coord_container[total_coord_count]);
1598 }
1599
1600 std::string GetTextureCode(const Tegra::Shader::Instruction& instr,
1601 const Tegra::Shader::TextureType texture_type,
1602 const Tegra::Shader::TextureProcessMode process_mode,
1603 const bool depth_compare, const bool is_array,
1604 const size_t bias_offset) {
1605
1606 if ((texture_type == Tegra::Shader::TextureType::Texture3D &&
1607 (is_array || depth_compare)) ||
1608 (texture_type == Tegra::Shader::TextureType::TextureCube && is_array &&
1609 depth_compare)) {
1610             UNIMPLEMENTED_MSG("Unsupported texture type and mode combination");
1611 }
1612
1613 const std::string sampler =
1614 GetSampler(instr.sampler, texture_type, is_array, depth_compare);
1615
1616 const bool lod_needed = process_mode == Tegra::Shader::TextureProcessMode::LZ ||
1617 process_mode == Tegra::Shader::TextureProcessMode::LL ||
1618 process_mode == Tegra::Shader::TextureProcessMode::LLA;
1619
1620 // LOD selection (either via bias or explicit textureLod) not supported in GL for
1621 // sampler2DArrayShadow and samplerCubeArrayShadow.
1622 const bool gl_lod_supported = !(
1623 (texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) ||
1624 (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare));
1625
1626 const std::string read_method = lod_needed && gl_lod_supported ? "textureLod(" : "texture(";
1627 std::string texture = read_method + sampler + ", coord";
1628
1629 UNIMPLEMENTED_IF(process_mode != Tegra::Shader::TextureProcessMode::None &&
1630 !gl_lod_supported);
1631
1632 if (process_mode != Tegra::Shader::TextureProcessMode::None && gl_lod_supported) {
1633 if (process_mode == Tegra::Shader::TextureProcessMode::LZ) {
1634 texture += ", 0.0";
1635 } else {
1636                 // If present, lod or bias are always stored in the register indexed by
1637                 // the gpr20 field, with an offset that depends on the usage of the other
1638                 // registers.
1639 texture += ',' + regs.GetRegisterAsFloat(instr.gpr20.Value() + bias_offset);
1640 }
1641 }
1642 texture += ")";
1643 return texture;
1644 }
1645
1646 std::pair<std::string, std::string> GetTEXCode(
1647 const Instruction& instr, const Tegra::Shader::TextureType texture_type,
1648 const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare,
1649 const bool is_array) {
1650 const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None &&
1651 process_mode != Tegra::Shader::TextureProcessMode::LZ);
1652
1653 const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement(
1654 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
1655         // When arrays are enabled, the array index is always stored in the gpr8 field
1656 const u64 array_register = instr.gpr8.Value();
1657         // The first coordinate is stored in gpr8, or in gpr8 + 1 when arrays are used
1658 const u64 coord_register = array_register + (is_array ? 1 : 0);
1659
1660 std::string coord = coord_dcl;
1661 for (size_t i = 0; i < coord_count;) {
1662 coord += regs.GetRegisterAsFloat(coord_register + i);
1663 ++i;
1664 if (i != coord_count) {
1665 coord += ',';
1666 }
1667 }
1668         // For 1D.DC, OpenGL ignores the 2nd component.
1669 if (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D) {
1670 coord += ",0.0";
1671 }
1672 if (is_array) {
1673 coord += ',' + regs.GetRegisterAsInteger(array_register);
1674 }
1675 if (depth_compare) {
1676             // Depth is always stored in the register indexed by the gpr20 field,
1677             // or in the next register if lod or bias are used.
1678 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
1679 coord += ',' + regs.GetRegisterAsFloat(depth_register);
1680 }
1681 coord += ");";
1682 return std::make_pair(
1683 coord, GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, 0));
1684 }
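    // Rough sketch of the output pair (register and sampler names are placeholders): a 2D,
    // non-array, non-depth-compare TEX with process mode None produces something like
    // "vec2 coord = vec2(R8,R9);" for the coordinate declaration and "texture(sampler, coord)"
    // for the fetch expression.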
1685
1686 std::pair<std::string, std::string> GetTEXSCode(
1687 const Instruction& instr, const Tegra::Shader::TextureType texture_type,
1688 const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare,
1689 const bool is_array) {
1690 const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None &&
1691 process_mode != Tegra::Shader::TextureProcessMode::LZ);
1692
1693 const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement(
1694 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
1695         // When arrays are enabled, the array index is always stored in the gpr8 field
1696 const u64 array_register = instr.gpr8.Value();
1697         // The first coordinate is stored in gpr8, or in gpr8 + 1 when arrays are used
1698 const u64 coord_register = array_register + (is_array ? 1 : 0);
1699 const u64 last_coord_register =
1700 (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
1701 ? static_cast<u64>(instr.gpr20.Value())
1702 : coord_register + 1;
1703
1704 std::string coord = coord_dcl;
1705 for (size_t i = 0; i < coord_count; ++i) {
1706 const bool last = (i == (coord_count - 1)) && (coord_count > 1);
1707 coord += regs.GetRegisterAsFloat(last ? last_coord_register : coord_register + i);
1708 if (i < coord_count - 1) {
1709 coord += ',';
1710 }
1711 }
1712
1713 if (is_array) {
1714 coord += ',' + regs.GetRegisterAsInteger(array_register);
1715 }
1716 if (depth_compare) {
1717             // Depth is always stored in the register indexed by the gpr20 field,
1718             // or in the next register if lod or bias are used.
1719 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
1720 coord += ',' + regs.GetRegisterAsFloat(depth_register);
1721 }
1722 coord += ");";
1723
1724 return std::make_pair(coord,
1725 GetTextureCode(instr, texture_type, process_mode, depth_compare,
1726 is_array, (coord_count > 2 ? 1 : 0)));
1727 }
1728
1729 std::pair<std::string, std::string> GetTLD4Code(const Instruction& instr,
1730 const Tegra::Shader::TextureType texture_type,
1731 const bool depth_compare, const bool is_array) {
1732
1733 const size_t coord_count = TextureCoordinates(texture_type);
1734 const size_t total_coord_count = coord_count + (is_array ? 1 : 0);
1735 const size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
1736
1737 constexpr std::array<const char*, 5> coord_container{
1738 {"", "", "vec2 coord = vec2(", "vec3 coord = vec3(", "vec4 coord = vec4("}};
1739
1740         // When arrays are enabled, the array index is always stored in the gpr8 field
1741 const u64 array_register = instr.gpr8.Value();
1742         // The first coordinate is stored in gpr8, or in gpr8 + 1 when arrays are used
1743 const u64 coord_register = array_register + (is_array ? 1 : 0);
1744
1745 std::string coord = coord_container[total_coord_count];
1746 for (size_t i = 0; i < coord_count;) {
1747 coord += regs.GetRegisterAsFloat(coord_register + i);
1748 ++i;
1749 if (i != coord_count) {
1750 coord += ',';
1751 }
1752 }
1753
1754 if (is_array) {
1755 coord += ',' + regs.GetRegisterAsInteger(array_register);
1756 }
1757 coord += ");";
1758
1759 const std::string sampler =
1760 GetSampler(instr.sampler, texture_type, is_array, depth_compare);
1761
1762 std::string texture = "textureGather(" + sampler + ", coord, ";
1763 if (depth_compare) {
1764             // Depth is always stored in the register indexed by the gpr20 field.
1765 texture += regs.GetRegisterAsFloat(instr.gpr20.Value()) + ')';
1766 } else {
1767 texture += std::to_string(instr.tld4.component) + ')';
1768 }
1769 return std::make_pair(coord, texture);
1770 }
1771
1772 std::pair<std::string, std::string> GetTLDSCode(const Instruction& instr,
1773 const Tegra::Shader::TextureType texture_type,
1774 const bool is_array) {
1775
1776 const size_t coord_count = TextureCoordinates(texture_type);
1777 const size_t total_coord_count = coord_count + (is_array ? 1 : 0);
1778 const bool lod_enabled =
1779 instr.tlds.GetTextureProcessMode() == Tegra::Shader::TextureProcessMode::LL;
1780
1781 constexpr std::array<const char*, 4> coord_container{
1782 {"", "int coords = (", "ivec2 coords = ivec2(", "ivec3 coords = ivec3("}};
1783
1784 std::string coord = coord_container[total_coord_count];
1785
1786         // When arrays are enabled, the array index is always stored in the gpr8 field
1787 const u64 array_register = instr.gpr8.Value();
1788
1789         // When arrays are used, the coordinates are read from gpr20 instead of gpr8.
1790 const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
1791
1792 const u64 last_coord_register =
1793 ((coord_count > 2) || (coord_count == 2 && !lod_enabled)) && !is_array
1794 ? static_cast<u64>(instr.gpr20.Value())
1795 : coord_register + 1;
1796
1797 for (size_t i = 0; i < coord_count; ++i) {
1798 const bool last = (i == (coord_count - 1)) && (coord_count > 1);
1799 coord += regs.GetRegisterAsInteger(last ? last_coord_register : coord_register + i);
1800 if (i < coord_count - 1) {
1801 coord += ',';
1802 }
1803 }
1804 if (is_array) {
1805 coord += ',' + regs.GetRegisterAsInteger(array_register);
1806 }
1807 coord += ");";
1808
1809 const std::string sampler = GetSampler(instr.sampler, texture_type, is_array, false);
1810
1811 std::string texture = "texelFetch(" + sampler + ", coords";
1812
1813 if (lod_enabled) {
1814             // When lod is used, it is always stored in gpr20.
1815 texture += ", " + regs.GetRegisterAsInteger(instr.gpr20) + ')';
1816 } else {
1817 texture += ", 0)";
1818 }
1819 return std::make_pair(coord, texture);
1820 }
1821
1822 /**
1823 * Compiles a single instruction from Tegra to GLSL.
1824 * @param offset the offset of the Tegra shader instruction.
1825 * @return the offset of the next instruction to execute. Usually it is the current offset
1826 * + 1. If the current instruction always terminates the program, returns PROGRAM_END.
1827 */
1828 u32 CompileInstr(u32 offset) {
1829 // Ignore sched instructions when generating code.
1830 if (IsSchedInstruction(offset)) {
1831 return offset + 1;
1832 }
1833
1834 const Instruction instr = {program_code[offset]};
1835 const auto opcode = OpCode::Decode(instr);
1836
1837 // Decoding failure
1838 if (!opcode) {
1839 UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
1840 return offset + 1;
1841 }
1842
1843 shader.AddLine(
1844 fmt::format("// {}: {} (0x{:016x})", offset, opcode->get().GetName(), instr.value));
1845
1846 using Tegra::Shader::Pred;
1847 UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
1848 "NeverExecute predicate not implemented");
1849
1850 // Some instructions (like SSY) don't have a predicate field, they are always
1851 // unconditionally executed.
1852 bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId());
1853
1854 if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
1855 shader.AddLine("if (" +
1856 GetPredicateCondition(instr.pred.pred_index, instr.negate_pred != 0) +
1857 ')');
1858 shader.AddLine('{');
1859 ++shader.scope;
1860 }
1861
1862 switch (opcode->get().GetType()) {
1863 case OpCode::Type::Arithmetic: {
1864 std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
1865
1866 std::string op_b;
1867
1868 if (instr.is_b_imm) {
1869 op_b = GetImmediate19(instr);
1870 } else {
1871 if (instr.is_b_gpr) {
1872 op_b = regs.GetRegisterAsFloat(instr.gpr20);
1873 } else {
1874 op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1875 GLSLRegister::Type::Float);
1876 }
1877 }
1878
1879 switch (opcode->get().GetId()) {
1880 case OpCode::Id::MOV_C:
1881 case OpCode::Id::MOV_R: {
1882                 // MOV has neither 'abs' nor 'neg' bits.
1883 regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1);
1884 break;
1885 }
1886
1887 case OpCode::Id::FMUL_C:
1888 case OpCode::Id::FMUL_R:
1889 case OpCode::Id::FMUL_IMM: {
1890 // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
1891 UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0,
1892 "FMUL tab5cb8_2({}) is not implemented",
1893 instr.fmul.tab5cb8_2.Value());
1894 UNIMPLEMENTED_IF_MSG(
1895 instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented",
1896 instr.fmul.tab5c68_0
1897                         .Value()); // SMO typically sends 1 here, which seems to be the default
1898
1899 op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b);
1900
1901 std::string postfactor_op;
1902 if (instr.fmul.postfactor != 0) {
1903 s8 postfactor = static_cast<s8>(instr.fmul.postfactor);
1904
1905                     // The postfactor is encoded as 3-bit 1's complement in the instruction
1906                     // and interpreted with the logic below.
1907 if (postfactor >= 4) {
1908 postfactor = 7 - postfactor;
1909 } else {
1910 postfactor = 0 - postfactor;
1911 }
1912
1913 if (postfactor > 0) {
1914 postfactor_op = " * " + std::to_string(1 << postfactor);
1915 } else {
1916 postfactor_op = " / " + std::to_string(1 << -postfactor);
1917 }
1918 }
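                // Worked decode examples (assuming the 1's complement interpretation above):
                // a raw field of 0b001 decodes to -1 and appends " / 2", 0b101 decodes to 2 and
                // appends " * 4", and 0b111 decodes to 0 and appends " / 1".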
1919
1920 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + postfactor_op, 1, 1,
1921 instr.alu.saturate_d, instr.generates_cc, 0, true);
1922 break;
1923 }
1924 case OpCode::Id::FADD_C:
1925 case OpCode::Id::FADD_R:
1926 case OpCode::Id::FADD_IMM: {
1927 op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
1928 op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
1929
1930 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1,
1931 instr.alu.saturate_d, instr.generates_cc, 0, true);
1932 break;
1933 }
1934 case OpCode::Id::MUFU: {
1935 op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
1936 switch (instr.sub_op) {
1937 case SubOp::Cos:
1938 regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1,
1939 instr.alu.saturate_d, false, 0, true);
1940 break;
1941 case SubOp::Sin:
1942 regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1,
1943 instr.alu.saturate_d, false, 0, true);
1944 break;
1945 case SubOp::Ex2:
1946 regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1,
1947 instr.alu.saturate_d, false, 0, true);
1948 break;
1949 case SubOp::Lg2:
1950 regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1,
1951 instr.alu.saturate_d, false, 0, true);
1952 break;
1953 case SubOp::Rcp:
1954 regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1,
1955 instr.alu.saturate_d, false, 0, true);
1956 break;
1957 case SubOp::Rsq:
1958 regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1,
1959 instr.alu.saturate_d, false, 0, true);
1960 break;
1961 case SubOp::Sqrt:
1962 regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1,
1963 instr.alu.saturate_d, false, 0, true);
1964 break;
1965 default:
1966 UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}",
1967 static_cast<unsigned>(instr.sub_op.Value()));
1968 }
1969 break;
1970 }
1971 case OpCode::Id::FMNMX_C:
1972 case OpCode::Id::FMNMX_R:
1973 case OpCode::Id::FMNMX_IMM: {
1974 UNIMPLEMENTED_IF_MSG(
1975 instr.generates_cc,
1976 "Condition codes generation in FMNMX is partially implemented");
1977
1978 op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
1979 op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
1980
1981 std::string condition =
1982 GetPredicateCondition(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
1983 std::string parameters = op_a + ',' + op_b;
1984 regs.SetRegisterToFloat(instr.gpr0, 0,
1985 '(' + condition + ") ? min(" + parameters + ") : max(" +
1986 parameters + ')',
1987 1, 1, false, instr.generates_cc, 0, true);
1988 break;
1989 }
1990 case OpCode::Id::RRO_C:
1991 case OpCode::Id::RRO_R:
1992 case OpCode::Id::RRO_IMM: {
1993 // Currently RRO is only implemented as a register move.
1994 op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
1995 regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1);
1996 LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
1997 break;
1998 }
1999 default: {
2000 UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
2001 }
2002 }
2003 break;
2004 }
2005 case OpCode::Type::ArithmeticImmediate: {
2006 switch (opcode->get().GetId()) {
2007 case OpCode::Id::MOV32_IMM: {
2008 regs.SetRegisterToFloat(instr.gpr0, 0, GetImmediate32(instr), 1, 1);
2009 break;
2010 }
2011 case OpCode::Id::FMUL32_IMM: {
2012 regs.SetRegisterToFloat(
2013 instr.gpr0, 0,
2014 regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1,
2015 instr.fmul32.saturate, instr.op_32.generates_cc, 0, true);
2016 break;
2017 }
2018 case OpCode::Id::FADD32I: {
2019 UNIMPLEMENTED_IF_MSG(
2020 instr.op_32.generates_cc,
2021 "Condition codes generation in FADD32I is partially implemented");
2022
2023 std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
2024 std::string op_b = GetImmediate32(instr);
2025
2026 if (instr.fadd32i.abs_a) {
2027 op_a = "abs(" + op_a + ')';
2028 }
2029
2030 if (instr.fadd32i.negate_a) {
2031 op_a = "-(" + op_a + ')';
2032 }
2033
2034 if (instr.fadd32i.abs_b) {
2035 op_b = "abs(" + op_b + ')';
2036 }
2037
2038 if (instr.fadd32i.negate_b) {
2039 op_b = "-(" + op_b + ')';
2040 }
2041
2042 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false,
2043 instr.op_32.generates_cc, 0, true);
2044 break;
2045 }
2046 }
2047 break;
2048 }
2049 case OpCode::Type::Bfe: {
2050 UNIMPLEMENTED_IF(instr.bfe.negate_b);
2051
2052 std::string op_a = instr.bfe.negate_a ? "-" : "";
2053 op_a += regs.GetRegisterAsInteger(instr.gpr8);
2054
2055 switch (opcode->get().GetId()) {
2056 case OpCode::Id::BFE_IMM: {
2057 std::string inner_shift =
2058 '(' + op_a + " << " + std::to_string(instr.bfe.GetLeftShiftValue()) + ')';
2059 std::string outer_shift =
2060 '(' + inner_shift + " >> " +
2061 std::to_string(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position) + ')';
2062
2063 regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1, false,
2064 instr.generates_cc);
2065 break;
2066 }
2067 default: {
2068 UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName());
2069 }
2070 }
2071
2072 break;
2073 }
2074 case OpCode::Type::Bfi: {
2075 const auto [base, packed_shift] = [&]() -> std::tuple<std::string, std::string> {
2076 switch (opcode->get().GetId()) {
2077 case OpCode::Id::BFI_IMM_R:
2078 return {regs.GetRegisterAsInteger(instr.gpr39, 0, false),
2079 std::to_string(instr.alu.GetSignedImm20_20())};
2080 default:
2081 UNREACHABLE();
2082 return {regs.GetRegisterAsInteger(instr.gpr39, 0, false),
2083 std::to_string(instr.alu.GetSignedImm20_20())};
2084 }
2085 }();
2086 const std::string offset = '(' + packed_shift + " & 0xff)";
2087 const std::string bits = "((" + packed_shift + " >> 8) & 0xff)";
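            // Example of the packed-shift split (immediate value chosen for illustration): a
            // packed immediate of 0x0810 (2064) makes the emitted offset expression evaluate to
            // 16 and the bit-count expression to 8, so the bitfieldInsert call inserts 8 bits of
            // the source starting at bit 16.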
2088 const std::string insert = regs.GetRegisterAsInteger(instr.gpr8, 0, false);
2089 regs.SetRegisterToInteger(instr.gpr0, false, 0,
2090 "bitfieldInsert(" + base + ", " + insert + ", " + offset +
2091 ", " + bits + ')',
2092 1, 1, false, instr.generates_cc);
2093 break;
2094 }
2095 case OpCode::Type::Shift: {
2096 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true);
2097 std::string op_b;
2098
2099 if (instr.is_b_imm) {
2100 op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
2101 } else {
2102 if (instr.is_b_gpr) {
2103 op_b += regs.GetRegisterAsInteger(instr.gpr20);
2104 } else {
2105 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
2106 GLSLRegister::Type::Integer);
2107 }
2108 }
2109
2110 switch (opcode->get().GetId()) {
2111 case OpCode::Id::SHR_C:
2112 case OpCode::Id::SHR_R:
2113 case OpCode::Id::SHR_IMM: {
2114 if (!instr.shift.is_signed) {
2115 // Logical shift right
2116 op_a = "uint(" + op_a + ')';
2117 }
2118
2119             // The cast to int is superfluous for an arithmetic shift; it is only needed for a logical shift
2120 regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(" + op_a + " >> " + op_b + ')',
2121 1, 1, false, instr.generates_cc);
2122 break;
2123 }
2124 case OpCode::Id::SHL_C:
2125 case OpCode::Id::SHL_R:
2126 case OpCode::Id::SHL_IMM:
2127 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
2128 "Condition codes generation in SHL is not implemented");
2129 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1, false,
2130 instr.generates_cc);
2131 break;
2132 default: {
2133 UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
2134 }
2135 }
2136 break;
2137 }
2138 case OpCode::Type::ArithmeticIntegerImmediate: {
2139 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
2140 std::string op_b = std::to_string(instr.alu.imm20_32.Value());
2141
2142 switch (opcode->get().GetId()) {
2143 case OpCode::Id::IADD32I:
2144 UNIMPLEMENTED_IF_MSG(
2145 instr.op_32.generates_cc,
2146 "Condition codes generation in IADD32I is partially implemented");
2147
2148 if (instr.iadd32i.negate_a)
2149 op_a = "-(" + op_a + ')';
2150
2151 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
2152 instr.iadd32i.saturate, instr.op_32.generates_cc);
2153 break;
2154 case OpCode::Id::LOP32I: {
2155
2156 if (instr.alu.lop32i.invert_a)
2157 op_a = "~(" + op_a + ')';
2158
2159 if (instr.alu.lop32i.invert_b)
2160 op_b = "~(" + op_b + ')';
2161
2162 WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
2163 Tegra::Shader::PredicateResultMode::None,
2164 Tegra::Shader::Pred::UnusedIndex, instr.op_32.generates_cc);
2165 break;
2166 }
2167 default: {
2168 UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}",
2169 opcode->get().GetName());
2170 }
2171 }
2172 break;
2173 }
2174 case OpCode::Type::ArithmeticInteger: {
2175 std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
2176 std::string op_b;
2177 if (instr.is_b_imm) {
2178 op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
2179 } else {
2180 if (instr.is_b_gpr) {
2181 op_b += regs.GetRegisterAsInteger(instr.gpr20);
2182 } else {
2183 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
2184 GLSLRegister::Type::Integer);
2185 }
2186 }
2187
2188 switch (opcode->get().GetId()) {
2189 case OpCode::Id::IADD_C:
2190 case OpCode::Id::IADD_R:
2191 case OpCode::Id::IADD_IMM: {
2192 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
2193 "Condition codes generation in IADD is partially implemented");
2194
2195 if (instr.alu_integer.negate_a)
2196 op_a = "-(" + op_a + ')';
2197
2198 if (instr.alu_integer.negate_b)
2199 op_b = "-(" + op_b + ')';
2200
2201 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
2202 instr.alu.saturate_d, instr.generates_cc);
2203 break;
2204 }
2205 case OpCode::Id::IADD3_C:
2206 case OpCode::Id::IADD3_R:
2207 case OpCode::Id::IADD3_IMM: {
2208 UNIMPLEMENTED_IF_MSG(
2209 instr.generates_cc,
2210 "Condition codes generation in IADD3 is partially implemented");
2211
2212 std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
2213
2214                 auto apply_height = [](auto height, auto& operand) {
2215                     switch (height) {
2216                     case Tegra::Shader::IAdd3Height::None:
2217                         break;
2218                     case Tegra::Shader::IAdd3Height::LowerHalfWord:
2219                         operand = "((" + operand + ") & 0xFFFF)";
2220                         break;
2221                     case Tegra::Shader::IAdd3Height::UpperHalfWord:
2222                         operand = "((" + operand + ") >> 16)";
2223                         break;
2223 break;
2224 default:
2225 UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}",
2226 static_cast<u32>(height.Value()));
2227 }
2228 };
2229
2230 if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
2231 apply_height(instr.iadd3.height_a, op_a);
2232 apply_height(instr.iadd3.height_b, op_b);
2233 apply_height(instr.iadd3.height_c, op_c);
2234 }
2235
2236 if (instr.iadd3.neg_a)
2237 op_a = "-(" + op_a + ')';
2238
2239 if (instr.iadd3.neg_b)
2240 op_b = "-(" + op_b + ')';
2241
2242 if (instr.iadd3.neg_c)
2243 op_c = "-(" + op_c + ')';
2244
2245 std::string result;
2246 if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
2247 switch (instr.iadd3.mode) {
2248 case Tegra::Shader::IAdd3Mode::RightShift:
2249 // TODO(tech4me): According to
2250 // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
2251                     // the addition between op_a and op_b should be done in uint33; more
2252                     // investigation is required.
2253 result = "(((" + op_a + " + " + op_b + ") >> 16) + " + op_c + ')';
2254 break;
2255 case Tegra::Shader::IAdd3Mode::LeftShift:
2256 result = "(((" + op_a + " + " + op_b + ") << 16) + " + op_c + ')';
2257 break;
2258 default:
2259 result = '(' + op_a + " + " + op_b + " + " + op_c + ')';
2260 break;
2261 }
2262 } else {
2263 result = '(' + op_a + " + " + op_b + " + " + op_c + ')';
2264 }
2265
2266 regs.SetRegisterToInteger(instr.gpr0, true, 0, result, 1, 1, false,
2267 instr.generates_cc);
2268 break;
2269 }
2270 case OpCode::Id::ISCADD_C:
2271 case OpCode::Id::ISCADD_R:
2272 case OpCode::Id::ISCADD_IMM: {
2273 UNIMPLEMENTED_IF_MSG(
2274 instr.generates_cc,
2275 "Condition codes generation in ISCADD is partially implemented");
2276
2277 if (instr.alu_integer.negate_a)
2278 op_a = "-(" + op_a + ')';
2279
2280 if (instr.alu_integer.negate_b)
2281 op_b = "-(" + op_b + ')';
2282
2283 const std::string shift = std::to_string(instr.alu_integer.shift_amount.Value());
2284
2285 regs.SetRegisterToInteger(instr.gpr0, true, 0,
2286 "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1,
2287 false, instr.generates_cc);
2288 break;
2289 }
2290 case OpCode::Id::POPC_C:
2291 case OpCode::Id::POPC_R:
2292 case OpCode::Id::POPC_IMM: {
2293 if (instr.popc.invert) {
2294 op_b = "~(" + op_b + ')';
2295 }
2296 regs.SetRegisterToInteger(instr.gpr0, true, 0, "bitCount(" + op_b + ')', 1, 1);
2297 break;
2298 }
2299 case OpCode::Id::SEL_C:
2300 case OpCode::Id::SEL_R:
2301 case OpCode::Id::SEL_IMM: {
2302 const std::string condition =
2303 GetPredicateCondition(instr.sel.pred, instr.sel.neg_pred != 0);
2304 regs.SetRegisterToInteger(instr.gpr0, true, 0,
2305 '(' + condition + ") ? " + op_a + " : " + op_b, 1, 1);
2306 break;
2307 }
2308 case OpCode::Id::LOP_C:
2309 case OpCode::Id::LOP_R:
2310 case OpCode::Id::LOP_IMM: {
2311
2312 if (instr.alu.lop.invert_a)
2313 op_a = "~(" + op_a + ')';
2314
2315 if (instr.alu.lop.invert_b)
2316 op_b = "~(" + op_b + ')';
2317
2318 WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b,
2319 instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
2320 instr.generates_cc);
2321 break;
2322 }
2323 case OpCode::Id::LOP3_C:
2324 case OpCode::Id::LOP3_R:
2325 case OpCode::Id::LOP3_IMM: {
2326 const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
2327 std::string lut;
2328
2329 if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
2330 lut = '(' + std::to_string(instr.alu.lop3.GetImmLut28()) + ')';
2331 } else {
2332 lut = '(' + std::to_string(instr.alu.lop3.GetImmLut48()) + ')';
2333 }
2334
2335 WriteLop3Instruction(instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
2336 break;
2337 }
2338 case OpCode::Id::IMNMX_C:
2339 case OpCode::Id::IMNMX_R:
2340 case OpCode::Id::IMNMX_IMM: {
2341 UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
2342 UNIMPLEMENTED_IF_MSG(
2343 instr.generates_cc,
2344 "Condition codes generation in IMNMX is partially implemented");
2345
2346 const std::string condition =
2347 GetPredicateCondition(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
2348 const std::string parameters = op_a + ',' + op_b;
2349 regs.SetRegisterToInteger(instr.gpr0, instr.imnmx.is_signed, 0,
2350 '(' + condition + ") ? min(" + parameters + ") : max(" +
2351 parameters + ')',
2352 1, 1, false, instr.generates_cc);
2353 break;
2354 }
2355 case OpCode::Id::LEA_R2:
2356 case OpCode::Id::LEA_R1:
2357 case OpCode::Id::LEA_IMM:
2358 case OpCode::Id::LEA_RZ:
2359 case OpCode::Id::LEA_HI: {
2360 std::string op_c;
2361
2362 switch (opcode->get().GetId()) {
2363 case OpCode::Id::LEA_R2: {
2364 op_a = regs.GetRegisterAsInteger(instr.gpr20);
2365 op_b = regs.GetRegisterAsInteger(instr.gpr39);
2366 op_c = std::to_string(instr.lea.r2.entry_a);
2367 break;
2368 }
2369
2370 case OpCode::Id::LEA_R1: {
2371 const bool neg = instr.lea.r1.neg != 0;
2372 op_a = regs.GetRegisterAsInteger(instr.gpr8);
2373 if (neg)
2374 op_a = "-(" + op_a + ')';
2375 op_b = regs.GetRegisterAsInteger(instr.gpr20);
2376 op_c = std::to_string(instr.lea.r1.entry_a);
2377 break;
2378 }
2379
2380 case OpCode::Id::LEA_IMM: {
2381 const bool neg = instr.lea.imm.neg != 0;
2382 op_b = regs.GetRegisterAsInteger(instr.gpr8);
2383 if (neg)
2384 op_b = "-(" + op_b + ')';
2385 op_a = std::to_string(instr.lea.imm.entry_a);
2386 op_c = std::to_string(instr.lea.imm.entry_b);
2387 break;
2388 }
2389
2390 case OpCode::Id::LEA_RZ: {
2391 const bool neg = instr.lea.rz.neg != 0;
2392 op_b = regs.GetRegisterAsInteger(instr.gpr8);
2393 if (neg)
2394 op_b = "-(" + op_b + ')';
2395 op_a = regs.GetUniform(instr.lea.rz.cb_index, instr.lea.rz.cb_offset,
2396 GLSLRegister::Type::Integer);
2397 op_c = std::to_string(instr.lea.rz.entry_a);
2398
2399 break;
2400 }
2401
2402 case OpCode::Id::LEA_HI:
2403 default: {
2404 op_b = regs.GetRegisterAsInteger(instr.gpr8);
2405 op_a = std::to_string(instr.lea.imm.entry_a);
2406 op_c = std::to_string(instr.lea.imm.entry_b);
2407 UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
2408 }
2409 }
2410 UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
2411 "Unhandled LEA Predicate");
2412 const std::string value = '(' + op_a + " + (" + op_b + "*(1 << " + op_c + ")))";
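            // LEA therefore computes op_a + op_b * 2^op_c, a base-plus-scaled-index address
            // calculation; e.g. with op_c == 2 the second operand is scaled by 4 (illustrative
            // values, not taken from a real shader).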
2413 regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1, false,
2414 instr.generates_cc);
2415
2416 break;
2417 }
2418 default: {
2419 UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}",
2420 opcode->get().GetName());
2421 }
2422 }
2423
2424 break;
2425 }
2426 case OpCode::Type::ArithmeticHalf: {
2427 if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
2428 opcode->get().GetId() == OpCode::Id::HADD2_R) {
2429 UNIMPLEMENTED_IF(instr.alu_half.ftz != 0);
2430 }
2431 const bool negate_a =
2432 opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
2433 const bool negate_b =
2434 opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;
2435
2436 const std::string op_a =
2437 GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half.type_a,
2438 instr.alu_half.abs_a != 0, negate_a);
2439
2440 std::string op_b;
2441 switch (opcode->get().GetId()) {
2442 case OpCode::Id::HADD2_C:
2443 case OpCode::Id::HMUL2_C:
2444 op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
2445 GLSLRegister::Type::UnsignedInteger);
2446 break;
2447 case OpCode::Id::HADD2_R:
2448 case OpCode::Id::HMUL2_R:
2449 op_b = regs.GetRegisterAsInteger(instr.gpr20, 0, false);
2450 break;
2451 default:
2452 UNREACHABLE();
2453 op_b = "0";
2454 break;
2455 }
2456 op_b = GetHalfFloat(op_b, instr.alu_half.type_b, instr.alu_half.abs_b != 0, negate_b);
2457
2458 const std::string result = [&]() {
2459 switch (opcode->get().GetId()) {
2460 case OpCode::Id::HADD2_C:
2461 case OpCode::Id::HADD2_R:
2462 return '(' + op_a + " + " + op_b + ')';
2463 case OpCode::Id::HMUL2_C:
2464 case OpCode::Id::HMUL2_R:
2465 return '(' + op_a + " * " + op_b + ')';
2466 default:
2467 UNIMPLEMENTED_MSG("Unhandled half float instruction: {}",
2468 opcode->get().GetName());
2469 return std::string("0");
2470 }
2471 }();
2472
2473 regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.alu_half.merge, 1, 1,
2474 instr.alu_half.saturate != 0);
2475 break;
2476 }
2477 case OpCode::Type::ArithmeticHalfImmediate: {
2478 if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
2479 UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0);
2480 } else {
2481 UNIMPLEMENTED_IF(instr.alu_half_imm.precision !=
2482 Tegra::Shader::HalfPrecision::None);
2483 }
2484
2485 const std::string op_a = GetHalfFloat(
2486 regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half_imm.type_a,
2487 instr.alu_half_imm.abs_a != 0, instr.alu_half_imm.negate_a != 0);
2488
2489 const std::string op_b = UnpackHalfImmediate(instr, true);
2490
2491 const std::string result = [&]() {
2492 switch (opcode->get().GetId()) {
2493 case OpCode::Id::HADD2_IMM:
2494 return op_a + " + " + op_b;
2495 case OpCode::Id::HMUL2_IMM:
2496 return op_a + " * " + op_b;
2497 default:
2498 UNREACHABLE();
2499 return std::string("0");
2500 }
2501 }();
2502
2503 regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.alu_half_imm.merge, 1, 1,
2504 instr.alu_half_imm.saturate != 0);
2505 break;
2506 }
2507 case OpCode::Type::Ffma: {
2508 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
2509 std::string op_b = instr.ffma.negate_b ? "-" : "";
2510 std::string op_c = instr.ffma.negate_c ? "-" : "";
2511
2512 UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
2513 UNIMPLEMENTED_IF_MSG(
2514 instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented",
2515 instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
2516 UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented",
2517 instr.ffma.tab5980_1.Value());
2518 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
2519 "Condition codes generation in FFMA is partially implemented");
2520
2521 switch (opcode->get().GetId()) {
2522 case OpCode::Id::FFMA_CR: {
2523 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
2524 GLSLRegister::Type::Float);
2525 op_c += regs.GetRegisterAsFloat(instr.gpr39);
2526 break;
2527 }
2528 case OpCode::Id::FFMA_RR: {
2529 op_b += regs.GetRegisterAsFloat(instr.gpr20);
2530 op_c += regs.GetRegisterAsFloat(instr.gpr39);
2531 break;
2532 }
2533 case OpCode::Id::FFMA_RC: {
2534 op_b += regs.GetRegisterAsFloat(instr.gpr39);
2535 op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
2536 GLSLRegister::Type::Float);
2537 break;
2538 }
2539 case OpCode::Id::FFMA_IMM: {
2540 op_b += GetImmediate19(instr);
2541 op_c += regs.GetRegisterAsFloat(instr.gpr39);
2542 break;
2543 }
2544 default: {
2545 UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
2546 }
2547 }
2548
2549 regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')',
2550 1, 1, instr.alu.saturate_d, instr.generates_cc, 0, true);
2551 break;
2552 }
2553 case OpCode::Type::Hfma2: {
2554 if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
2555 UNIMPLEMENTED_IF(instr.hfma2.rr.precision != Tegra::Shader::HalfPrecision::None);
2556 } else {
2557 UNIMPLEMENTED_IF(instr.hfma2.precision != Tegra::Shader::HalfPrecision::None);
2558 }
2559 const bool saturate = opcode->get().GetId() == OpCode::Id::HFMA2_RR
2560 ? instr.hfma2.rr.saturate != 0
2561 : instr.hfma2.saturate != 0;
2562
2563 const std::string op_a =
2564 GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hfma2.type_a);
2565 std::string op_b, op_c;
2566
2567 switch (opcode->get().GetId()) {
2568 case OpCode::Id::HFMA2_CR:
2569 op_b = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
2570 GLSLRegister::Type::UnsignedInteger),
2571 instr.hfma2.type_b, false, instr.hfma2.negate_b);
2572 op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false),
2573 instr.hfma2.type_reg39, false, instr.hfma2.negate_c);
2574 break;
2575 case OpCode::Id::HFMA2_RC:
2576 op_b = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false),
2577 instr.hfma2.type_reg39, false, instr.hfma2.negate_b);
2578 op_c = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
2579 GLSLRegister::Type::UnsignedInteger),
2580 instr.hfma2.type_b, false, instr.hfma2.negate_c);
2581 break;
2582 case OpCode::Id::HFMA2_RR:
2583 op_b = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
2584 instr.hfma2.type_b, false, instr.hfma2.negate_b);
2585 op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false),
2586 instr.hfma2.rr.type_c, false, instr.hfma2.rr.negate_c);
2587 break;
2588 case OpCode::Id::HFMA2_IMM_R:
2589 op_b = UnpackHalfImmediate(instr, true);
2590 op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false),
2591 instr.hfma2.type_reg39, false, instr.hfma2.negate_c);
2592 break;
2593 default:
2594 UNREACHABLE();
2595 op_c = op_b = "vec2(0)";
2596 break;
2597 }
2598
2599 const std::string result = '(' + op_a + " * " + op_b + " + " + op_c + ')';
2600
2601 regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.hfma2.merge, 1, 1, saturate);
2602 break;
2603 }
2604 case OpCode::Type::Conversion: {
2605 switch (opcode->get().GetId()) {
2606 case OpCode::Id::I2I_R: {
2607 UNIMPLEMENTED_IF(instr.conversion.selector);
2608
2609 std::string op_a = regs.GetRegisterAsInteger(
2610 instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size);
2611
2612 if (instr.conversion.abs_a) {
2613 op_a = "abs(" + op_a + ')';
2614 }
2615
2616 if (instr.conversion.negate_a) {
2617 op_a = "-(" + op_a + ')';
2618 }
2619
2620 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
2621 1, instr.alu.saturate_d, instr.generates_cc, 0,
2622 instr.conversion.dest_size);
2623 break;
2624 }
2625 case OpCode::Id::I2F_R:
2626 case OpCode::Id::I2F_C: {
2627 UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
2628 UNIMPLEMENTED_IF(instr.conversion.selector);
2629 std::string op_a;
2630
2631 if (instr.is_b_gpr) {
2632 op_a =
2633 regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed,
2634 instr.conversion.src_size);
2635 } else {
2636 op_a = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
2637 instr.conversion.is_input_signed
2638 ? GLSLRegister::Type::Integer
2639 : GLSLRegister::Type::UnsignedInteger,
2640 instr.conversion.src_size);
2641 }
2642
2643 if (instr.conversion.abs_a) {
2644 op_a = "abs(" + op_a + ')';
2645 }
2646
2647 if (instr.conversion.negate_a) {
2648 op_a = "-(" + op_a + ')';
2649 }
2650
2651 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, false, instr.generates_cc);
2652 break;
2653 }
2654 case OpCode::Id::F2F_R: {
2655 UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
2656 UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
2657 std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
2658
2659 if (instr.conversion.abs_a) {
2660 op_a = "abs(" + op_a + ')';
2661 }
2662
2663 if (instr.conversion.negate_a) {
2664 op_a = "-(" + op_a + ')';
2665 }
2666
2667 switch (instr.conversion.f2f.rounding) {
2668 case Tegra::Shader::F2fRoundingOp::None:
2669 break;
2670 case Tegra::Shader::F2fRoundingOp::Round:
2671 op_a = "roundEven(" + op_a + ')';
2672 break;
2673 case Tegra::Shader::F2fRoundingOp::Floor:
2674 op_a = "floor(" + op_a + ')';
2675 break;
2676 case Tegra::Shader::F2fRoundingOp::Ceil:
2677 op_a = "ceil(" + op_a + ')';
2678 break;
2679 case Tegra::Shader::F2fRoundingOp::Trunc:
2680 op_a = "trunc(" + op_a + ')';
2681 break;
2682 default:
2683 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
2684 static_cast<u32>(instr.conversion.f2f.rounding.Value()));
2685 break;
2686 }
2687
2688 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d,
2689 instr.generates_cc);
2690 break;
2691 }
2692 case OpCode::Id::F2I_R:
2693 case OpCode::Id::F2I_C: {
2694 UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
2695 std::string op_a{};
2696
2697 if (instr.is_b_gpr) {
2698 op_a = regs.GetRegisterAsFloat(instr.gpr20);
2699 } else {
2700 op_a = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
2701 GLSLRegister::Type::Float);
2702 }
2703
2704 if (instr.conversion.abs_a) {
2705 op_a = "abs(" + op_a + ')';
2706 }
2707
2708 if (instr.conversion.negate_a) {
2709 op_a = "-(" + op_a + ')';
2710 }
2711
2712 switch (instr.conversion.f2i.rounding) {
2713 case Tegra::Shader::F2iRoundingOp::None:
2714 break;
2715 case Tegra::Shader::F2iRoundingOp::Floor:
2716 op_a = "floor(" + op_a + ')';
2717 break;
2718 case Tegra::Shader::F2iRoundingOp::Ceil:
2719 op_a = "ceil(" + op_a + ')';
2720 break;
2721 case Tegra::Shader::F2iRoundingOp::Trunc:
2722 op_a = "trunc(" + op_a + ')';
2723 break;
2724 default:
2725 UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
2726 static_cast<u32>(instr.conversion.f2i.rounding.Value()));
2727 break;
2728 }
2729
2730 if (instr.conversion.is_output_signed) {
2731 op_a = "int(" + op_a + ')';
2732 } else {
2733 op_a = "uint(" + op_a + ')';
2734 }
2735
2736 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
2737 1, false, instr.generates_cc, 0,
2738 instr.conversion.dest_size);
2739 break;
2740 }
2741 default: {
2742 UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
2743 }
2744 }
2745 break;
2746 }
2747 case OpCode::Type::Memory: {
2748 switch (opcode->get().GetId()) {
2749 case OpCode::Id::LD_A: {
2750                 // Note: Shouldn't this be interp mode flat? That is, with no interpolation applied.
2751 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
2752 "Indirect attribute loads are not supported");
2753 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
2754 "Unaligned attribute loads are not supported");
2755
2756 Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
2757 Tegra::Shader::IpaSampleMode::Default};
2758
2759 u64 next_element = instr.attribute.fmt20.element;
2760 u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
2761
2762 const auto LoadNextElement = [&](u32 reg_offset) {
2763 regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element,
2764 static_cast<Attribute::Index>(next_index),
2765 input_mode, instr.gpr39.Value());
2766
2767 // Load the next attribute element into the following register. If the element
2768 // to load goes beyond the vec4 size, load the first element of the next
2769 // attribute.
2770 next_element = (next_element + 1) % 4;
2771 next_index = next_index + (next_element == 0 ? 1 : 0);
2772 };
2773
2774 const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
2775 for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
2776 LoadNextElement(reg_offset);
2777 }
2778 break;
2779 }
2780 case OpCode::Id::LD_C: {
2781 UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
2782
2783 const auto scope = shader.Scope();
2784
2785 shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
2786 " / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);");
2787
2788 const std::string op_a =
2789 regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, "index",
2790 GLSLRegister::Type::Float);
2791
2792 switch (instr.ld_c.type.Value()) {
2793 case Tegra::Shader::UniformType::Single:
2794 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
2795 break;
2796
2797 case Tegra::Shader::UniformType::Double: {
2798 const std::string op_b =
2799 regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4,
2800 "index", GLSLRegister::Type::Float);
2801 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
2802 regs.SetRegisterToFloat(instr.gpr0.Value() + 1, 0, op_b, 1, 1);
2803 break;
2804 }
2805 default:
2806 UNIMPLEMENTED_MSG("Unhandled type: {}",
2807 static_cast<unsigned>(instr.ld_c.type.Value()));
2808 }
2809 break;
2810 }
2811 case OpCode::Id::LD_L: {
2812 UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
2813 static_cast<unsigned>(instr.ld_l.unknown.Value()));
2814
2815 const auto scope = shader.Scope();
2816
2817 std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " +
2818 std::to_string(instr.smem_imm.Value()) + ')';
2819
2820 shader.AddLine("uint index = (" + op + " / 4);");
2821
2822 const std::string op_a = regs.GetLocalMemoryAsFloat("index");
2823
2824 switch (instr.ldst_sl.type.Value()) {
2825 case Tegra::Shader::StoreType::Bytes32:
2826 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
2827 break;
2828 default:
2829 UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
2830 static_cast<unsigned>(instr.ldst_sl.type.Value()));
2831 }
2832 break;
2833 }
2834 case OpCode::Id::ST_A: {
2835 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
2836 "Indirect attribute loads are not supported");
2837 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
2838 "Unaligned attribute loads are not supported");
2839
2840 u64 next_element = instr.attribute.fmt20.element;
2841 u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
2842
2843 const auto StoreNextElement = [&](u32 reg_offset) {
2844 regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index),
2845 next_element, instr.gpr0.Value() + reg_offset,
2846 instr.gpr39.Value());
2847
2848                     // Store the next attribute element from the following register. If the element
2849                     // to store goes beyond the vec4 size, store into the first element of the next
2850                     // attribute.
2851 next_element = (next_element + 1) % 4;
2852 next_index = next_index + (next_element == 0 ? 1 : 0);
2853 };
2854
2855 const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
2856 for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
2857 StoreNextElement(reg_offset);
2858 }
2859
2860 break;
2861 }
2862 case OpCode::Id::ST_L: {
2863 UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
2864 static_cast<unsigned>(instr.st_l.unknown.Value()));
2865
2866 const auto scope = shader.Scope();
2867
2868 std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " +
2869 std::to_string(instr.smem_imm.Value()) + ')';
2870
2871 shader.AddLine("uint index = (" + op + " / 4);");
2872
2873 switch (instr.ldst_sl.type.Value()) {
2874 case Tegra::Shader::StoreType::Bytes32:
2875 regs.SetLocalMemoryAsFloat("index", regs.GetRegisterAsFloat(instr.gpr0));
2876 break;
2877 default:
2878 UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
2879 static_cast<unsigned>(instr.ldst_sl.type.Value()));
2880 }
2881 break;
2882 }
2883 case OpCode::Id::TEX: {
2884 Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
2885 const bool is_array = instr.tex.array != 0;
2886 const bool depth_compare =
2887 instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
2888 const auto process_mode = instr.tex.GetTextureProcessMode();
2889 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
2890 "NODEP is not implemented");
2891 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
2892 "AOFFI is not implemented");
2893
2894 const auto [coord, texture] =
2895 GetTEXCode(instr, texture_type, process_mode, depth_compare, is_array);
2896
2897 const auto scope = shader.Scope();
2898 shader.AddLine(coord);
2899
2900 if (depth_compare) {
2901 regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1);
2902 } else {
2903 shader.AddLine("vec4 texture_tmp = " + texture + ';');
2904 std::size_t dest_elem{};
2905 for (std::size_t elem = 0; elem < 4; ++elem) {
2906 if (!instr.tex.IsComponentEnabled(elem)) {
2907 // Skip disabled components
2908 continue;
2909 }
2910 regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, false,
2911 dest_elem);
2912 ++dest_elem;
2913 }
2914 }
2915 break;
2916 }
2917 case OpCode::Id::TEXS: {
2918 Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()};
2919 const bool is_array{instr.texs.IsArrayTexture()};
2920 const bool depth_compare =
2921 instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
2922 const auto process_mode = instr.texs.GetTextureProcessMode();
2923
2924 UNIMPLEMENTED_IF_MSG(instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
2925 "NODEP is not implemented");
2926
2927 const auto scope = shader.Scope();
2928
2929 auto [coord, texture] =
2930 GetTEXSCode(instr, texture_type, process_mode, depth_compare, is_array);
2931
2932 shader.AddLine(coord);
2933
2934 if (depth_compare) {
2935 texture = "vec4(" + texture + ')';
2936 }
2937 shader.AddLine("vec4 texture_tmp = " + texture + ';');
2938
2939 if (instr.texs.fp32_flag) {
2940 WriteTexsInstructionFloat(instr, "texture_tmp");
2941 } else {
2942 WriteTexsInstructionHalfFloat(instr, "texture_tmp");
2943 }
2944 break;
2945 }
2946 case OpCode::Id::TLDS: {
2947 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
2948 const bool is_array{instr.tlds.IsArrayTexture()};
2949
2950 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
2951 "NODEP is not implemented");
2952 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
2953 "AOFFI is not implemented");
2954 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ),
2955 "MZ is not implemented");
2956
2957 const auto [coord, texture] = GetTLDSCode(instr, texture_type, is_array);
2958
2959 const auto scope = shader.Scope();
2960
2961 shader.AddLine(coord);
2962 shader.AddLine("vec4 texture_tmp = " + texture + ';');
2963 WriteTexsInstructionFloat(instr, "texture_tmp");
2964 break;
2965 }
2966 case OpCode::Id::TLD4: {
2967
2968 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
2969 "NODEP is not implemented");
2970 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
2971 "AOFFI is not implemented");
2972 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
2973 "NDV is not implemented");
2974 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP),
2975 "PTP is not implemented");
2976
2977 auto texture_type = instr.tld4.texture_type.Value();
2978 const bool depth_compare =
2979 instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
2980 const bool is_array = instr.tld4.array != 0;
2981
2982 const auto [coord, texture] =
2983 GetTLD4Code(instr, texture_type, depth_compare, is_array);
2984
2985 const auto scope = shader.Scope();
2986
2987 shader.AddLine(coord);
2988 std::size_t dest_elem{};
2989
2990 shader.AddLine("vec4 texture_tmp = " + texture + ';');
2991 for (std::size_t elem = 0; elem < 4; ++elem) {
2992 if (!instr.tex.IsComponentEnabled(elem)) {
2993 // Skip disabled components
2994 continue;
2995 }
2996 regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, false,
2997 dest_elem);
2998 ++dest_elem;
2999 }
3000 break;
3001 }
3002 case OpCode::Id::TLD4S: {
3003 UNIMPLEMENTED_IF_MSG(
3004 instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
3005 "NODEP is not implemented");
3006 UNIMPLEMENTED_IF_MSG(
3007 instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
3008 "AOFFI is not implemented");
3009
3010 const auto scope = shader.Scope();
3011
3012 std::string coords;
3013
3014 const bool depth_compare =
3015 instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
3016
3017 const std::string sampler = GetSampler(
3018 instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare);
3019
3020 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
3021 coords = "vec2 coords = vec2(" + op_a + ", ";
3022 std::string texture = "textureGather(" + sampler + ", coords, ";
3023
3024 if (!depth_compare) {
3025 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
3026 coords += op_b + ");";
3027 texture += std::to_string(instr.tld4s.component) + ')';
3028 } else {
3029 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
3030 const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20);
3031 coords += op_b + ");";
3032 texture += op_c + ')';
3033 }
3034 shader.AddLine(coords);
3035 shader.AddLine("vec4 texture_tmp = " + texture + ';');
3036 WriteTexsInstructionFloat(instr, "texture_tmp");
3037 break;
3038 }
3039 case OpCode::Id::TXQ: {
3040 UNIMPLEMENTED_IF_MSG(instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
3041 "NODEP is not implemented");
3042
3043 const auto scope = shader.Scope();
3044
3045                // TODO: The new commits on the texture refactor change the way samplers work.
3046                // Sadly, not all texture instructions specify the type of texture their sampler
3047                // uses. This must be fixed at a later stage.
3048 const std::string sampler =
3049 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
3050 switch (instr.txq.query_type) {
3051 case Tegra::Shader::TextureQueryType::Dimension: {
3052 const std::string texture = "textureSize(" + sampler + ", " +
3053 regs.GetRegisterAsInteger(instr.gpr8) + ')';
3054 const std::string mip_level = "textureQueryLevels(" + sampler + ')';
3055 shader.AddLine("ivec2 sizes = " + texture + ';');
3056
3057 regs.SetRegisterToInteger(instr.gpr0.Value() + 0, true, 0, "sizes.x", 1, 1);
3058 regs.SetRegisterToInteger(instr.gpr0.Value() + 1, true, 0, "sizes.y", 1, 1);
3059 regs.SetRegisterToInteger(instr.gpr0.Value() + 2, true, 0, "0", 1, 1);
3060 regs.SetRegisterToInteger(instr.gpr0.Value() + 3, true, 0, mip_level, 1, 1);
3061 break;
3062 }
3063 default: {
3064 UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
3065 static_cast<u32>(instr.txq.query_type.Value()));
3066 }
3067 }
3068 break;
3069 }
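For reference, a self-contained sketch of the register layout the TXQ Dimension query above produces: four consecutive registers receive the width, the height, a literal zero, and the mip level count. The values below are hypothetical.

#include <array>
#include <cstdio>

int main() {
    const int width = 256, height = 128, mip_levels = 9; // hypothetical textureSize/textureQueryLevels results
    std::array<int, 4> gpr{};                            // stands in for gpr0 .. gpr0+3
    gpr[0] = width;
    gpr[1] = height;
    gpr[2] = 0;          // the decompiler writes a constant 0 for the third component
    gpr[3] = mip_levels;
    for (int i = 0; i < 4; ++i) {
        std::printf("gpr0+%d = %d\n", i, gpr[i]);
    }
}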
3070 case OpCode::Id::TMML: {
3071 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
3072 "NODEP is not implemented");
3073 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
3074 "NDV is not implemented");
3075
3076 const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
3077 const bool is_array = instr.tmml.array != 0;
3078 auto texture_type = instr.tmml.texture_type.Value();
3079 const std::string sampler =
3080 GetSampler(instr.sampler, texture_type, is_array, false);
3081
3082 const auto scope = shader.Scope();
3083
3084 // TODO: Add coordinates for different samplers once other texture types are
3085 // implemented.
3086 switch (texture_type) {
3087 case Tegra::Shader::TextureType::Texture1D: {
3088 shader.AddLine("float coords = " + x + ';');
3089 break;
3090 }
3091 case Tegra::Shader::TextureType::Texture2D: {
3092 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
3093 shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
3094 break;
3095 }
3096 default:
3097 UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
3098
3099 // Fallback to interpreting as a 2D texture for now
3100 const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
3101 shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
3102 texture_type = Tegra::Shader::TextureType::Texture2D;
3103 }
3104
3105 const std::string texture = "textureQueryLod(" + sampler + ", coords)";
3106 shader.AddLine("vec2 tmp = " + texture + " * vec2(256.0, 256.0);");
3107
3108 regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(tmp.y)", 1, 1);
3109 regs.SetRegisterToInteger(instr.gpr0.Value() + 1, false, 0, "uint(tmp.x)", 1, 1);
3110 break;
3111 }
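The "* vec2(256.0, 256.0)" scale above suggests the TMML result is treated as an 8.8 fixed-point LOD before being written back. A minimal sketch of that conversion, under that assumption (the LOD value is made up):

// A minimal sketch, assuming the 256.0 scale above encodes the LOD in 8.8 fixed point.
#include <cstdint>
#include <cstdio>

int main() {
    const float lod = 3.5f;                                              // hypothetical computed LOD
    const std::int32_t fixed = static_cast<std::int32_t>(lod * 256.0f);  // 8.8 fixed point
    std::printf("LOD %.2f -> fixed point 0x%04x\n", lod, static_cast<unsigned>(fixed)); // 0x0380
}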
3112 default: {
3113 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
3114 }
3115 }
3116 break;
3117 }
3118 case OpCode::Type::FloatSetPredicate: {
3119 const std::string op_a =
3120 GetOperandAbsNeg(regs.GetRegisterAsFloat(instr.gpr8), instr.fsetp.abs_a != 0,
3121 instr.fsetp.neg_a != 0);
3122
3123 std::string op_b;
3124
3125 if (instr.is_b_imm) {
3126 op_b += '(' + GetImmediate19(instr) + ')';
3127 } else {
3128 if (instr.is_b_gpr) {
3129 op_b += regs.GetRegisterAsFloat(instr.gpr20);
3130 } else {
3131 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
3132 GLSLRegister::Type::Float);
3133 }
3134 }
3135
3136 if (instr.fsetp.abs_b) {
3137 op_b = "abs(" + op_b + ')';
3138 }
3139
3140 // We can't use the constant predicate as destination.
3141 ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
3142
3143 const std::string second_pred =
3144 GetPredicateCondition(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
3145
3146 const std::string combiner = GetPredicateCombiner(instr.fsetp.op);
3147
3148 const std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b);
3149 // Set the primary predicate to the result of Predicate OP SecondPredicate
3150 SetPredicate(instr.fsetp.pred3,
3151 '(' + predicate + ") " + combiner + " (" + second_pred + ')');
3152
3153 if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
3154 // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
3155 // if enabled
3156 SetPredicate(instr.fsetp.pred0,
3157 "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
3158 }
3159 break;
3160 }
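The primary/secondary predicate rule spelled out in the comments above is shared by the FSETP, ISETP, HSETP2, PSETP and VSETP cases: the primary predicate gets (comparison) OP (second predicate) and, when enabled, the secondary predicate gets the negated comparison combined the same way. A tiny self-contained illustration, using logical AND as the combiner:

#include <cstdio>

int main() {
    const bool comparison = true;   // hypothetical result of the condition
    const bool second_pred = false; // hypothetical value of pred39 (possibly negated)
    const bool pred3 = comparison && second_pred;   // primary predicate
    const bool pred0 = !comparison && second_pred;  // secondary predicate, if in use
    std::printf("pred3 = %d, pred0 = %d\n", pred3, pred0);
}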
3161 case OpCode::Type::IntegerSetPredicate: {
3162 const std::string op_a =
3163 regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed);
3164 std::string op_b;
3165
3166 if (instr.is_b_imm) {
3167 op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
3168 } else {
3169 if (instr.is_b_gpr) {
3170 op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.isetp.is_signed);
3171 } else {
3172 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
3173 GLSLRegister::Type::Integer);
3174 }
3175 }
3176
3177 // We can't use the constant predicate as destination.
3178 ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
3179
3180 const std::string second_pred =
3181 GetPredicateCondition(instr.isetp.pred39, instr.isetp.neg_pred != 0);
3182
3183 const std::string combiner = GetPredicateCombiner(instr.isetp.op);
3184
3185 const std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b);
3186 // Set the primary predicate to the result of Predicate OP SecondPredicate
3187 SetPredicate(instr.isetp.pred3,
3188 '(' + predicate + ") " + combiner + " (" + second_pred + ')');
3189
3190 if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
3191 // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
3192 // if enabled
3193 SetPredicate(instr.isetp.pred0,
3194 "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
3195 }
3196 break;
3197 }
3198 case OpCode::Type::HalfSetPredicate: {
3199 UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0);
3200
3201 const std::string op_a =
3202 GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hsetp2.type_a,
3203 instr.hsetp2.abs_a, instr.hsetp2.negate_a);
3204
3205 const std::string op_b = [&]() {
3206 switch (opcode->get().GetId()) {
3207 case OpCode::Id::HSETP2_R:
3208 return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
3209 instr.hsetp2.type_b, instr.hsetp2.abs_a,
3210 instr.hsetp2.negate_b);
3211 default:
3212 UNREACHABLE();
3213 return std::string("vec2(0)");
3214 }
3215 }();
3216
3217 // We can't use the constant predicate as destination.
3218 ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex));
3219
3220 const std::string second_pred =
3221 GetPredicateCondition(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0);
3222
3223 const std::string combiner = GetPredicateCombiner(instr.hsetp2.op);
3224
3225 const std::string component_combiner = instr.hsetp2.h_and ? "&&" : "||";
3226 const std::string predicate =
3227 '(' + GetPredicateComparison(instr.hsetp2.cond, op_a + ".x", op_b + ".x") + ' ' +
3228 component_combiner + ' ' +
3229 GetPredicateComparison(instr.hsetp2.cond, op_a + ".y", op_b + ".y") + ')';
3230
3231 // Set the primary predicate to the result of Predicate OP SecondPredicate
3232 SetPredicate(instr.hsetp2.pred3,
3233 '(' + predicate + ") " + combiner + " (" + second_pred + ')');
3234
3235 if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
3236 // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
3237 // if enabled
3238 SetPredicate(instr.hsetp2.pred0,
3239 "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
3240 }
3241 break;
3242 }
3243 case OpCode::Type::PredicateSetRegister: {
3244 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
3245 "Condition codes generation in PSET is partially implemented");
3246
3247 const std::string op_a =
3248 GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0);
3249 const std::string op_b =
3250 GetPredicateCondition(instr.pset.pred29, instr.pset.neg_pred29 != 0);
3251
3252 const std::string second_pred =
3253 GetPredicateCondition(instr.pset.pred39, instr.pset.neg_pred39 != 0);
3254
3255 const std::string combiner = GetPredicateCombiner(instr.pset.op);
3256
3257 const std::string predicate =
3258 '(' + op_a + ") " + GetPredicateCombiner(instr.pset.cond) + " (" + op_b + ')';
3259 const std::string result = '(' + predicate + ") " + combiner + " (" + second_pred + ')';
3260 if (instr.pset.bf == 0) {
3261 const std::string value = '(' + result + ") ? 0xFFFFFFFF : 0";
3262 regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1, false,
3263 instr.generates_cc);
3264 } else {
3265 const std::string value = '(' + result + ") ? 1.0 : 0.0";
3266 regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1, false, instr.generates_cc);
3267 }
3268 break;
3269 }
3270 case OpCode::Type::PredicateSetPredicate: {
3271 switch (opcode->get().GetId()) {
3272 case OpCode::Id::PSETP: {
3273 const std::string op_a =
3274 GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
3275 const std::string op_b =
3276 GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
3277
3278 // We can't use the constant predicate as destination.
3279 ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
3280
3281 const std::string second_pred =
3282 GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
3283
3284 const std::string combiner = GetPredicateCombiner(instr.psetp.op);
3285
3286 const std::string predicate =
3287 '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')';
3288
3289 // Set the primary predicate to the result of Predicate OP SecondPredicate
3290 SetPredicate(instr.psetp.pred3,
3291 '(' + predicate + ") " + combiner + " (" + second_pred + ')');
3292
3293 if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
3294 // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
3295 // if enabled
3296 SetPredicate(instr.psetp.pred0,
3297 "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
3298 }
3299 break;
3300 }
3301 case OpCode::Id::CSETP: {
3302 const std::string pred =
3303 GetPredicateCondition(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
3304 const std::string combiner = GetPredicateCombiner(instr.csetp.op);
3305 const std::string condition_code = regs.GetConditionCode(instr.csetp.cc);
3306 if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
3307 SetPredicate(instr.csetp.pred3,
3308 '(' + condition_code + ") " + combiner + " (" + pred + ')');
3309 }
3310 if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
3311 SetPredicate(instr.csetp.pred0,
3312 "!(" + condition_code + ") " + combiner + " (" + pred + ')');
3313 }
3314 break;
3315 }
3316 default: {
3317 UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
3318 }
3319 }
3320 break;
3321 }
3322 case OpCode::Type::RegisterSetPredicate: {
3323 UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr);
3324
3325 const std::string apply_mask = [&]() {
3326 switch (opcode->get().GetId()) {
3327 case OpCode::Id::R2P_IMM:
3328 return std::to_string(instr.r2p.immediate_mask);
3329 default:
3330 UNREACHABLE();
3331 return std::to_string(instr.r2p.immediate_mask);
3332 }
3333 }();
3334 const std::string mask = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
3335 " >> " + std::to_string(instr.r2p.byte) + ')';
3336
3337 constexpr u64 programmable_preds = 7;
3338 for (u64 pred = 0; pred < programmable_preds; ++pred) {
3339 const auto shift = std::to_string(1 << pred);
3340
3341 shader.AddLine("if ((" + apply_mask + " & " + shift + ") != 0) {");
3342 ++shader.scope;
3343
3344 SetPredicate(pred, '(' + mask + " & " + shift + ") != 0");
3345
3346 --shader.scope;
3347 shader.AddLine('}');
3348 }
3349 break;
3350 }
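A standalone sketch of the per-predicate update emitted for R2P_IMM above: every programmable predicate whose bit is set in the immediate mask takes the corresponding bit of the shifted source register. Mask, register value and shift amount here are made up.

#include <array>
#include <cstdint>
#include <cstdio>

int main() {
    const std::uint32_t immediate_mask = 0b0000101; // hypothetical: update P0 and P2 only
    const std::uint32_t source = 0xA5;              // hypothetical register being converted to predicates
    const std::uint32_t shift_amount = 0;           // the value of instr.r2p.byte in the emitted code

    std::array<bool, 7> predicates{}; // P0..P6; PT is not writable
    for (unsigned pred = 0; pred < predicates.size(); ++pred) {
        const std::uint32_t bit = 1u << pred;
        if ((immediate_mask & bit) != 0) {
            predicates[pred] = ((source >> shift_amount) & bit) != 0;
        }
    }
    for (unsigned pred = 0; pred < predicates.size(); ++pred) {
        std::printf("P%u = %d\n", pred, predicates[pred] ? 1 : 0);
    }
}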
3351 case OpCode::Type::FloatSet: {
3352 const std::string op_a = GetOperandAbsNeg(regs.GetRegisterAsFloat(instr.gpr8),
3353 instr.fset.abs_a != 0, instr.fset.neg_a != 0);
3354
3355 std::string op_b;
3356
3357 if (instr.is_b_imm) {
3358 const std::string imm = GetImmediate19(instr);
3359 op_b = imm;
3360 } else {
3361 if (instr.is_b_gpr) {
3362 op_b = regs.GetRegisterAsFloat(instr.gpr20);
3363 } else {
3364 op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
3365 GLSLRegister::Type::Float);
3366 }
3367 }
3368
3369 op_b = GetOperandAbsNeg(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0);
3370
3371 // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
3372 // condition is true, and to 0 otherwise.
3373 const std::string second_pred =
3374 GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0);
3375
3376 const std::string combiner = GetPredicateCombiner(instr.fset.op);
3377
3378 const std::string predicate = "((" +
3379 GetPredicateComparison(instr.fset.cond, op_a, op_b) +
3380 ") " + combiner + " (" + second_pred + "))";
3381
3382 if (instr.fset.bf) {
3383 regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1, false,
3384 instr.generates_cc);
3385 } else {
3386 regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1,
3387 1, false, instr.generates_cc);
3388 }
3389 break;
3390 }
3391 case OpCode::Type::IntegerSet: {
3392 const std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed);
3393
3394 std::string op_b;
3395
3396 if (instr.is_b_imm) {
3397 op_b = std::to_string(instr.alu.GetSignedImm20_20());
3398 } else {
3399 if (instr.is_b_gpr) {
3400 op_b = regs.GetRegisterAsInteger(instr.gpr20, 0, instr.iset.is_signed);
3401 } else {
3402 op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
3403 GLSLRegister::Type::Integer);
3404 }
3405 }
3406
3407 // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
3408 // condition is true, and to 0 otherwise.
3409 const std::string second_pred =
3410 GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0);
3411
3412 const std::string combiner = GetPredicateCombiner(instr.iset.op);
3413
3414 const std::string predicate = "((" +
3415 GetPredicateComparison(instr.iset.cond, op_a, op_b) +
3416 ") " + combiner + " (" + second_pred + "))";
3417
3418 if (instr.iset.bf) {
3419 regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
3420 } else {
3421 regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1,
3422 1);
3423 }
3424 break;
3425 }
3426 case OpCode::Type::HalfSet: {
3427 UNIMPLEMENTED_IF(instr.hset2.ftz != 0);
3428
3429 const std::string op_a =
3430 GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hset2.type_a,
3431 instr.hset2.abs_a != 0, instr.hset2.negate_a != 0);
3432
3433 const std::string op_b = [&]() {
3434 switch (opcode->get().GetId()) {
3435 case OpCode::Id::HSET2_R:
3436 return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
3437 instr.hset2.type_b, instr.hset2.abs_b != 0,
3438 instr.hset2.negate_b != 0);
3439 default:
3440 UNREACHABLE();
3441 return std::string("vec2(0)");
3442 }
3443 }();
3444
3445 const std::string second_pred =
3446 GetPredicateCondition(instr.hset2.pred39, instr.hset2.neg_pred != 0);
3447
3448 const std::string combiner = GetPredicateCombiner(instr.hset2.op);
3449
3450 // HSET2 operates on each half float in the pack.
3451 std::string result;
3452 for (int i = 0; i < 2; ++i) {
3453 const std::string float_value = i == 0 ? "0x00003c00" : "0x3c000000";
3454 const std::string integer_value = i == 0 ? "0x0000ffff" : "0xffff0000";
3455 const std::string value = instr.hset2.bf == 1 ? float_value : integer_value;
3456
3457 const std::string comp = std::string(".") + "xy"[i];
3458 const std::string predicate =
3459 "((" + GetPredicateComparison(instr.hset2.cond, op_a + comp, op_b + comp) +
3460 ") " + combiner + " (" + second_pred + "))";
3461
3462 result += '(' + predicate + " ? " + value + " : 0)";
3463 if (i == 0) {
3464 result += " | ";
3465 }
3466 }
3467 regs.SetRegisterToInteger(instr.gpr0, false, 0, '(' + result + ')', 1, 1);
3468 break;
3469 }
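A self-contained sketch of the per-half packing built by the HSET2 loop above: each 16-bit lane of the destination receives either half-float 1.0 (bf == 1) or an all-ones integer mask (bf == 0) when its comparison passes, and zero otherwise.

#include <cstdint>
#include <cstdio>

std::uint32_t PackHset2(bool pass_x, bool pass_y, bool bf) {
    const std::uint32_t float_x = 0x00003c00, float_y = 0x3c000000; // half-float 1.0 in each lane
    const std::uint32_t int_x = 0x0000ffff, int_y = 0xffff0000;     // integer mask in each lane
    std::uint32_t result = 0;
    result |= pass_x ? (bf ? float_x : int_x) : 0u;
    result |= pass_y ? (bf ? float_y : int_y) : 0u;
    return result;
}

int main() {
    std::printf("0x%08x\n", static_cast<unsigned>(PackHset2(true, false, true))); // 0x00003c00
}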
3470 case OpCode::Type::Xmad: {
3471 UNIMPLEMENTED_IF(instr.xmad.sign_a);
3472 UNIMPLEMENTED_IF(instr.xmad.sign_b);
3473 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
3474 "Condition codes generation in XMAD is partially implemented");
3475
3476 std::string op_a{regs.GetRegisterAsInteger(instr.gpr8, 0, instr.xmad.sign_a)};
3477 std::string op_b;
3478 std::string op_c;
3479
3480 // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
3481 UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
3482 const bool is_signed{instr.xmad.sign_a == 1};
3483
3484 bool is_merge{};
3485 switch (opcode->get().GetId()) {
3486 case OpCode::Id::XMAD_CR: {
3487 is_merge = instr.xmad.merge_56;
3488 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
3489 instr.xmad.sign_b ? GLSLRegister::Type::Integer
3490 : GLSLRegister::Type::UnsignedInteger);
3491 op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
3492 break;
3493 }
3494 case OpCode::Id::XMAD_RR: {
3495 is_merge = instr.xmad.merge_37;
3496 op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.xmad.sign_b);
3497 op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
3498 break;
3499 }
3500 case OpCode::Id::XMAD_RC: {
3501 op_b += regs.GetRegisterAsInteger(instr.gpr39, 0, instr.xmad.sign_b);
3502 op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
3503 is_signed ? GLSLRegister::Type::Integer
3504 : GLSLRegister::Type::UnsignedInteger);
3505 break;
3506 }
3507 case OpCode::Id::XMAD_IMM: {
3508 is_merge = instr.xmad.merge_37;
3509 op_b += std::to_string(instr.xmad.imm20_16);
3510 op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
3511 break;
3512 }
3513 default: {
3514 UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
3515 }
3516 }
3517
3518 // TODO(bunnei): Ensure this is right with signed operands
3519 if (instr.xmad.high_a) {
3520 op_a = "((" + op_a + ") >> 16)";
3521 } else {
3522 op_a = "((" + op_a + ") & 0xFFFF)";
3523 }
3524
3525 std::string src2 = '(' + op_b + ')'; // Preserve original source 2
3526 if (instr.xmad.high_b) {
3527 op_b = '(' + src2 + " >> 16)";
3528 } else {
3529 op_b = '(' + src2 + " & 0xFFFF)";
3530 }
3531
3532 std::string product = '(' + op_a + " * " + op_b + ')';
3533 if (instr.xmad.product_shift_left) {
3534 product = '(' + product + " << 16)";
3535 }
3536
3537 switch (instr.xmad.mode) {
3538 case Tegra::Shader::XmadMode::None:
3539 break;
3540 case Tegra::Shader::XmadMode::CLo:
3541 op_c = "((" + op_c + ") & 0xFFFF)";
3542 break;
3543 case Tegra::Shader::XmadMode::CHi:
3544 op_c = "((" + op_c + ") >> 16)";
3545 break;
3546 case Tegra::Shader::XmadMode::CBcc:
3547 op_c = "((" + op_c + ") + (" + src2 + "<< 16))";
3548 break;
3549 default: {
3550 UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}",
3551 static_cast<u32>(instr.xmad.mode.Value()));
3552 }
3553 }
3554
3555 std::string sum{'(' + product + " + " + op_c + ')'};
3556 if (is_merge) {
3557 sum = "((" + sum + " & 0xFFFF) | (" + src2 + "<< 16))";
3558 }
3559
3560 regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1, false,
3561 instr.generates_cc);
3562 break;
3563 }
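As a plain-integer reference for the string building above, this sketch performs the same arithmetic for an unsigned XMAD with the CLo mode and no merge; the function name and input values are illustrative only.

#include <cstdint>
#include <cstdio>

std::uint32_t XmadCLo(std::uint32_t a, std::uint32_t b, std::uint32_t c,
                      bool high_a, bool high_b, bool product_shift_left) {
    const std::uint32_t half_a = high_a ? (a >> 16) : (a & 0xFFFF);
    const std::uint32_t half_b = high_b ? (b >> 16) : (b & 0xFFFF);
    std::uint32_t product = half_a * half_b;
    if (product_shift_left) {
        product <<= 16;
    }
    return product + (c & 0xFFFF); // CLo: only the low half of C contributes
}

int main() {
    const std::uint32_t result = XmadCLo(0x00020003, 0x00040005, 0x12345678, false, false, false);
    std::printf("0x%08x\n", static_cast<unsigned>(result)); // 3 * 5 + 0x5678 = 0x00005687
}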
3564 default: {
3565 switch (opcode->get().GetId()) {
3566 case OpCode::Id::EXIT: {
3567 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
3568 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T,
3569 "EXIT condition code used: {}", static_cast<u32>(cc));
3570
3571 if (stage == Maxwell3D::Regs::ShaderStage::Fragment) {
3572 EmitFragmentOutputsWrite();
3573 }
3574
3575 switch (instr.flow.cond) {
3576 case Tegra::Shader::FlowCondition::Always:
3577 shader.AddLine("return true;");
3578 if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
3579 // If this is an unconditional exit then just end processing here,
3580 // otherwise we have to account for the possibility of the condition
3581 // not being met, so continue processing the next instruction.
3582 offset = PROGRAM_END - 1;
3583 }
3584 break;
3585
3586 case Tegra::Shader::FlowCondition::Fcsm_Tr:
3587                    // TODO(bunnei): What is this used for? If we assume this condition is not
3588                    // satisfied, dual vertex shaders in Farming Simulator make more sense
3589 UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
3590 break;
3591
3592 default:
3593 UNIMPLEMENTED_MSG("Unhandled flow condition: {}",
3594 static_cast<u32>(instr.flow.cond.Value()));
3595 }
3596 break;
3597 }
3598 case OpCode::Id::KIL: {
3599 UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);
3600
3601 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
3602 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T,
3603 "KIL condition code used: {}", static_cast<u32>(cc));
3604
3605 // Enclose "discard" in a conditional, so that GLSL compilation does not complain
3606 // about unexecuted instructions that may follow this.
3607 shader.AddLine("if (true) {");
3608 ++shader.scope;
3609 shader.AddLine("discard;");
3610 --shader.scope;
3611 shader.AddLine("}");
3612
3613 break;
3614 }
3615 case OpCode::Id::OUT_R: {
3616 UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
3617 "Stream buffer is not supported");
3618 ASSERT_MSG(stage == Maxwell3D::Regs::ShaderStage::Geometry,
3619 "OUT is expected to be used in a geometry shader.");
3620
3621 if (instr.out.emit) {
3622 // gpr0 is used to store the next address. Hardware returns a pointer but
3623 // we just return the next index with a cyclic cap.
3624 const std::string current{regs.GetRegisterAsInteger(instr.gpr8, 0, false)};
3625 const std::string next = "((" + current + " + 1" + ") % " +
3626 std::to_string(MAX_GEOMETRY_BUFFERS) + ')';
3627 shader.AddLine("emit_vertex(" + current + ");");
3628 regs.SetRegisterToInteger(instr.gpr0, false, 0, next, 1, 1);
3629 }
3630 if (instr.out.cut) {
3631 shader.AddLine("EndPrimitive();");
3632 }
3633
3634 break;
3635 }
3636 case OpCode::Id::MOV_SYS: {
3637 switch (instr.sys20) {
3638 case Tegra::Shader::SystemVariable::InvocationInfo: {
3639 LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
3640 regs.SetRegisterToInteger(instr.gpr0, false, 0, "0u", 1, 1);
3641 break;
3642 }
3643 case Tegra::Shader::SystemVariable::Ydirection: {
3644 // Config pack's third value is Y_NEGATE's state.
3645 regs.SetRegisterToFloat(instr.gpr0, 0, "uintBitsToFloat(config_pack[2])", 1, 1);
3646 break;
3647 }
3648 default: {
3649 UNIMPLEMENTED_MSG("Unhandled system move: {}",
3650 static_cast<u32>(instr.sys20.Value()));
3651 }
3652 }
3653 break;
3654 }
3655 case OpCode::Id::ISBERD: {
3656 UNIMPLEMENTED_IF(instr.isberd.o != 0);
3657 UNIMPLEMENTED_IF(instr.isberd.skew != 0);
3658 UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
3659 UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
3660 ASSERT_MSG(stage == Maxwell3D::Regs::ShaderStage::Geometry,
3661 "ISBERD is expected to be used in a geometry shader.");
3662 LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
3663 regs.SetRegisterToFloat(instr.gpr0, 0, regs.GetRegisterAsFloat(instr.gpr8), 1, 1);
3664 break;
3665 }
3666 case OpCode::Id::BRA: {
3667 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
3668 "BRA with constant buffers are not implemented");
3669
3670 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
3671 const u32 target = offset + instr.bra.GetBranchTarget();
3672 if (cc != Tegra::Shader::ConditionCode::T) {
3673 const std::string condition_code = regs.GetConditionCode(cc);
3674 shader.AddLine("if (" + condition_code + "){");
3675 shader.scope++;
3676 shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
3677 shader.scope--;
3678 shader.AddLine('}');
3679 } else {
3680 shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
3681 }
3682 break;
3683 }
3684 case OpCode::Id::IPA: {
3685 const auto& attribute = instr.attribute.fmt28;
3686 const auto& reg = instr.gpr0;
3687
3688 Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
3689 instr.ipa.sample_mode.Value()};
3690 regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index,
3691 input_mode);
3692
3693 if (instr.ipa.saturate) {
3694 regs.SetRegisterToFloat(reg, 0, regs.GetRegisterAsFloat(reg), 1, 1, true);
3695 }
3696 break;
3697 }
3698 case OpCode::Id::SSY: {
3699                // The SSY opcode tells the GPU where to re-converge divergent execution paths; it
3700 // sets the target of the jump that the SYNC instruction will make. The SSY opcode
3701 // has a similar structure to the BRA opcode.
3702 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
3703 "Constant buffer flow is not supported");
3704
3705 const u32 target = offset + instr.bra.GetBranchTarget();
3706 EmitPushToFlowStack(target);
3707 break;
3708 }
3709 case OpCode::Id::PBK: {
3710                // PBK pushes to a stack the address where BRK will jump to. This shares a stack with
3711                // SSY, but using SYNC on a PBK address will kill the shader execution. We don't
3712                // emulate this because it's very unlikely a driver will emit such an invalid shader.
3713 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
3714 "Constant buffer PBK is not supported");
3715
3716 const u32 target = offset + instr.bra.GetBranchTarget();
3717 EmitPushToFlowStack(target);
3718 break;
3719 }
3720 case OpCode::Id::SYNC: {
3721 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
3722 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T,
3723 "SYNC condition code used: {}", static_cast<u32>(cc));
3724
3725 // The SYNC opcode jumps to the address previously set by the SSY opcode
3726 EmitPopFromFlowStack();
3727 break;
3728 }
3729 case OpCode::Id::BRK: {
3730 // The BRK opcode jumps to the address previously set by the PBK opcode
3731 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
3732 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T,
3733 "BRK condition code used: {}", static_cast<u32>(cc));
3734
3735 EmitPopFromFlowStack();
3736 break;
3737 }
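EmitPushToFlowStack and EmitPopFromFlowStack are defined earlier in this file. Assuming they simply operate on the flow_stack and flow_stack_top variables declared in Generate() further down, the GLSL produced for an SSY/SYNC (or PBK/BRK) pair would look roughly like the strings printed below; this is a sketch, not the decompiler's exact output.

#include <cstdio>
#include <string>

int main() {
    const unsigned target = 0x20; // hypothetical re-convergence offset taken from the SSY encoding
    const std::string push = "flow_stack[flow_stack_top++] = " + std::to_string(target) + "u;";
    const std::string pop = "{ jmp_to = flow_stack[--flow_stack_top]; break; }";
    std::printf("SSY  -> %s\nSYNC -> %s\n", push.c_str(), pop.c_str());
}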
3738 case OpCode::Id::DEPBAR: {
3739 // TODO(Subv): Find out if we actually have to care about this instruction or if
3740 // the GLSL compiler takes care of that for us.
3741 LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
3742 break;
3743 }
3744 case OpCode::Id::VMAD: {
3745 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
3746 "Condition codes generation in VMAD is not implemented");
3747
3748 const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
3749 const std::string op_a = GetVideoOperandA(instr);
3750 const std::string op_b = GetVideoOperandB(instr);
3751 const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39, 0, result_signed);
3752
3753 std::string result = '(' + op_a + " * " + op_b + " + " + op_c + ')';
3754
3755 switch (instr.vmad.shr) {
3756 case Tegra::Shader::VmadShr::Shr7:
3757 result = '(' + result + " >> 7)";
3758 break;
3759 case Tegra::Shader::VmadShr::Shr15:
3760 result = '(' + result + " >> 15)";
3761 break;
3762 }
3763
3764 regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1,
3765 instr.vmad.saturate, instr.vmad.cc);
3766 break;
3767 }
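For clarity, the arithmetic produced by the VMAD translation above as a standalone function: a multiply-add followed by the optional arithmetic shift selected by vmad.shr. Saturation and 32-bit overflow are not modelled in this sketch.

#include <cstdio>

int Vmad(int a, int b, int c, unsigned shr) {
    int result = a * b + c;
    if (shr == 7) {
        result >>= 7;
    } else if (shr == 15) {
        result >>= 15;
    }
    return result;
}

int main() {
    std::printf("%d\n", Vmad(100, 200, 50, 7)); // (100 * 200 + 50) >> 7 = 156
}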
3768 case OpCode::Id::VSETP: {
3769 const std::string op_a = GetVideoOperandA(instr);
3770 const std::string op_b = GetVideoOperandB(instr);
3771
3772 // We can't use the constant predicate as destination.
3773 ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
3774
3775 const std::string second_pred = GetPredicateCondition(instr.vsetp.pred39, false);
3776
3777 const std::string combiner = GetPredicateCombiner(instr.vsetp.op);
3778
3779 const std::string predicate = GetPredicateComparison(instr.vsetp.cond, op_a, op_b);
3780 // Set the primary predicate to the result of Predicate OP SecondPredicate
3781 SetPredicate(instr.vsetp.pred3,
3782 '(' + predicate + ") " + combiner + " (" + second_pred + ')');
3783
3784 if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
3785 // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
3786 // if enabled
3787 SetPredicate(instr.vsetp.pred0,
3788 "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
3789 }
3790 break;
3791 }
3792 default: {
3793 UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
3794 break;
3795 }
3796 }
3797
3798 break;
3799 }
3800 }
3801
3802 // Close the predicate condition scope.
3803 if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
3804 --shader.scope;
3805 shader.AddLine('}');
3806 }
3807
3808 return offset + 1;
3809 }
3810
3811 /**
3812 * Compiles a range of instructions from Tegra to GLSL.
3813 * @param begin the offset of the starting instruction.
3814 * @param end the offset where the compilation should stop (exclusive).
3815 * @return the offset of the next instruction to compile. PROGRAM_END if the program
3816 * terminates.
3817 */
3818 u32 CompileRange(u32 begin, u32 end) {
3819 u32 program_counter;
3820 for (program_counter = begin; program_counter < (begin > end ? PROGRAM_END : end);) {
3821 program_counter = CompileInstr(program_counter);
3822 }
3823 return program_counter;
3824 }
3825
3826 void Generate(const std::string& suffix) {
3827 // Add declarations for all subroutines
3828 for (const auto& subroutine : subroutines) {
3829 shader.AddLine("bool " + subroutine.GetName() + "();");
3830 }
3831 shader.AddNewLine();
3832
3833 // Add the main entry point
3834 shader.AddLine("bool exec_" + suffix + "() {");
3835 ++shader.scope;
3836 CallSubroutine(GetSubroutine(main_offset, PROGRAM_END));
3837 --shader.scope;
3838 shader.AddLine("}\n");
3839
3840 // Add definitions for all subroutines
3841 for (const auto& subroutine : subroutines) {
3842 std::set<u32> labels = subroutine.labels;
3843
3844 shader.AddLine("bool " + subroutine.GetName() + "() {");
3845 ++shader.scope;
3846
3847 if (labels.empty()) {
3848 if (CompileRange(subroutine.begin, subroutine.end) != PROGRAM_END) {
3849 shader.AddLine("return false;");
3850 }
3851 } else {
3852 labels.insert(subroutine.begin);
3853 shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;");
3854
3855                // TODO(Subv): Figure out the actual depth of the flow stack; for now it seems
3856 // unlikely that shaders will use 20 nested SSYs and PBKs.
3857 constexpr u32 FLOW_STACK_SIZE = 20;
3858 shader.AddLine("uint flow_stack[" + std::to_string(FLOW_STACK_SIZE) + "];");
3859 shader.AddLine("uint flow_stack_top = 0u;");
3860
3861 shader.AddLine("while (true) {");
3862 ++shader.scope;
3863
3864 shader.AddLine("switch (jmp_to) {");
3865
3866 for (auto label : labels) {
3867 shader.AddLine("case " + std::to_string(label) + "u: {");
3868 ++shader.scope;
3869
3870 const auto next_it = labels.lower_bound(label + 1);
3871 const u32 next_label = next_it == labels.end() ? subroutine.end : *next_it;
3872
3873 const u32 compile_end = CompileRange(label, next_label);
3874 if (compile_end > next_label && compile_end != PROGRAM_END) {
3875                        // This happens only when there is a label inside an IF/LOOP block
3876 shader.AddLine(" jmp_to = " + std::to_string(compile_end) + "u; break; }");
3877 labels.emplace(compile_end);
3878 }
3879
3880 --shader.scope;
3881 shader.AddLine('}');
3882 }
3883
3884 shader.AddLine("default: return false;");
3885 shader.AddLine('}');
3886
3887 --shader.scope;
3888 shader.AddLine('}');
3889
3890 shader.AddLine("return false;");
3891 }
3892
3893 --shader.scope;
3894 shader.AddLine("}\n");
3895
3896 DEBUG_ASSERT(shader.scope == 0);
3897 }
3898
3899 GenerateDeclarations();
3900 }
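Putting the AddLine calls above together, the labelled-subroutine path produces a GLSL skeleton of roughly the shape printed below. The subroutine name format and the label values are invented for illustration; only the structure mirrors the code above.

#include <cstdio>

int main() {
    std::puts(R"(bool sub_10_30_fragment() {   // name format is illustrative only
    uint jmp_to = 10u;
    uint flow_stack[20];
    uint flow_stack_top = 0u;
    while (true) {
        switch (jmp_to) {
        case 10u: {
            // ... translated instructions; ends with a jump, a return or a fall-through ...
        }
        default: return false;
        }
    }
    return false;
})");
}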
3901
3902 /// Add declarations for registers
3903 void GenerateDeclarations() {
3904 regs.GenerateDeclarations(suffix);
3905
3906 for (const auto& pred : declr_predicates) {
3907 declarations.AddLine("bool " + pred + " = false;");
3908 }
3909 declarations.AddNewLine();
3910 }
3911
3912private:
3913 const std::set<Subroutine>& subroutines;
3914 const ProgramCode& program_code;
3915 Tegra::Shader::Header header;
3916 const u32 main_offset;
3917 Maxwell3D::Regs::ShaderStage stage;
3918 const std::string& suffix;
3919 u64 local_memory_size;
3920 std::size_t shader_length;
3921
3922 ShaderWriter shader;
3923 ShaderWriter declarations;
3924 GLSLRegisterManager regs{shader, declarations, stage, suffix, header};
3925
3926 // Declarations
3927 std::set<std::string> declr_predicates;
3928}; // class GLSLGenerator
3929
3930std::string GetCommonDeclarations() {
3931 return fmt::format("#define MAX_CONSTBUFFER_ELEMENTS {}\n",
3932 RasterizerOpenGL::MaxConstbufferSize / sizeof(GLvec4));
3933}
3934
3935std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
3936 Maxwell3D::Regs::ShaderStage stage,
3937 const std::string& suffix) {
3938 try {
3939 ControlFlowAnalyzer analyzer(program_code, main_offset, suffix);
3940 const auto subroutines = analyzer.GetSubroutines();
3941 GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix,
3942 analyzer.GetShaderLength());
3943 return ProgramResult{generator.GetShaderCode(), generator.GetEntries()};
3944 } catch (const DecompileFail& exception) {
3945 LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what());
3946 }
3947 return {};
3948}
3949
3950} // namespace OpenGL::GLShader::Decompiler
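Since DecompileProgram converts the internal DecompileFail exception into an empty optional, callers had to supply a fallback, as the removed ".value_or(ProgramResult())" calls in gl_shader_gen.cpp below show. A self-contained illustration of that calling pattern with stand-in types (nothing here is the real API):

#include <cstdio>
#include <optional>
#include <string>
#include <utility>

using FakeProgramResult = std::pair<std::string, int>; // stand-in for (GLSL source, ShaderEntries)

std::optional<FakeProgramResult> FakeDecompile(bool succeed) {
    if (!succeed) {
        return std::nullopt; // mirrors the catch (DecompileFail) path returning {}
    }
    return FakeProgramResult{"bool exec_vertex() { return true; }", 0};
}

int main() {
    const auto result = FakeDecompile(false).value_or(FakeProgramResult());
    std::printf("GLSL length: %zu\n", result.first.size()); // empty string on failure
}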
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
deleted file mode 100644
index d01a4a7ee..000000000
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ /dev/null
@@ -1,25 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <functional>
9#include <optional>
10#include <string>
11#include "common/common_types.h"
12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/renderer_opengl/gl_shader_gen.h"
14
15namespace OpenGL::GLShader::Decompiler {
16
17using Tegra::Engines::Maxwell3D;
18
19std::string GetCommonDeclarations();
20
21std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
22 Maxwell3D::Regs::ShaderStage stage,
23 const std::string& suffix);
24
25} // namespace OpenGL::GLShader::Decompiler
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 5d0819dc5..59f45cde3 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -5,24 +5,27 @@
5#include <fmt/format.h> 5#include <fmt/format.h>
6#include "common/assert.h" 6#include "common/assert.h"
7#include "video_core/engines/maxwell_3d.h" 7#include "video_core/engines/maxwell_3d.h"
8#include "video_core/renderer_opengl/gl_shader_decompiler.h"
9#include "video_core/renderer_opengl/gl_shader_gen.h" 8#include "video_core/renderer_opengl/gl_shader_gen.h"
9#include "video_core/shader/glsl_decompiler.h"
10#include "video_core/shader/shader_ir.h"
10 11
11namespace OpenGL::GLShader { 12namespace OpenGL::GLShader {
12 13
13using Tegra::Engines::Maxwell3D; 14using Tegra::Engines::Maxwell3D;
15using VideoCommon::Shader::ProgramCode;
16using VideoCommon::Shader::ShaderIR;
14 17
15static constexpr u32 PROGRAM_OFFSET{10}; 18static constexpr u32 PROGRAM_OFFSET{10};
16 19
17ProgramResult GenerateVertexShader(const ShaderSetup& setup) { 20ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
18 std::string out = "#version 430 core\n";
19 out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
20 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); 21 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
21 out += "// Shader Unique Id: VS" + id + "\n\n";
22 out += Decompiler::GetCommonDeclarations();
23 22
24 out += R"( 23 std::string out = "#version 430 core\n";
24 out += "// Shader Unique Id: VS" + id + '\n';
25 out += "#extension GL_ARB_separate_shader_objects : enable\n";
26 out += GetCommonDeclarations();
25 27
28 out += R"(
26layout (location = 0) out vec4 position; 29layout (location = 0) out vec4 position;
27 30
28layout(std140) uniform vs_config { 31layout(std140) uniform vs_config {
@@ -31,39 +34,30 @@ layout(std140) uniform vs_config {
31 uvec4 alpha_test; 34 uvec4 alpha_test;
32}; 35};
33)"; 36)";
34 37 ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
35 if (setup.IsDualProgram()) { 38 ProgramResult program = Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
36 out += "bool exec_vertex_b();\n";
37 }
38
39 ProgramResult program =
40 Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
41 Maxwell3D::Regs::ShaderStage::Vertex, "vertex")
42 .value_or(ProgramResult());
43 39
44 out += program.first; 40 out += program.first;
45 41
46 if (setup.IsDualProgram()) { 42 if (setup.IsDualProgram()) {
43 ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
47 ProgramResult program_b = 44 ProgramResult program_b =
48 Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET, 45 Decompile(program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
49 Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b") 46
50 .value_or(ProgramResult());
51 out += program_b.first; 47 out += program_b.first;
52 } 48 }
53 49
54 out += R"( 50 out += R"(
55
56void main() { 51void main() {
57 position = vec4(0.0, 0.0, 0.0, 0.0); 52 position = vec4(0.0, 0.0, 0.0, 0.0);
58 exec_vertex(); 53 execute_vertex();
59)"; 54)";
60 55
61 if (setup.IsDualProgram()) { 56 if (setup.IsDualProgram()) {
62 out += " exec_vertex_b();"; 57 out += " execute_vertex_b();";
63 } 58 }
64 59
65 out += R"( 60 out += R"(
66
67 // Check if the flip stage is VertexB 61 // Check if the flip stage is VertexB
68 // Config pack's second value is flip_stage 62 // Config pack's second value is flip_stage
69 if (config_pack[1] == 1) { 63 if (config_pack[1] == 1) {
@@ -77,25 +71,23 @@ void main() {
77 if (config_pack[1] == 1) { 71 if (config_pack[1] == 1) {
78 position.w = 1.0; 72 position.w = 1.0;
79 } 73 }
80} 74})";
81
82)";
83 75
84 return {out, program.second}; 76 return {out, program.second};
85} 77}
86 78
87ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { 79ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
88 // Version is intentionally skipped in shader generation, it's added by the lazy compilation. 80 // Version is intentionally skipped in shader generation, it's added by the lazy compilation.
89 std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
90 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); 81 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
91 out += "// Shader Unique Id: GS" + id + "\n\n";
92 out += Decompiler::GetCommonDeclarations();
93 out += "bool exec_geometry();\n";
94 82
83 std::string out = "// Shader Unique Id: GS" + id + '\n';
84 out += "#extension GL_ARB_separate_shader_objects : enable\n";
85 out += GetCommonDeclarations();
86
87 ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
95 ProgramResult program = 88 ProgramResult program =
96 Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, 89 Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
97 Maxwell3D::Regs::ShaderStage::Geometry, "geometry") 90
98 .value_or(ProgramResult());
99 out += R"( 91 out += R"(
100out gl_PerVertex { 92out gl_PerVertex {
101 vec4 gl_Position; 93 vec4 gl_Position;
@@ -109,28 +101,26 @@ layout (std140) uniform gs_config {
109 uvec4 config_pack; // instance_id, flip_stage, y_direction, padding 101 uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
110 uvec4 alpha_test; 102 uvec4 alpha_test;
111}; 103};
104)";
105
106 out += program.first;
112 107
 108 out += R"(
113void main() { 109void main() {
114 exec_geometry(); 110 execute_geometry();
115} 111};)";
116 112
117)";
118 out += program.first;
119 return {out, program.second}; 113 return {out, program.second};
120} 114}
121 115
122ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { 116ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
123 std::string out = "#version 430 core\n";
124 out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
125 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); 117 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
126 out += "// Shader Unique Id: FS" + id + "\n\n";
127 out += Decompiler::GetCommonDeclarations();
128 out += "bool exec_fragment();\n";
129 118
130 ProgramResult program = 119 std::string out = "#version 430 core\n";
131 Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, 120 out += "// Shader Unique Id: FS" + id + '\n';
132 Maxwell3D::Regs::ShaderStage::Fragment, "fragment") 121 out += "#extension GL_ARB_separate_shader_objects : enable\n";
133 .value_or(ProgramResult()); 122 out += GetCommonDeclarations();
123
134 out += R"( 124 out += R"(
135layout(location = 0) out vec4 FragColor0; 125layout(location = 0) out vec4 FragColor0;
136layout(location = 1) out vec4 FragColor1; 126layout(location = 1) out vec4 FragColor1;
@@ -171,14 +161,20 @@ bool AlphaFunc(in float value) {
171 default: 161 default:
172 return false; 162 return false;
173 } 163 }
174} 164})";
165
166 ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
167 ProgramResult program =
168 Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
175 169
170 out += program.first;
171
172 out += R"(
176void main() { 173void main() {
177 exec_fragment(); 174 execute_fragment();
178} 175}
179 176
180)"; 177)";
181 out += program.first;
182 return {out, program.second}; 178 return {out, program.second};
183} 179}
184} // namespace OpenGL::GLShader 180} // namespace OpenGL::GLShader \ No newline at end of file
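For orientation, the rough shape of the string GenerateVertexShader assembles after this change, with the decompiled body elided. This is illustrative only: the unique id is made up and the real declarations come from GetCommonDeclarations() and the new GLSL decompiler.

#include <cstdio>

int main() {
    std::puts(R"(#version 430 core
// Shader Unique Id: VS0123456789abcdef
#extension GL_ARB_separate_shader_objects : enable
#define MAX_CONSTBUFFER_ELEMENTS ...
layout (location = 0) out vec4 position;
layout(std140) uniform vs_config {
    // config_pack, alpha_test, ...
};
// ... decompiled program defining execute_vertex() ...
void main() {
    position = vec4(0.0, 0.0, 0.0, 0.0);
    execute_vertex();
    // flip-stage / y-direction handling elided
})");
}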
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index fcc20d3b4..b14bdb29c 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -10,164 +10,12 @@
10 10
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/engines/shader_bytecode.h" 12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/shader/glsl_decompiler.h"
14#include "video_core/shader/shader_ir.h"
13 15
14namespace OpenGL::GLShader { 16namespace OpenGL::GLShader {
15 17
16constexpr std::size_t MAX_PROGRAM_CODE_LENGTH{0x1000}; 18using VideoCommon::Shader::ProgramCode;
17using ProgramCode = std::vector<u64>;
18
19enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
20
21class ConstBufferEntry {
22 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
23
24public:
25 void MarkAsUsed(u64 index, u64 offset, Maxwell::ShaderStage stage) {
26 is_used = true;
27 this->index = static_cast<unsigned>(index);
28 this->stage = stage;
29 max_offset = std::max(max_offset, static_cast<unsigned>(offset));
30 }
31
32 void MarkAsUsedIndirect(u64 index, Maxwell::ShaderStage stage) {
33 is_used = true;
34 is_indirect = true;
35 this->index = static_cast<unsigned>(index);
36 this->stage = stage;
37 }
38
39 bool IsUsed() const {
40 return is_used;
41 }
42
43 bool IsIndirect() const {
44 return is_indirect;
45 }
46
47 unsigned GetIndex() const {
48 return index;
49 }
50
51 unsigned GetSize() const {
52 return max_offset + 1;
53 }
54
55 std::string GetName() const {
56 return BufferBaseNames[static_cast<std::size_t>(stage)] + std::to_string(index);
57 }
58
59 u32 GetHash() const {
60 return (static_cast<u32>(stage) << 16) | index;
61 }
62
63private:
64 static constexpr std::array<const char*, Maxwell::MaxShaderStage> BufferBaseNames = {
65 "buffer_vs_c", "buffer_tessc_c", "buffer_tesse_c", "buffer_gs_c", "buffer_fs_c",
66 };
67
68 bool is_used{};
69 bool is_indirect{};
70 unsigned index{};
71 unsigned max_offset{};
72 Maxwell::ShaderStage stage;
73};
74
75class SamplerEntry {
76 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
77
78public:
79 SamplerEntry(Maxwell::ShaderStage stage, std::size_t offset, std::size_t index,
80 Tegra::Shader::TextureType type, bool is_array, bool is_shadow)
81 : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array),
82 is_shadow(is_shadow) {}
83
84 std::size_t GetOffset() const {
85 return offset;
86 }
87
88 std::size_t GetIndex() const {
89 return sampler_index;
90 }
91
92 Maxwell::ShaderStage GetStage() const {
93 return stage;
94 }
95
96 std::string GetName() const {
97 return std::string(TextureSamplerNames[static_cast<std::size_t>(stage)]) + '_' +
98 std::to_string(sampler_index);
99 }
100
101 std::string GetTypeString() const {
102 using Tegra::Shader::TextureType;
103 std::string glsl_type;
104
105 switch (type) {
106 case TextureType::Texture1D:
107 glsl_type = "sampler1D";
108 break;
109 case TextureType::Texture2D:
110 glsl_type = "sampler2D";
111 break;
112 case TextureType::Texture3D:
113 glsl_type = "sampler3D";
114 break;
115 case TextureType::TextureCube:
116 glsl_type = "samplerCube";
117 break;
118 default:
119 UNIMPLEMENTED();
120 }
121 if (is_array)
122 glsl_type += "Array";
123 if (is_shadow)
124 glsl_type += "Shadow";
125 return glsl_type;
126 }
127
128 Tegra::Shader::TextureType GetType() const {
129 return type;
130 }
131
132 bool IsArray() const {
133 return is_array;
134 }
135
136 bool IsShadow() const {
137 return is_shadow;
138 }
139
140 u32 GetHash() const {
141 return (static_cast<u32>(stage) << 16) | static_cast<u32>(sampler_index);
142 }
143
144 static std::string GetArrayName(Maxwell::ShaderStage stage) {
145 return TextureSamplerNames[static_cast<std::size_t>(stage)];
146 }
147
148private:
149 static constexpr std::array<const char*, Maxwell::MaxShaderStage> TextureSamplerNames = {
150 "tex_vs", "tex_tessc", "tex_tesse", "tex_gs", "tex_fs",
151 };
152
153 /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
154 /// instruction.
155 std::size_t offset;
156 Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used.
157 std::size_t sampler_index; ///< Value used to index into the generated GLSL sampler array.
158 Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc)
159 bool is_array; ///< Whether the texture is being sampled as an array texture or not.
160 bool is_shadow; ///< Whether the texture is being sampled as a depth texture or not.
161};
162
163struct ShaderEntries {
164 std::vector<ConstBufferEntry> const_buffer_entries;
165 std::vector<SamplerEntry> texture_samplers;
166 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> clip_distances;
167 std::size_t shader_length;
168};
169
170using ProgramResult = std::pair<std::string, ShaderEntries>;
171 19
172struct ShaderSetup { 20struct ShaderSetup {
173 explicit ShaderSetup(ProgramCode program_code) { 21 explicit ShaderSetup(ProgramCode program_code) {