diff options
| author | 2018-12-20 22:29:15 -0300 | |
|---|---|---|
| committer | 2019-01-15 17:54:50 -0300 | |
| commit | a4f052f6b3ea689539d3ccc11bde273986728d2e (patch) | |
| tree | 14d068354dcb3dc696c863b0199b8549ae1b7d88 /src | |
| parent | glsl_decompiler: Implementation (diff) | |
| download | yuzu-a4f052f6b3ea689539d3ccc11bde273986728d2e.tar.gz yuzu-a4f052f6b3ea689539d3ccc11bde273986728d2e.tar.xz yuzu-a4f052f6b3ea689539d3ccc11bde273986728d2e.zip | |
video_core: Replace gl_shader_decompiler
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 3950 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.h | 25 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 94 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.h | 158 |
8 files changed, 57 insertions, 4185 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 86b06487d..b68f3273d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -42,8 +42,6 @@ add_library(video_core STATIC | |||
| 42 | renderer_opengl/gl_resource_manager.h | 42 | renderer_opengl/gl_resource_manager.h |
| 43 | renderer_opengl/gl_shader_cache.cpp | 43 | renderer_opengl/gl_shader_cache.cpp |
| 44 | renderer_opengl/gl_shader_cache.h | 44 | renderer_opengl/gl_shader_cache.h |
| 45 | renderer_opengl/gl_shader_decompiler.cpp | ||
| 46 | renderer_opengl/gl_shader_decompiler.h | ||
| 47 | renderer_opengl/gl_shader_gen.cpp | 45 | renderer_opengl/gl_shader_gen.cpp |
| 48 | renderer_opengl/gl_shader_gen.h | 46 | renderer_opengl/gl_shader_gen.h |
| 49 | renderer_opengl/gl_shader_manager.cpp | 47 | renderer_opengl/gl_shader_manager.cpp |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 73567eb8c..97412590b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -925,7 +925,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad | |||
| 925 | const auto& gpu = Core::System::GetInstance().GPU(); | 925 | const auto& gpu = Core::System::GetInstance().GPU(); |
| 926 | const auto& maxwell3d = gpu.Maxwell3D(); | 926 | const auto& maxwell3d = gpu.Maxwell3D(); |
| 927 | const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)]; | 927 | const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)]; |
| 928 | const auto& entries = shader->GetShaderEntries().const_buffer_entries; | 928 | const auto& entries = shader->GetShaderEntries().const_buffers; |
| 929 | 929 | ||
| 930 | constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers; | 930 | constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers; |
| 931 | std::array<GLuint, max_binds> bind_buffers; | 931 | std::array<GLuint, max_binds> bind_buffers; |
| @@ -993,7 +993,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, | |||
| 993 | MICROPROFILE_SCOPE(OpenGL_Texture); | 993 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| 994 | const auto& gpu = Core::System::GetInstance().GPU(); | 994 | const auto& gpu = Core::System::GetInstance().GPU(); |
| 995 | const auto& maxwell3d = gpu.Maxwell3D(); | 995 | const auto& maxwell3d = gpu.Maxwell3D(); |
| 996 | const auto& entries = shader->GetShaderEntries().texture_samplers; | 996 | const auto& entries = shader->GetShaderEntries().samplers; |
| 997 | 997 | ||
| 998 | ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units), | 998 | ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units), |
| 999 | "Exceeded the number of active textures."); | 999 | "Exceeded the number of active textures."); |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c785fffa3..e5435d733 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -12,9 +12,13 @@ | |||
| 12 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 12 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 13 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 13 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 14 | #include "video_core/renderer_opengl/utils.h" | 14 | #include "video_core/renderer_opengl/utils.h" |
| 15 | #include "video_core/shader/glsl_decompiler.h" | ||
| 16 | #include "video_core/shader/shader_ir.h" | ||
| 15 | 17 | ||
| 16 | namespace OpenGL { | 18 | namespace OpenGL { |
| 17 | 19 | ||
| 20 | using VideoCommon::Shader::ProgramCode; | ||
| 21 | |||
| 18 | /// Gets the address for the specified shader stage program | 22 | /// Gets the address for the specified shader stage program |
| 19 | static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { | 23 | static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { |
| 20 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | 24 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| @@ -24,8 +28,8 @@ static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { | |||
| 24 | } | 28 | } |
| 25 | 29 | ||
| 26 | /// Gets the shader program code from memory for the specified address | 30 | /// Gets the shader program code from memory for the specified address |
| 27 | static GLShader::ProgramCode GetShaderCode(VAddr addr) { | 31 | static ProgramCode GetShaderCode(VAddr addr) { |
| 28 | GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH); | 32 | ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); |
| 29 | Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64)); | 33 | Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64)); |
| 30 | return program_code; | 34 | return program_code; |
| 31 | } | 35 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 768747968..aad1cf6be 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include "video_core/rasterizer_cache.h" | 13 | #include "video_core/rasterizer_cache.h" |
| 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 15 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 15 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| 16 | #include "video_core/shader/glsl_decompiler.h" | ||
| 16 | 17 | ||
| 17 | namespace OpenGL { | 18 | namespace OpenGL { |
| 18 | 19 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp deleted file mode 100644 index 1bb09e61b..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ /dev/null | |||
| @@ -1,3950 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <map> | ||
| 6 | #include <optional> | ||
| 7 | #include <set> | ||
| 8 | #include <string> | ||
| 9 | #include <string_view> | ||
| 10 | #include <unordered_set> | ||
| 11 | |||
| 12 | #include <fmt/format.h> | ||
| 13 | |||
| 14 | #include "common/assert.h" | ||
| 15 | #include "common/common_types.h" | ||
| 16 | #include "video_core/engines/shader_bytecode.h" | ||
| 17 | #include "video_core/engines/shader_header.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_rasterizer.h" | ||
| 19 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 20 | |||
| 21 | namespace OpenGL::GLShader::Decompiler { | ||
| 22 | |||
| 23 | using Tegra::Shader::Attribute; | ||
| 24 | using Tegra::Shader::Instruction; | ||
| 25 | using Tegra::Shader::LogicOperation; | ||
| 26 | using Tegra::Shader::OpCode; | ||
| 27 | using Tegra::Shader::Register; | ||
| 28 | using Tegra::Shader::Sampler; | ||
| 29 | using Tegra::Shader::SubOp; | ||
| 30 | |||
| 31 | constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; | ||
| 32 | constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header); | ||
| 33 | |||
| 34 | constexpr u32 MAX_GEOMETRY_BUFFERS = 6; | ||
| 35 | constexpr u32 MAX_ATTRIBUTES = 0x100; // Size in vec4s, this value is untested | ||
| 36 | |||
| 37 | static const char* INTERNAL_FLAG_NAMES[] = {"zero_flag", "sign_flag", "carry_flag", | ||
| 38 | "overflow_flag"}; | ||
| 39 | |||
| 40 | enum class InternalFlag : u64 { | ||
| 41 | ZeroFlag = 0, | ||
| 42 | SignFlag = 1, | ||
| 43 | CarryFlag = 2, | ||
| 44 | OverflowFlag = 3, | ||
| 45 | Amount | ||
| 46 | }; | ||
| 47 | |||
| 48 | class DecompileFail : public std::runtime_error { | ||
| 49 | public: | ||
| 50 | using std::runtime_error::runtime_error; | ||
| 51 | }; | ||
| 52 | |||
| 53 | /// Generates code to use for a swizzle operation. | ||
| 54 | static std::string GetSwizzle(u64 elem) { | ||
| 55 | ASSERT(elem <= 3); | ||
| 56 | std::string swizzle = "."; | ||
| 57 | swizzle += "xyzw"[elem]; | ||
| 58 | return swizzle; | ||
| 59 | } | ||
| 60 | |||
| 61 | /// Translate topology | ||
| 62 | static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | ||
| 63 | switch (topology) { | ||
| 64 | case Tegra::Shader::OutputTopology::PointList: | ||
| 65 | return "points"; | ||
| 66 | case Tegra::Shader::OutputTopology::LineStrip: | ||
| 67 | return "line_strip"; | ||
| 68 | case Tegra::Shader::OutputTopology::TriangleStrip: | ||
| 69 | return "triangle_strip"; | ||
| 70 | default: | ||
| 71 | UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology)); | ||
| 72 | return "points"; | ||
| 73 | } | ||
| 74 | } | ||
| 75 | |||
| 76 | /// Describes the behaviour of the code path between a given entry point and a return point. | ||
| 77 | enum class ExitMethod { | ||
| 78 | Undetermined, ///< Internal value. Only occurs when analyzing a JMP loop. | ||
| 79 | AlwaysReturn, ///< All code paths reach the return point. | ||
| 80 | Conditional, ///< Code path reaches the return point or an END instruction conditionally. | ||
| 81 | AlwaysEnd, ///< All code paths reach an END instruction. | ||
| 82 | }; | ||
| 83 | |||
| 84 | /// A subroutine is a range of code referenced by a CALL, IF or LOOP instruction. | ||
| 85 | struct Subroutine { | ||
| 86 | /// Generates a name suitable for GLSL source code. | ||
| 87 | std::string GetName() const { | ||
| 88 | return "sub_" + std::to_string(begin) + '_' + std::to_string(end) + '_' + suffix; | ||
| 89 | } | ||
| 90 | |||
| 91 | u32 begin; ///< Entry point of the subroutine. | ||
| 92 | u32 end; ///< Return point of the subroutine. | ||
| 93 | const std::string& suffix; ///< Suffix of the shader, used to make a unique subroutine name | ||
| 94 | ExitMethod exit_method; ///< Exit method of the subroutine. | ||
| 95 | std::set<u32> labels; ///< Addresses referenced by JMP instructions. | ||
| 96 | |||
| 97 | bool operator<(const Subroutine& rhs) const { | ||
| 98 | return std::tie(begin, end) < std::tie(rhs.begin, rhs.end); | ||
| 99 | } | ||
| 100 | }; | ||
| 101 | |||
| 102 | /// Analyzes shader code and produces a set of subroutines. | ||
| 103 | class ControlFlowAnalyzer { | ||
| 104 | public: | ||
| 105 | ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset, const std::string& suffix) | ||
| 106 | : program_code(program_code), shader_coverage_begin(main_offset), | ||
| 107 | shader_coverage_end(main_offset + 1) { | ||
| 108 | |||
| 109 | // Recursively finds all subroutines. | ||
| 110 | const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END, suffix); | ||
| 111 | if (program_main.exit_method != ExitMethod::AlwaysEnd) | ||
| 112 | throw DecompileFail("Program does not always end"); | ||
| 113 | } | ||
| 114 | |||
| 115 | std::set<Subroutine> GetSubroutines() { | ||
| 116 | return std::move(subroutines); | ||
| 117 | } | ||
| 118 | |||
| 119 | std::size_t GetShaderLength() const { | ||
| 120 | return shader_coverage_end * sizeof(u64); | ||
| 121 | } | ||
| 122 | |||
| 123 | private: | ||
| 124 | const ProgramCode& program_code; | ||
| 125 | std::set<Subroutine> subroutines; | ||
| 126 | std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; | ||
| 127 | u32 shader_coverage_begin; | ||
| 128 | u32 shader_coverage_end; | ||
| 129 | |||
| 130 | /// Adds and analyzes a new subroutine if it is not added yet. | ||
| 131 | const Subroutine& AddSubroutine(u32 begin, u32 end, const std::string& suffix) { | ||
| 132 | Subroutine subroutine{begin, end, suffix, ExitMethod::Undetermined, {}}; | ||
| 133 | |||
| 134 | const auto iter = subroutines.find(subroutine); | ||
| 135 | if (iter != subroutines.end()) { | ||
| 136 | return *iter; | ||
| 137 | } | ||
| 138 | |||
| 139 | subroutine.exit_method = Scan(begin, end, subroutine.labels); | ||
| 140 | if (subroutine.exit_method == ExitMethod::Undetermined) { | ||
| 141 | throw DecompileFail("Recursive function detected"); | ||
| 142 | } | ||
| 143 | |||
| 144 | return *subroutines.insert(std::move(subroutine)).first; | ||
| 145 | } | ||
| 146 | |||
| 147 | /// Merges exit method of two parallel branches. | ||
| 148 | static ExitMethod ParallelExit(ExitMethod a, ExitMethod b) { | ||
| 149 | if (a == ExitMethod::Undetermined) { | ||
| 150 | return b; | ||
| 151 | } | ||
| 152 | if (b == ExitMethod::Undetermined) { | ||
| 153 | return a; | ||
| 154 | } | ||
| 155 | if (a == b) { | ||
| 156 | return a; | ||
| 157 | } | ||
| 158 | return ExitMethod::Conditional; | ||
| 159 | } | ||
| 160 | |||
| 161 | /// Scans a range of code for labels and determines the exit method. | ||
| 162 | ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) { | ||
| 163 | const auto [iter, inserted] = | ||
| 164 | exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); | ||
| 165 | ExitMethod& exit_method = iter->second; | ||
| 166 | if (!inserted) | ||
| 167 | return exit_method; | ||
| 168 | |||
| 169 | for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) { | ||
| 170 | shader_coverage_begin = std::min(shader_coverage_begin, offset); | ||
| 171 | shader_coverage_end = std::max(shader_coverage_end, offset + 1); | ||
| 172 | |||
| 173 | const Instruction instr = {program_code[offset]}; | ||
| 174 | if (const auto opcode = OpCode::Decode(instr)) { | ||
| 175 | switch (opcode->get().GetId()) { | ||
| 176 | case OpCode::Id::EXIT: { | ||
| 177 | // The EXIT instruction can be predicated, which means that the shader can | ||
| 178 | // conditionally end on this instruction. We have to consider the case where the | ||
| 179 | // condition is not met and check the exit method of that other basic block. | ||
| 180 | using Tegra::Shader::Pred; | ||
| 181 | if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 182 | return exit_method = ExitMethod::AlwaysEnd; | ||
| 183 | } else { | ||
| 184 | const ExitMethod not_met = Scan(offset + 1, end, labels); | ||
| 185 | return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met); | ||
| 186 | } | ||
| 187 | } | ||
| 188 | case OpCode::Id::BRA: { | ||
| 189 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 190 | labels.insert(target); | ||
| 191 | const ExitMethod no_jmp = Scan(offset + 1, end, labels); | ||
| 192 | const ExitMethod jmp = Scan(target, end, labels); | ||
| 193 | return exit_method = ParallelExit(no_jmp, jmp); | ||
| 194 | } | ||
| 195 | case OpCode::Id::SSY: | ||
| 196 | case OpCode::Id::PBK: { | ||
| 197 | // The SSY and PBK use a similar encoding as the BRA instruction. | ||
| 198 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 199 | "Constant buffer branching is not supported"); | ||
| 200 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 201 | labels.insert(target); | ||
| 202 | // Continue scanning for an exit method. | ||
| 203 | break; | ||
| 204 | } | ||
| 205 | } | ||
| 206 | } | ||
| 207 | } | ||
| 208 | return exit_method = ExitMethod::AlwaysReturn; | ||
| 209 | } | ||
| 210 | }; | ||
| 211 | |||
| 212 | template <typename T> | ||
| 213 | class ShaderScopedScope { | ||
| 214 | public: | ||
| 215 | explicit ShaderScopedScope(T& writer, std::string_view begin_expr, std::string end_expr) | ||
| 216 | : writer(writer), end_expr(std::move(end_expr)) { | ||
| 217 | |||
| 218 | if (begin_expr.empty()) { | ||
| 219 | writer.AddLine('{'); | ||
| 220 | } else { | ||
| 221 | writer.AddExpression(begin_expr); | ||
| 222 | writer.AddLine(" {"); | ||
| 223 | } | ||
| 224 | ++writer.scope; | ||
| 225 | } | ||
| 226 | |||
| 227 | ShaderScopedScope(const ShaderScopedScope&) = delete; | ||
| 228 | |||
| 229 | ~ShaderScopedScope() { | ||
| 230 | --writer.scope; | ||
| 231 | if (end_expr.empty()) { | ||
| 232 | writer.AddLine('}'); | ||
| 233 | } else { | ||
| 234 | writer.AddExpression("} "); | ||
| 235 | writer.AddExpression(end_expr); | ||
| 236 | writer.AddLine(';'); | ||
| 237 | } | ||
| 238 | } | ||
| 239 | |||
| 240 | ShaderScopedScope& operator=(const ShaderScopedScope&) = delete; | ||
| 241 | |||
| 242 | private: | ||
| 243 | T& writer; | ||
| 244 | std::string end_expr; | ||
| 245 | }; | ||
| 246 | |||
| 247 | class ShaderWriter { | ||
| 248 | public: | ||
| 249 | void AddExpression(std::string_view text) { | ||
| 250 | DEBUG_ASSERT(scope >= 0); | ||
| 251 | if (!text.empty()) { | ||
| 252 | AppendIndentation(); | ||
| 253 | } | ||
| 254 | shader_source += text; | ||
| 255 | } | ||
| 256 | |||
| 257 | void AddLine(std::string_view text) { | ||
| 258 | AddExpression(text); | ||
| 259 | AddNewLine(); | ||
| 260 | } | ||
| 261 | |||
| 262 | void AddLine(char character) { | ||
| 263 | DEBUG_ASSERT(scope >= 0); | ||
| 264 | AppendIndentation(); | ||
| 265 | shader_source += character; | ||
| 266 | AddNewLine(); | ||
| 267 | } | ||
| 268 | |||
| 269 | void AddNewLine() { | ||
| 270 | DEBUG_ASSERT(scope >= 0); | ||
| 271 | shader_source += '\n'; | ||
| 272 | } | ||
| 273 | |||
| 274 | std::string GetResult() { | ||
| 275 | return std::move(shader_source); | ||
| 276 | } | ||
| 277 | |||
| 278 | ShaderScopedScope<ShaderWriter> Scope(std::string_view begin_expr = {}, | ||
| 279 | std::string end_expr = {}) { | ||
| 280 | return ShaderScopedScope(*this, begin_expr, end_expr); | ||
| 281 | } | ||
| 282 | |||
| 283 | int scope = 0; | ||
| 284 | |||
| 285 | private: | ||
| 286 | void AppendIndentation() { | ||
| 287 | shader_source.append(static_cast<std::size_t>(scope) * 4, ' '); | ||
| 288 | } | ||
| 289 | |||
| 290 | std::string shader_source; | ||
| 291 | }; | ||
| 292 | |||
| 293 | /** | ||
| 294 | * Represents an emulated shader register, used to track the state of that register for emulation | ||
| 295 | * with GLSL. At this time, a register can be used as a float or an integer. This class is used for | ||
| 296 | * bookkeeping within the GLSL program. | ||
| 297 | */ | ||
| 298 | class GLSLRegister { | ||
| 299 | public: | ||
| 300 | enum class Type { | ||
| 301 | Float, | ||
| 302 | Integer, | ||
| 303 | UnsignedInteger, | ||
| 304 | }; | ||
| 305 | |||
| 306 | GLSLRegister(std::size_t index, const std::string& suffix) : index{index}, suffix{suffix} {} | ||
| 307 | |||
| 308 | /// Gets the GLSL type string for a register | ||
| 309 | static std::string GetTypeString() { | ||
| 310 | return "float"; | ||
| 311 | } | ||
| 312 | |||
| 313 | /// Gets the GLSL register prefix string, used for declarations and referencing | ||
| 314 | static std::string GetPrefixString() { | ||
| 315 | return "reg_"; | ||
| 316 | } | ||
| 317 | |||
| 318 | /// Returns a GLSL string representing the current state of the register | ||
| 319 | std::string GetString() const { | ||
| 320 | return GetPrefixString() + std::to_string(index) + '_' + suffix; | ||
| 321 | } | ||
| 322 | |||
| 323 | /// Returns the index of the register | ||
| 324 | std::size_t GetIndex() const { | ||
| 325 | return index; | ||
| 326 | } | ||
| 327 | |||
| 328 | private: | ||
| 329 | const std::size_t index; | ||
| 330 | const std::string& suffix; | ||
| 331 | }; | ||
| 332 | |||
| 333 | /** | ||
| 334 | * Used to manage shader registers that are emulated with GLSL. This class keeps track of the state | ||
| 335 | * of all registers (e.g. whether they are currently being used as Floats or Integers), and | ||
| 336 | * generates the necessary GLSL code to perform conversions as needed. This class is used for | ||
| 337 | * bookkeeping within the GLSL program. | ||
| 338 | */ | ||
| 339 | class GLSLRegisterManager { | ||
| 340 | public: | ||
| 341 | GLSLRegisterManager(ShaderWriter& shader, ShaderWriter& declarations, | ||
| 342 | const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix, | ||
| 343 | const Tegra::Shader::Header& header) | ||
| 344 | : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header}, | ||
| 345 | fixed_pipeline_output_attributes_used{}, local_memory_size{0} { | ||
| 346 | BuildRegisterList(); | ||
| 347 | BuildInputList(); | ||
| 348 | } | ||
| 349 | |||
| 350 | void SetConditionalCodesFromExpression(const std::string& expresion) { | ||
| 351 | SetInternalFlag(InternalFlag::ZeroFlag, "(" + expresion + ") == 0"); | ||
| 352 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete."); | ||
| 353 | } | ||
| 354 | |||
| 355 | void SetConditionalCodesFromRegister(const Register& reg, u64 dest_elem = 0) { | ||
| 356 | SetConditionalCodesFromExpression(GetRegister(reg, static_cast<u32>(dest_elem))); | ||
| 357 | } | ||
| 358 | |||
| 359 | /** | ||
| 360 | * Returns code that does an integer size conversion for the specified size. | ||
| 361 | * @param value Value to perform integer size conversion on. | ||
| 362 | * @param size Register size to use for conversion instructions. | ||
| 363 | * @returns GLSL string corresponding to the value converted to the specified size. | ||
| 364 | */ | ||
| 365 | static std::string ConvertIntegerSize(const std::string& value, Register::Size size) { | ||
| 366 | switch (size) { | ||
| 367 | case Register::Size::Byte: | ||
| 368 | return "((" + value + " << 24) >> 24)"; | ||
| 369 | case Register::Size::Short: | ||
| 370 | return "((" + value + " << 16) >> 16)"; | ||
| 371 | case Register::Size::Word: | ||
| 372 | // Default - do nothing | ||
| 373 | return value; | ||
| 374 | default: | ||
| 375 | UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size)); | ||
| 376 | return value; | ||
| 377 | } | ||
| 378 | } | ||
| 379 | |||
| 380 | /** | ||
| 381 | * Gets a register as a float. | ||
| 382 | * @param reg The register to get. | ||
| 383 | * @param elem The element to use for the operation. | ||
| 384 | * @returns GLSL string corresponding to the register as a float. | ||
| 385 | */ | ||
| 386 | std::string GetRegisterAsFloat(const Register& reg, unsigned elem = 0) { | ||
| 387 | return GetRegister(reg, elem); | ||
| 388 | } | ||
| 389 | |||
| 390 | /** | ||
| 391 | * Gets a register as an integer. | ||
| 392 | * @param reg The register to get. | ||
| 393 | * @param elem The element to use for the operation. | ||
| 394 | * @param is_signed Whether to get the register as a signed (or unsigned) integer. | ||
| 395 | * @param size Register size to use for conversion instructions. | ||
| 396 | * @returns GLSL string corresponding to the register as an integer. | ||
| 397 | */ | ||
| 398 | std::string GetRegisterAsInteger(const Register& reg, unsigned elem = 0, bool is_signed = true, | ||
| 399 | Register::Size size = Register::Size::Word) { | ||
| 400 | const std::string func{is_signed ? "floatBitsToInt" : "floatBitsToUint"}; | ||
| 401 | const std::string value{func + '(' + GetRegister(reg, elem) + ')'}; | ||
| 402 | return ConvertIntegerSize(value, size); | ||
| 403 | } | ||
| 404 | |||
| 405 | /** | ||
| 406 | * Writes code that does a register assignment to float value operation. | ||
| 407 | * @param reg The destination register to use. | ||
| 408 | * @param elem The element to use for the operation. | ||
| 409 | * @param value The code representing the value to assign. | ||
| 410 | * @param dest_num_components Number of components in the destination. | ||
| 411 | * @param value_num_components Number of components in the value. | ||
| 412 | * @param is_saturated Optional, when True, saturates the provided value. | ||
| 413 | * @param sets_cc Optional, when True, sets the corresponding values to the implemented | ||
| 414 | * condition flags. | ||
| 415 | * @param dest_elem Optional, the destination element to use for the operation. | ||
| 416 | */ | ||
| 417 | void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value, | ||
| 418 | u64 dest_num_components, u64 value_num_components, | ||
| 419 | bool is_saturated = false, bool sets_cc = false, u64 dest_elem = 0, | ||
| 420 | bool precise = false) { | ||
| 421 | const std::string clamped_value = is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value; | ||
| 422 | SetRegister(reg, elem, clamped_value, dest_num_components, value_num_components, dest_elem, | ||
| 423 | precise); | ||
| 424 | if (sets_cc) { | ||
| 425 | if (reg == Register::ZeroIndex) { | ||
| 426 | SetConditionalCodesFromExpression(clamped_value); | ||
| 427 | } else { | ||
| 428 | SetConditionalCodesFromRegister(reg, dest_elem); | ||
| 429 | } | ||
| 430 | } | ||
| 431 | } | ||
| 432 | |||
| 433 | /** | ||
| 434 | * Writes code that does a register assignment to integer value operation. | ||
| 435 | * @param reg The destination register to use. | ||
| 436 | * @param elem The element to use for the operation. | ||
| 437 | * @param value The code representing the value to assign. | ||
| 438 | * @param dest_num_components Number of components in the destination. | ||
| 439 | * @param value_num_components Number of components in the value. | ||
| 440 | * @param is_saturated Optional, when True, saturates the provided value. | ||
| 441 | * @param sets_cc Optional, when True, sets the corresponding values to the implemented | ||
| 442 | * condition flags. | ||
| 443 | * @param dest_elem Optional, the destination element to use for the operation. | ||
| 444 | * @param size Register size to use for conversion instructions. | ||
| 445 | */ | ||
| 446 | void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem, | ||
| 447 | const std::string& value, u64 dest_num_components, | ||
| 448 | u64 value_num_components, bool is_saturated = false, | ||
| 449 | bool sets_cc = false, u64 dest_elem = 0, | ||
| 450 | Register::Size size = Register::Size::Word) { | ||
| 451 | UNIMPLEMENTED_IF(is_saturated); | ||
| 452 | const std::string final_value = ConvertIntegerSize(value, size); | ||
| 453 | const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; | ||
| 454 | |||
| 455 | SetRegister(reg, elem, func + '(' + final_value + ')', dest_num_components, | ||
| 456 | value_num_components, dest_elem, false); | ||
| 457 | |||
| 458 | if (sets_cc) { | ||
| 459 | if (reg == Register::ZeroIndex) { | ||
| 460 | SetConditionalCodesFromExpression(final_value); | ||
| 461 | } else { | ||
| 462 | SetConditionalCodesFromRegister(reg, dest_elem); | ||
| 463 | } | ||
| 464 | } | ||
| 465 | } | ||
| 466 | |||
| 467 | /** | ||
| 468 | * Writes code that does a register assignment to a half float value operation. | ||
| 469 | * @param reg The destination register to use. | ||
| 470 | * @param elem The element to use for the operation. | ||
| 471 | * @param value The code representing the value to assign. Type has to be half float. | ||
| 472 | * @param merge Half float kind of assignment. | ||
| 473 | * @param dest_num_components Number of components in the destination. | ||
| 474 | * @param value_num_components Number of components in the value. | ||
| 475 | * @param is_saturated Optional, when True, saturates the provided value. | ||
| 476 | * @param dest_elem Optional, the destination element to use for the operation. | ||
| 477 | */ | ||
| 478 | void SetRegisterToHalfFloat(const Register& reg, u64 elem, const std::string& value, | ||
| 479 | Tegra::Shader::HalfMerge merge, u64 dest_num_components, | ||
| 480 | u64 value_num_components, bool is_saturated = false, | ||
| 481 | u64 dest_elem = 0) { | ||
| 482 | UNIMPLEMENTED_IF(is_saturated); | ||
| 483 | |||
| 484 | const std::string result = [&]() { | ||
| 485 | switch (merge) { | ||
| 486 | case Tegra::Shader::HalfMerge::H0_H1: | ||
| 487 | return "uintBitsToFloat(packHalf2x16(" + value + "))"; | ||
| 488 | case Tegra::Shader::HalfMerge::F32: | ||
| 489 | // Half float instructions take the first component when doing a float cast. | ||
| 490 | return "float(" + value + ".x)"; | ||
| 491 | case Tegra::Shader::HalfMerge::Mrg_H0: | ||
| 492 | // TODO(Rodrigo): I guess Mrg_H0 and Mrg_H1 take their respective component from the | ||
| 493 | // pack. I couldn't test this on hardware but it shouldn't really matter since most | ||
| 494 | // of the time when a Mrg_* flag is used both components will be mirrored. That | ||
| 495 | // being said, it deserves a test. | ||
| 496 | return "uintBitsToFloat((" + GetRegisterAsInteger(reg, 0, false) + | ||
| 497 | " & 0xffff0000) | (packHalf2x16(" + value + ") & 0x0000ffff))"; | ||
| 498 | case Tegra::Shader::HalfMerge::Mrg_H1: | ||
| 499 | return "uintBitsToFloat((" + GetRegisterAsInteger(reg, 0, false) + | ||
| 500 | " & 0x0000ffff) | (packHalf2x16(" + value + ") & 0xffff0000))"; | ||
| 501 | default: | ||
| 502 | UNREACHABLE(); | ||
| 503 | return std::string("0"); | ||
| 504 | } | ||
| 505 | }(); | ||
| 506 | |||
| 507 | SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem, false); | ||
| 508 | } | ||
| 509 | |||
| 510 | /** | ||
| 511 | * Writes code that does a register assignment to input attribute operation. Input attributes | ||
| 512 | * are stored as floats, so this may require conversion. | ||
| 513 | * @param reg The destination register to use. | ||
| 514 | * @param elem The element to use for the operation. | ||
| 515 | * @param attribute The input attribute to use as the source value. | ||
| 516 | * @param input_mode The input mode. | ||
| 517 | * @param vertex The register that decides which vertex to read from (used in GS). | ||
| 518 | */ | ||
| 519 | void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute, | ||
| 520 | const Tegra::Shader::IpaMode& input_mode, | ||
| 521 | std::optional<Register> vertex = {}) { | ||
| 522 | const std::string dest = GetRegisterAsFloat(reg); | ||
| 523 | const std::string src = GetInputAttribute(attribute, input_mode, vertex) + GetSwizzle(elem); | ||
| 524 | shader.AddLine(dest + " = " + src + ';'); | ||
| 525 | } | ||
| 526 | |||
| 527 | std::string GetLocalMemoryAsFloat(const std::string& index) { | ||
| 528 | return "lmem[" + index + ']'; | ||
| 529 | } | ||
| 530 | |||
| | /** | ||
| | * Gets a local memory element reinterpreted as an integer. | ||
| | * @param index GLSL expression used to index the lmem array. | ||
| | * @param is_signed Whether to get the element as a signed (or unsigned) integer. | ||
| | * @returns GLSL string corresponding to the lmem element bit-cast to an integer. | ||
| | */ | ||
| 531 | std::string GetLocalMemoryAsInteger(const std::string& index, bool is_signed = false) { | ||
| | // The GLSL bit-cast built-ins are floatBitsToInt / floatBitsToUint (same pair that | ||
| | // GetRegisterAsInteger emits); "floatToIntBits" does not exist in GLSL. | ||
| 532 | const std::string func{is_signed ? "floatBitsToInt" : "floatBitsToUint"}; | ||
| 533 | return func + "(lmem[" + index + "])"; | ||
| 534 | } | ||
| 535 | |||
| 536 | void SetLocalMemoryAsFloat(const std::string& index, const std::string& value) { | ||
| 537 | shader.AddLine("lmem[" + index + "] = " + value + ';'); | ||
| 538 | } | ||
| 539 | |||
| 540 | void SetLocalMemoryAsInteger(const std::string& index, const std::string& value, | ||
| 541 | bool is_signed = false) { | ||
| 542 | const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; | ||
| 543 | shader.AddLine("lmem[" + index + "] = " + func + '(' + value + ");"); | ||
| 544 | } | ||
| 545 | |||
| 546 | std::string GetConditionCode(const Tegra::Shader::ConditionCode cc) const { | ||
| 547 | switch (cc) { | ||
| 548 | case Tegra::Shader::ConditionCode::NEU: | ||
| 549 | return "!(" + GetInternalFlag(InternalFlag::ZeroFlag) + ')'; | ||
| 550 | default: | ||
| 551 | UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc)); | ||
| 552 | return "false"; | ||
| 553 | } | ||
| 554 | } | ||
| 555 | |||
| 556 | std::string GetInternalFlag(const InternalFlag flag) const { | ||
| 557 | const auto index = static_cast<u32>(flag); | ||
| 558 | ASSERT(index < static_cast<u32>(InternalFlag::Amount)); | ||
| 559 | |||
| 560 | return std::string(INTERNAL_FLAG_NAMES[index]) + '_' + suffix; | ||
| 561 | } | ||
| 562 | |||
| 563 | void SetInternalFlag(const InternalFlag flag, const std::string& value) const { | ||
| 564 | shader.AddLine(GetInternalFlag(flag) + " = " + value + ';'); | ||
| 565 | } | ||
| 566 | |||
| 567 | /** | ||
| 568 | * Writes code that does a output attribute assignment to register operation. Output attributes | ||
| 569 | * are stored as floats, so this may require conversion. | ||
| 570 | * @param attribute The destination output attribute. | ||
| 571 | * @param elem The element to use for the operation. | ||
| 572 | * @param val_reg The register to use as the source value. | ||
| 573 | * @param buf_reg The register that tells which buffer to write to (used in geometry shaders). | ||
| 574 | */ | ||
| 575 | void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& val_reg, | ||
| 576 | const Register& buf_reg) { | ||
| 577 | const std::string dest = GetOutputAttribute(attribute); | ||
| 578 | const std::string src = GetRegisterAsFloat(val_reg); | ||
| 579 | if (dest.empty()) | ||
| 580 | return; | ||
| 581 | |||
| 582 | // Can happen with unknown/unimplemented output attributes, in which case we ignore the | ||
| 583 | // instruction for now. | ||
| 584 | if (stage == Maxwell3D::Regs::ShaderStage::Geometry) { | ||
| 585 | // TODO(Rodrigo): nouveau sets some attributes after setting emitting a geometry | ||
| 586 | // shader. These instructions use a dirty register as buffer index, to avoid some | ||
| 587 | // drivers from complaining about out of boundary writes, guard them. | ||
| 588 | const std::string buf_index{"((" + GetRegisterAsInteger(buf_reg) + ") % " + | ||
| 589 | std::to_string(MAX_GEOMETRY_BUFFERS) + ')'}; | ||
| 590 | shader.AddLine("amem[" + buf_index + "][" + | ||
| 591 | std::to_string(static_cast<u32>(attribute)) + ']' + GetSwizzle(elem) + | ||
| 592 | " = " + src + ';'); | ||
| 593 | return; | ||
| 594 | } | ||
| 595 | |||
| 596 | switch (attribute) { | ||
| 597 | case Attribute::Index::ClipDistances0123: | ||
| 598 | case Attribute::Index::ClipDistances4567: { | ||
| 599 | const u64 index = (attribute == Attribute::Index::ClipDistances4567 ? 4 : 0) + elem; | ||
| 600 | UNIMPLEMENTED_IF_MSG( | ||
| 601 | ((header.vtg.clip_distances >> index) & 1) == 0, | ||
| 602 | "Shader is setting gl_ClipDistance{} without enabling it in the header", index); | ||
| 603 | |||
| 604 | clip_distances[index] = true; | ||
| 605 | fixed_pipeline_output_attributes_used.insert(attribute); | ||
| 606 | shader.AddLine(dest + '[' + std::to_string(index) + "] = " + src + ';'); | ||
| 607 | break; | ||
| 608 | } | ||
| 609 | case Attribute::Index::PointSize: | ||
| 610 | fixed_pipeline_output_attributes_used.insert(attribute); | ||
| 611 | shader.AddLine(dest + " = " + src + ';'); | ||
| 612 | break; | ||
| 613 | default: | ||
| 614 | shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';'); | ||
| 615 | break; | ||
| 616 | } | ||
| 617 | } | ||
| 618 | |||
| 619 | /// Generates code representing a uniform (C buffer) register, interpreted as the input type. | ||
| 620 | std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type, | ||
| 621 | Register::Size size = Register::Size::Word) { | ||
| 622 | declr_const_buffers[index].MarkAsUsed(index, offset, stage); | ||
| 623 | std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset / 4) + "][" + | ||
| 624 | std::to_string(offset % 4) + ']'; | ||
| 625 | |||
| 626 | if (type == GLSLRegister::Type::Float) { | ||
| 627 | // Do nothing, default | ||
| 628 | } else if (type == GLSLRegister::Type::Integer) { | ||
| 629 | value = "floatBitsToInt(" + value + ')'; | ||
| 630 | } else if (type == GLSLRegister::Type::UnsignedInteger) { | ||
| 631 | value = "floatBitsToUint(" + value + ')'; | ||
| 632 | } else { | ||
| 633 | UNREACHABLE(); | ||
| 634 | } | ||
| 635 | |||
| 636 | return ConvertIntegerSize(value, size); | ||
| 637 | } | ||
| 638 | |||
| 639 | std::string GetUniformIndirect(u64 cbuf_index, s64 offset, const std::string& index_str, | ||
| 640 | GLSLRegister::Type type) { | ||
| 641 | declr_const_buffers[cbuf_index].MarkAsUsedIndirect(cbuf_index, stage); | ||
| 642 | |||
| 643 | const std::string final_offset = fmt::format("({} + {})", index_str, offset / 4); | ||
| 644 | const std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" + | ||
| 645 | final_offset + " % 4]"; | ||
| 646 | |||
| 647 | if (type == GLSLRegister::Type::Float) { | ||
| 648 | return value; | ||
| 649 | } else if (type == GLSLRegister::Type::Integer) { | ||
| 650 | return "floatBitsToInt(" + value + ')'; | ||
| 651 | } else { | ||
| 652 | UNREACHABLE(); | ||
| 653 | return value; | ||
| 654 | } | ||
| 655 | } | ||
| 656 | |||
| 657 | /// Add declarations. | ||
| 658 | void GenerateDeclarations(const std::string& suffix) { | ||
| 659 | GenerateVertex(); | ||
| 660 | GenerateRegisters(suffix); | ||
| 661 | GenerateLocalMemory(); | ||
| 662 | GenerateInternalFlags(); | ||
| 663 | GenerateInputAttrs(); | ||
| 664 | GenerateOutputAttrs(); | ||
| 665 | GenerateConstBuffers(); | ||
| 666 | GenerateSamplers(); | ||
| 667 | GenerateGeometry(); | ||
| 668 | } | ||
| 669 | |||
| 670 | /// Returns a list of constant buffer declarations. | ||
| 671 | std::vector<ConstBufferEntry> GetConstBuffersDeclarations() const { | ||
| 672 | std::vector<ConstBufferEntry> result; | ||
| 673 | std::copy_if(declr_const_buffers.begin(), declr_const_buffers.end(), | ||
| 674 | std::back_inserter(result), [](const auto& entry) { return entry.IsUsed(); }); | ||
| 675 | return result; | ||
| 676 | } | ||
| 677 | |||
| 678 | /// Returns a list of samplers used in the shader. | ||
| 679 | const std::vector<SamplerEntry>& GetSamplers() const { | ||
| 680 | return used_samplers; | ||
| 681 | } | ||
| 682 | |||
| 683 | /// Returns an array of the used clip distances. | ||
| 684 | const std::array<bool, Maxwell::NumClipDistances>& GetClipDistances() const { | ||
| 685 | return clip_distances; | ||
| 686 | } | ||
| 687 | |||
| 688 | /// Returns the GLSL sampler used for the input shader sampler, and creates a new one if | ||
| 689 | /// necessary. | ||
| 690 | std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type, | ||
| 691 | bool is_array, bool is_shadow) { | ||
| 692 | const auto offset = static_cast<std::size_t>(sampler.index.Value()); | ||
| 693 | |||
| 694 | // If this sampler has already been used, return the existing mapping. | ||
| 695 | const auto itr = | ||
| 696 | std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 697 | [&](const SamplerEntry& entry) { return entry.GetOffset() == offset; }); | ||
| 698 | |||
| 699 | if (itr != used_samplers.end()) { | ||
| 700 | ASSERT(itr->GetType() == type && itr->IsArray() == is_array && | ||
| 701 | itr->IsShadow() == is_shadow); | ||
| 702 | return itr->GetName(); | ||
| 703 | } | ||
| 704 | |||
| 705 | // Otherwise create a new mapping for this sampler | ||
| 706 | const std::size_t next_index = used_samplers.size(); | ||
| 707 | const SamplerEntry entry{stage, offset, next_index, type, is_array, is_shadow}; | ||
| 708 | used_samplers.emplace_back(entry); | ||
| 709 | return entry.GetName(); | ||
| 710 | } | ||
| 711 | |||
| 712 | void SetLocalMemory(u64 lmem) { | ||
| 713 | local_memory_size = lmem; | ||
| 714 | } | ||
| 715 | |||
| 716 | private: | ||
| 717 | /// Generates declarations for registers. | ||
| 718 | void GenerateRegisters(const std::string& suffix) { | ||
| 719 | for (const auto& reg : regs) { | ||
| 720 | declarations.AddLine(GLSLRegister::GetTypeString() + ' ' + reg.GetPrefixString() + | ||
| 721 | std::to_string(reg.GetIndex()) + '_' + suffix + " = 0;"); | ||
| 722 | } | ||
| 723 | declarations.AddNewLine(); | ||
| 724 | } | ||
| 725 | |||
| 726 | /// Generates declarations for local memory. | ||
| 727 | void GenerateLocalMemory() { | ||
| 728 | if (local_memory_size > 0) { | ||
| 729 | declarations.AddLine("float lmem[" + std::to_string((local_memory_size - 1 + 4) / 4) + | ||
| 730 | "];"); | ||
| 731 | declarations.AddNewLine(); | ||
| 732 | } | ||
| 733 | } | ||
| 734 | |||
| 735 | /// Generates declarations for internal flags. | ||
| 736 | void GenerateInternalFlags() { | ||
| 737 | for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { | ||
| 738 | const InternalFlag code = static_cast<InternalFlag>(flag); | ||
| 739 | declarations.AddLine("bool " + GetInternalFlag(code) + " = false;"); | ||
| 740 | } | ||
| 741 | declarations.AddNewLine(); | ||
| 742 | } | ||
| 743 | |||
| 744 | /// Generates declarations for input attributes. | ||
| 745 | void GenerateInputAttrs() { | ||
| 746 | for (const auto element : declr_input_attribute) { | ||
| 747 | // TODO(bunnei): Use proper number of elements for these | ||
| 748 | u32 idx = | ||
| 749 | static_cast<u32>(element.first) - static_cast<u32>(Attribute::Index::Attribute_0); | ||
| 750 | if (stage != Maxwell3D::Regs::ShaderStage::Vertex) { | ||
| 751 | // If inputs are varyings, add an offset | ||
| 752 | idx += GENERIC_VARYING_START_LOCATION; | ||
| 753 | } | ||
| 754 | |||
| 755 | std::string attr{GetInputAttribute(element.first, element.second)}; | ||
| 756 | if (stage == Maxwell3D::Regs::ShaderStage::Geometry) { | ||
| 757 | attr = "gs_" + attr + "[]"; | ||
| 758 | } | ||
| 759 | declarations.AddLine("layout (location = " + std::to_string(idx) + ") " + | ||
| 760 | GetInputFlags(element.first) + "in vec4 " + attr + ';'); | ||
| 761 | } | ||
| 762 | |||
| 763 | declarations.AddNewLine(); | ||
| 764 | } | ||
| 765 | |||
| 766 | /// Generates declarations for output attributes. | ||
| 767 | void GenerateOutputAttrs() { | ||
| 768 | for (const auto& index : declr_output_attribute) { | ||
| 769 | // TODO(bunnei): Use proper number of elements for these | ||
| 770 | const u32 idx = static_cast<u32>(index) - | ||
| 771 | static_cast<u32>(Attribute::Index::Attribute_0) + | ||
| 772 | GENERIC_VARYING_START_LOCATION; | ||
| 773 | declarations.AddLine("layout (location = " + std::to_string(idx) + ") out vec4 " + | ||
| 774 | GetOutputAttribute(index) + ';'); | ||
| 775 | } | ||
| 776 | declarations.AddNewLine(); | ||
| 777 | } | ||
| 778 | |||
| 779 | /// Generates declarations for constant buffers. | ||
| 780 | void GenerateConstBuffers() { | ||
| 781 | for (const auto& entry : GetConstBuffersDeclarations()) { | ||
| 782 | declarations.AddLine("layout (std140) uniform " + entry.GetName()); | ||
| 783 | declarations.AddLine('{'); | ||
| 784 | declarations.AddLine(" vec4 c" + std::to_string(entry.GetIndex()) + | ||
| 785 | "[MAX_CONSTBUFFER_ELEMENTS];"); | ||
| 786 | declarations.AddLine("};"); | ||
| 787 | declarations.AddNewLine(); | ||
| 788 | } | ||
| 789 | declarations.AddNewLine(); | ||
| 790 | } | ||
| 791 | |||
| 792 | /// Generates declarations for samplers. | ||
| 793 | void GenerateSamplers() { | ||
| 794 | const auto& samplers = GetSamplers(); | ||
| 795 | for (const auto& sampler : samplers) { | ||
| 796 | declarations.AddLine("uniform " + sampler.GetTypeString() + ' ' + sampler.GetName() + | ||
| 797 | ';'); | ||
| 798 | } | ||
| 799 | declarations.AddNewLine(); | ||
| 800 | } | ||
| 801 | |||
| 802 | /// Generates declarations used for geometry shaders. | ||
| 803 | void GenerateGeometry() { | ||
| 804 | if (stage != Maxwell3D::Regs::ShaderStage::Geometry) | ||
| 805 | return; | ||
| 806 | |||
| 807 | declarations.AddLine( | ||
| 808 | "layout (" + GetTopologyName(header.common3.output_topology) + | ||
| 809 | ", max_vertices = " + std::to_string(header.common4.max_output_vertices) + ") out;"); | ||
| 810 | declarations.AddNewLine(); | ||
| 811 | |||
| 812 | declarations.AddLine("vec4 amem[" + std::to_string(MAX_GEOMETRY_BUFFERS) + "][" + | ||
| 813 | std::to_string(MAX_ATTRIBUTES) + "];"); | ||
| 814 | declarations.AddNewLine(); | ||
| 815 | |||
| 816 | constexpr char buffer[] = "amem[output_buffer]"; | ||
| 817 | declarations.AddLine("void emit_vertex(uint output_buffer) {"); | ||
| 818 | ++declarations.scope; | ||
| 819 | for (const auto element : declr_output_attribute) { | ||
| 820 | declarations.AddLine(GetOutputAttribute(element) + " = " + buffer + '[' + | ||
| 821 | std::to_string(static_cast<u32>(element)) + "];"); | ||
| 822 | } | ||
| 823 | |||
| 824 | declarations.AddLine("position = " + std::string(buffer) + '[' + | ||
| 825 | std::to_string(static_cast<u32>(Attribute::Index::Position)) + "];"); | ||
| 826 | |||
| 827 | // If a geometry shader is attached, it will always flip (it's the last stage before | ||
| 828 | // fragment). For more info about flipping, refer to gl_shader_gen.cpp. | ||
| 829 | declarations.AddLine("position.xy *= viewport_flip.xy;"); | ||
| 830 | declarations.AddLine("gl_Position = position;"); | ||
| 831 | declarations.AddLine("position.w = 1.0;"); | ||
| 832 | declarations.AddLine("EmitVertex();"); | ||
| 833 | --declarations.scope; | ||
| 834 | declarations.AddLine('}'); | ||
| 835 | declarations.AddNewLine(); | ||
| 836 | } | ||
| 837 | |||
| 838 | void GenerateVertex() { | ||
| 839 | if (stage != Maxwell3D::Regs::ShaderStage::Vertex) | ||
| 840 | return; | ||
| 841 | bool clip_distances_declared = false; | ||
| 842 | |||
| 843 | declarations.AddLine("out gl_PerVertex {"); | ||
| 844 | ++declarations.scope; | ||
| 845 | declarations.AddLine("vec4 gl_Position;"); | ||
| 846 | for (auto& o : fixed_pipeline_output_attributes_used) { | ||
| 847 | if (o == Attribute::Index::PointSize) | ||
| 848 | declarations.AddLine("float gl_PointSize;"); | ||
| 849 | if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 || | ||
| 850 | o == Attribute::Index::ClipDistances4567)) { | ||
| 851 | declarations.AddLine("float gl_ClipDistance[];"); | ||
| 852 | clip_distances_declared = true; | ||
| 853 | } | ||
| 854 | } | ||
| 855 | --declarations.scope; | ||
| 856 | declarations.AddLine("};"); | ||
| 857 | } | ||
| 858 | |||
| 859 | /// Generates code representing a temporary (GPR) register. | ||
| 860 | std::string GetRegister(const Register& reg, unsigned elem) { | ||
| 861 | if (reg == Register::ZeroIndex) { | ||
| 862 | return "0"; | ||
| 863 | } | ||
| 864 | |||
| 865 | return regs[reg.GetSwizzledIndex(elem)].GetString(); | ||
| 866 | } | ||
| 867 | |||
| 868 | /** | ||
| 869 | * Writes code that does a register assignment to value operation. | ||
| 870 | * @param reg The destination register to use. | ||
| 871 | * @param elem The element to use for the operation. | ||
| 872 | * @param value The code representing the value to assign. | ||
| 873 | * @param dest_num_components Number of components in the destination. | ||
| 874 | * @param value_num_components Number of components in the value. | ||
| 875 | * @param dest_elem Optional, the destination element to use for the operation. | ||
| 876 | */ | ||
| 877 | void SetRegister(const Register& reg, u64 elem, const std::string& value, | ||
| 878 | u64 dest_num_components, u64 value_num_components, u64 dest_elem, | ||
| 879 | bool precise) { | ||
| 880 | if (reg == Register::ZeroIndex) { | ||
| 881 | // Setting RZ is a nop in hardware. | ||
| 882 | return; | ||
| 883 | } | ||
| 884 | |||
| 885 | std::string dest = GetRegister(reg, static_cast<u32>(dest_elem)); | ||
| 886 | if (dest_num_components > 1) { | ||
| 887 | dest += GetSwizzle(elem); | ||
| 888 | } | ||
| 889 | |||
| 890 | std::string src = '(' + value + ')'; | ||
| 891 | if (value_num_components > 1) { | ||
| 892 | src += GetSwizzle(elem); | ||
| 893 | } | ||
| 894 | |||
| 895 | if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) { | ||
| 896 | const auto scope = shader.Scope(); | ||
| 897 | |||
| 898 | // This avoids optimizations of constant propagation and keeps the code as the original | ||
| 899 | // Sadly using the precise keyword causes "linking" errors on fragment shaders. | ||
| 900 | shader.AddLine("precise float tmp = " + src + ';'); | ||
| 901 | shader.AddLine(dest + " = tmp;"); | ||
| 902 | } else { | ||
| 903 | shader.AddLine(dest + " = " + src + ';'); | ||
| 904 | } | ||
| 905 | } | ||
| 906 | |||
| 907 | /// Build the GLSL register list. | ||
| 908 | void BuildRegisterList() { | ||
| 909 | regs.reserve(Register::NumRegisters); | ||
| 910 | |||
| 911 | for (std::size_t index = 0; index < Register::NumRegisters; ++index) { | ||
| 912 | regs.emplace_back(index, suffix); | ||
| 913 | } | ||
| 914 | } | ||
| 915 | |||
| 916 | void BuildInputList() { | ||
| 917 | const u32 size = static_cast<u32>(Attribute::Index::Attribute_31) - | ||
| 918 | static_cast<u32>(Attribute::Index::Attribute_0) + 1; | ||
| 919 | declr_input_attribute.reserve(size); | ||
| 920 | } | ||
| 921 | |||
| 922 | /// Generates code representing an input attribute register. | ||
| 923 | std::string GetInputAttribute(Attribute::Index attribute, | ||
| 924 | const Tegra::Shader::IpaMode& input_mode, | ||
| 925 | std::optional<Register> vertex = {}) { | ||
| 926 | auto GeometryPass = [&](const std::string& name) { | ||
| 927 | if (stage == Maxwell3D::Regs::ShaderStage::Geometry && vertex) { | ||
| 928 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games set | ||
| 929 | // an 0x80000000 index for those and the shader fails to build. Find out why this | ||
| 930 | // happens and what's its intent. | ||
| 931 | return "gs_" + name + '[' + GetRegisterAsInteger(*vertex, 0, false) + | ||
| 932 | " % MAX_VERTEX_INPUT]"; | ||
| 933 | } | ||
| 934 | return name; | ||
| 935 | }; | ||
| 936 | |||
| 937 | switch (attribute) { | ||
| 938 | case Attribute::Index::Position: | ||
| 939 | if (stage != Maxwell3D::Regs::ShaderStage::Fragment) { | ||
| 940 | return GeometryPass("position"); | ||
| 941 | } else { | ||
| 942 | return "vec4(gl_FragCoord.x, gl_FragCoord.y, gl_FragCoord.z, 1.0)"; | ||
| 943 | } | ||
| 944 | case Attribute::Index::PointCoord: | ||
| 945 | return "vec4(gl_PointCoord.x, gl_PointCoord.y, 0, 0)"; | ||
| 946 | case Attribute::Index::TessCoordInstanceIDVertexID: | ||
| 947 | // TODO(Subv): Find out what the values are for the first two elements when inside a | ||
| 948 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval | ||
| 949 | // shader. | ||
| 950 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); | ||
| 951 | // Config pack's first value is instance_id. | ||
| 952 | return "vec4(0, 0, uintBitsToFloat(config_pack[0]), uintBitsToFloat(gl_VertexID))"; | ||
| 953 | case Attribute::Index::FrontFacing: | ||
| 954 | // TODO(Subv): Find out what the values are for the other elements. | ||
| 955 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); | ||
| 956 | return "vec4(0, 0, 0, intBitsToFloat(gl_FrontFacing ? -1 : 0))"; | ||
| 957 | default: | ||
| 958 | const u32 index{static_cast<u32>(attribute) - | ||
| 959 | static_cast<u32>(Attribute::Index::Attribute_0)}; | ||
| 960 | if (attribute >= Attribute::Index::Attribute_0 && | ||
| 961 | attribute <= Attribute::Index::Attribute_31) { | ||
| 962 | if (declr_input_attribute.count(attribute) == 0) { | ||
| 963 | declr_input_attribute[attribute] = input_mode; | ||
| 964 | } else { | ||
| 965 | UNIMPLEMENTED_IF_MSG(declr_input_attribute[attribute] != input_mode, | ||
| 966 | "Multiple input modes for the same attribute"); | ||
| 967 | } | ||
| 968 | return GeometryPass("input_attribute_" + std::to_string(index)); | ||
| 969 | } | ||
| 970 | |||
| 971 | UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); | ||
| 972 | } | ||
| 973 | |||
| 974 | return "vec4(0, 0, 0, 0)"; | ||
| 975 | } | ||
| 976 | |||
| 977 | std::string GetInputFlags(const Attribute::Index attribute) { | ||
| 978 | const Tegra::Shader::IpaSampleMode sample_mode = | ||
| 979 | declr_input_attribute[attribute].sampling_mode; | ||
| 980 | const Tegra::Shader::IpaInterpMode interp_mode = | ||
| 981 | declr_input_attribute[attribute].interpolation_mode; | ||
| 982 | std::string out; | ||
| 983 | switch (interp_mode) { | ||
| 984 | case Tegra::Shader::IpaInterpMode::Flat: { | ||
| 985 | out += "flat "; | ||
| 986 | break; | ||
| 987 | } | ||
| 988 | case Tegra::Shader::IpaInterpMode::Linear: { | ||
| 989 | out += "noperspective "; | ||
| 990 | break; | ||
| 991 | } | ||
| 992 | case Tegra::Shader::IpaInterpMode::Perspective: { | ||
| 993 | // Default, Smooth | ||
| 994 | break; | ||
| 995 | } | ||
| 996 | default: { | ||
| 997 | UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode)); | ||
| 998 | } | ||
| 999 | } | ||
| 1000 | switch (sample_mode) { | ||
| 1001 | case Tegra::Shader::IpaSampleMode::Centroid: | ||
| 1002 | // It can be implemented with the "centroid " keyword in glsl | ||
| 1003 | UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid"); | ||
| 1004 | break; | ||
| 1005 | case Tegra::Shader::IpaSampleMode::Default: | ||
| 1006 | // Default, n/a | ||
| 1007 | break; | ||
| 1008 | default: { | ||
| 1009 | UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode)); | ||
| 1010 | break; | ||
| 1011 | } | ||
| 1012 | } | ||
| 1013 | return out; | ||
| 1014 | } | ||
| 1015 | |||
| 1016 | /// Generates code representing the declaration name of an output attribute register. | ||
| 1017 | std::string GetOutputAttribute(Attribute::Index attribute) { | ||
| 1018 | switch (attribute) { | ||
| 1019 | case Attribute::Index::PointSize: | ||
| 1020 | return "gl_PointSize"; | ||
| 1021 | case Attribute::Index::Position: | ||
| 1022 | return "position"; | ||
| 1023 | case Attribute::Index::ClipDistances0123: | ||
| 1024 | case Attribute::Index::ClipDistances4567: { | ||
| 1025 | return "gl_ClipDistance"; | ||
| 1026 | } | ||
| 1027 | default: | ||
| 1028 | const u32 index{static_cast<u32>(attribute) - | ||
| 1029 | static_cast<u32>(Attribute::Index::Attribute_0)}; | ||
| 1030 | if (attribute >= Attribute::Index::Attribute_0) { | ||
| 1031 | declr_output_attribute.insert(attribute); | ||
| 1032 | return "output_attribute_" + std::to_string(index); | ||
| 1033 | } | ||
| 1034 | |||
| 1035 | UNIMPLEMENTED_MSG("Unhandled output attribute={}", index); | ||
| 1036 | return {}; | ||
| 1037 | } | ||
| 1038 | } | ||
| 1039 | |||
| 1040 | ShaderWriter& shader; | ||
| 1041 | ShaderWriter& declarations; | ||
| 1042 | std::vector<GLSLRegister> regs; | ||
| 1043 | std::unordered_map<Attribute::Index, Tegra::Shader::IpaMode> declr_input_attribute; | ||
| 1044 | std::set<Attribute::Index> declr_output_attribute; | ||
| 1045 | std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers; | ||
| 1046 | std::vector<SamplerEntry> used_samplers; | ||
| 1047 | const Maxwell3D::Regs::ShaderStage& stage; | ||
| 1048 | const std::string& suffix; | ||
| 1049 | const Tegra::Shader::Header& header; | ||
| 1050 | std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used; | ||
| 1051 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | ||
| 1052 | u64 local_memory_size; | ||
| 1053 | }; | ||
| 1054 | |||
| 1055 | class GLSLGenerator { | ||
| 1056 | public: | ||
| 1057 | GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code, | ||
| 1058 | u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix, | ||
| 1059 | std::size_t shader_length) | ||
| 1060 | : subroutines(subroutines), program_code(program_code), main_offset(main_offset), | ||
| 1061 | stage(stage), suffix(suffix), shader_length(shader_length) { | ||
| 1062 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | ||
| 1063 | local_memory_size = header.GetLocalMemorySize(); | ||
| 1064 | regs.SetLocalMemory(local_memory_size); | ||
| 1065 | Generate(suffix); | ||
| 1066 | } | ||
| 1067 | |||
| 1068 | std::string GetShaderCode() { | ||
| 1069 | return declarations.GetResult() + shader.GetResult(); | ||
| 1070 | } | ||
| 1071 | |||
| 1072 | /// Returns entries in the shader that are useful for external functions | ||
| 1073 | ShaderEntries GetEntries() const { | ||
| 1074 | return {regs.GetConstBuffersDeclarations(), regs.GetSamplers(), regs.GetClipDistances(), | ||
| 1075 | shader_length}; | ||
| 1076 | } | ||
| 1077 | |||
| 1078 | private: | ||
| 1079 | /// Gets the Subroutine object corresponding to the specified address. | ||
| 1080 | const Subroutine& GetSubroutine(u32 begin, u32 end) const { | ||
| 1081 | const auto iter = subroutines.find(Subroutine{begin, end, suffix}); | ||
| 1082 | ASSERT(iter != subroutines.end()); | ||
| 1083 | return *iter; | ||
| 1084 | } | ||
| 1085 | |||
| 1086 | /// Generates code representing a 19-bit immediate value | ||
| 1087 | static std::string GetImmediate19(const Instruction& instr) { | ||
| 1088 | return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_19()); | ||
| 1089 | } | ||
| 1090 | |||
| 1091 | /// Generates code representing a 32-bit immediate value | ||
| 1092 | static std::string GetImmediate32(const Instruction& instr) { | ||
| 1093 | return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_32()); | ||
| 1094 | } | ||
| 1095 | |||
| 1096 | /// Generates code representing a vec2 pair unpacked from a half float immediate | ||
| 1097 | static std::string UnpackHalfImmediate(const Instruction& instr, bool negate) { | ||
| 1098 | const std::string immediate = GetHalfFloat(std::to_string(instr.half_imm.PackImmediates())); | ||
| 1099 | if (!negate) { | ||
| 1100 | return immediate; | ||
| 1101 | } | ||
| 1102 | const std::string negate_first = instr.half_imm.first_negate != 0 ? "-" : ""; | ||
| 1103 | const std::string negate_second = instr.half_imm.second_negate != 0 ? "-" : ""; | ||
| 1104 | const std::string negate_vec = "vec2(" + negate_first + "1, " + negate_second + "1)"; | ||
| 1105 | |||
| 1106 | return '(' + immediate + " * " + negate_vec + ')'; | ||
| 1107 | } | ||
| 1108 | |||
| 1109 | /// Generates code representing a texture sampler. | ||
| 1110 | std::string GetSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array, | ||
| 1111 | bool is_shadow) { | ||
| 1112 | return regs.AccessSampler(sampler, type, is_array, is_shadow); | ||
| 1113 | } | ||
| 1114 | |||
| 1115 | /** | ||
| 1116 | * Adds code that calls a subroutine. | ||
| 1117 | * @param subroutine the subroutine to call. | ||
| 1118 | */ | ||
| 1119 | void CallSubroutine(const Subroutine& subroutine) { | ||
| 1120 | if (subroutine.exit_method == ExitMethod::AlwaysEnd) { | ||
| 1121 | shader.AddLine(subroutine.GetName() + "();"); | ||
| 1122 | shader.AddLine("return true;"); | ||
| 1123 | } else if (subroutine.exit_method == ExitMethod::Conditional) { | ||
| 1124 | shader.AddLine("if (" + subroutine.GetName() + "()) { return true; }"); | ||
| 1125 | } else { | ||
| 1126 | shader.AddLine(subroutine.GetName() + "();"); | ||
| 1127 | } | ||
| 1128 | } | ||
| 1129 | |||
| 1130 | /* | ||
| 1131 | * Writes code that assigns a predicate boolean variable. | ||
| 1132 | * @param pred The id of the predicate to write to. | ||
| 1133 | * @param value The expression value to assign to the predicate. | ||
| 1134 | */ | ||
| 1135 | void SetPredicate(u64 pred, const std::string& value) { | ||
| 1136 | using Tegra::Shader::Pred; | ||
| 1137 | // Can't assign to the constant predicate. | ||
| 1138 | ASSERT(pred != static_cast<u64>(Pred::UnusedIndex)); | ||
| 1139 | |||
| 1140 | std::string variable = 'p' + std::to_string(pred) + '_' + suffix; | ||
| 1141 | shader.AddLine(variable + " = " + value + ';'); | ||
| 1142 | declr_predicates.insert(std::move(variable)); | ||
| 1143 | } | ||
| 1144 | |||
| 1145 | /* | ||
| 1146 | * Returns the condition to use in the 'if' for a predicated instruction. | ||
| 1147 | * @param instr Instruction to generate the if condition for. | ||
| 1148 | * @returns string containing the predicate condition. | ||
| 1149 | */ | ||
| 1150 | std::string GetPredicateCondition(u64 index, bool negate) { | ||
| 1151 | using Tegra::Shader::Pred; | ||
| 1152 | std::string variable; | ||
| 1153 | |||
| 1154 | // Index 7 is used as an 'Always True' condition. | ||
| 1155 | if (index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 1156 | variable = "true"; | ||
| 1157 | } else { | ||
| 1158 | variable = 'p' + std::to_string(index) + '_' + suffix; | ||
| 1159 | declr_predicates.insert(variable); | ||
| 1160 | } | ||
| 1161 | if (negate) { | ||
| 1162 | return "!(" + variable + ')'; | ||
| 1163 | } | ||
| 1164 | |||
| 1165 | return variable; | ||
| 1166 | } | ||
| 1167 | |||
| 1168 | /** | ||
| 1169 | * Returns the comparison string to use to compare two values in the 'set' family of | ||
| 1170 | * instructions. | ||
| 1171 | * @param condition The condition used in the 'set'-family instruction. | ||
| 1172 | * @param op_a First operand to use for the comparison. | ||
| 1173 | * @param op_b Second operand to use for the comparison. | ||
| 1174 | * @returns String corresponding to the GLSL operator that matches the desired comparison. | ||
| 1175 | */ | ||
| 1176 | std::string GetPredicateComparison(Tegra::Shader::PredCondition condition, | ||
| 1177 | const std::string& op_a, const std::string& op_b) const { | ||
| 1178 | using Tegra::Shader::PredCondition; | ||
| 1179 | static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = { | ||
| 1180 | {PredCondition::LessThan, "<"}, | ||
| 1181 | {PredCondition::Equal, "=="}, | ||
| 1182 | {PredCondition::LessEqual, "<="}, | ||
| 1183 | {PredCondition::GreaterThan, ">"}, | ||
| 1184 | {PredCondition::NotEqual, "!="}, | ||
| 1185 | {PredCondition::GreaterEqual, ">="}, | ||
| 1186 | {PredCondition::LessThanWithNan, "<"}, | ||
| 1187 | {PredCondition::NotEqualWithNan, "!="}, | ||
| 1188 | {PredCondition::LessEqualWithNan, "<="}, | ||
| 1189 | {PredCondition::GreaterThanWithNan, ">"}, | ||
| 1190 | {PredCondition::GreaterEqualWithNan, ">="}}; | ||
| 1191 | |||
| 1192 | const auto& comparison{PredicateComparisonStrings.find(condition)}; | ||
| 1193 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonStrings.end(), | ||
| 1194 | "Unknown predicate comparison operation"); | ||
| 1195 | |||
| 1196 | std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'}; | ||
| 1197 | if (condition == PredCondition::LessThanWithNan || | ||
| 1198 | condition == PredCondition::NotEqualWithNan || | ||
| 1199 | condition == PredCondition::LessEqualWithNan || | ||
| 1200 | condition == PredCondition::GreaterThanWithNan || | ||
| 1201 | condition == PredCondition::GreaterEqualWithNan) { | ||
| 1202 | predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')'; | ||
| 1203 | } | ||
| 1204 | |||
| 1205 | return predicate; | ||
| 1206 | } | ||
| 1207 | |||
| 1208 | /** | ||
| 1209 | * Returns the operator string to use to combine two predicates in the 'setp' family of | ||
| 1210 | * instructions. | ||
| 1211 | * @params operation The operator used in the 'setp'-family instruction. | ||
| 1212 | * @returns String corresponding to the GLSL operator that matches the desired operator. | ||
| 1213 | */ | ||
| 1214 | std::string GetPredicateCombiner(Tegra::Shader::PredOperation operation) const { | ||
| 1215 | using Tegra::Shader::PredOperation; | ||
| 1216 | static const std::unordered_map<PredOperation, const char*> PredicateOperationStrings = { | ||
| 1217 | {PredOperation::And, "&&"}, | ||
| 1218 | {PredOperation::Or, "||"}, | ||
| 1219 | {PredOperation::Xor, "^^"}, | ||
| 1220 | }; | ||
| 1221 | |||
| 1222 | auto op = PredicateOperationStrings.find(operation); | ||
| 1223 | UNIMPLEMENTED_IF_MSG(op == PredicateOperationStrings.end(), "Unknown predicate operation"); | ||
| 1224 | return op->second; | ||
| 1225 | } | ||
| 1226 | |||
/**
 * Wraps a GLSL operand expression in an abs() call, a negation, or both. When both flags are
 * set the absolute value is taken first and the negation is applied to its result.
 * @param operand The GLSL expression to transform.
 * @param abs Whether the operand has to be wrapped in abs().
 * @param neg Whether the (possibly abs-wrapped) operand has to be negated.
 * @returns The transformed GLSL expression; the operand itself when both flags are false.
 */
static std::string GetOperandAbsNeg(const std::string& operand, bool abs, bool neg) {
    const std::string magnitude = abs ? "abs(" + operand + ')' : operand;
    return neg ? "-(" + magnitude + ')' : magnitude;
}
| 1250 | |||
| 1251 | /* | ||
| 1252 | * Transforms the input string GLSL operand into an unpacked half float pair. | ||
| 1253 | * @note This function returns a float type pair instead of a half float pair. This is because | ||
| 1254 | * real half floats are not standardized in GLSL but unpackHalf2x16 (which returns a vec2) is. | ||
| 1255 | * @param operand Input operand. It has to be an unsigned integer. | ||
| 1256 | * @param type How to unpack the unsigned integer to a half float pair. | ||
| 1257 | * @param abs Get the absolute value of unpacked half floats. | ||
| 1258 | * @param neg Get the negative value of unpacked half floats. | ||
| 1259 | * @returns String corresponding to a half float pair. | ||
| 1260 | */ | ||
| 1261 | static std::string GetHalfFloat(const std::string& operand, | ||
| 1262 | Tegra::Shader::HalfType type = Tegra::Shader::HalfType::H0_H1, | ||
| 1263 | bool abs = false, bool neg = false) { | ||
| 1264 | // "vec2" calls emitted in this function are intended to alias components. | ||
| 1265 | const std::string value = [&]() { | ||
| 1266 | switch (type) { | ||
| 1267 | case Tegra::Shader::HalfType::H0_H1: | ||
| 1268 | return "unpackHalf2x16(" + operand + ')'; | ||
| 1269 | case Tegra::Shader::HalfType::F32: | ||
| 1270 | return "vec2(uintBitsToFloat(" + operand + "))"; | ||
| 1271 | case Tegra::Shader::HalfType::H0_H0: | ||
| 1272 | case Tegra::Shader::HalfType::H1_H1: { | ||
| 1273 | const bool high = type == Tegra::Shader::HalfType::H1_H1; | ||
| 1274 | const char unpack_index = "xy"[high ? 1 : 0]; | ||
| 1275 | return "vec2(unpackHalf2x16(" + operand + ")." + unpack_index + ')'; | ||
| 1276 | } | ||
| 1277 | default: | ||
| 1278 | UNREACHABLE(); | ||
| 1279 | return std::string("vec2(0)"); | ||
| 1280 | } | ||
| 1281 | }(); | ||
| 1282 | |||
| 1283 | return GetOperandAbsNeg(value, abs, neg); | ||
| 1284 | } | ||
| 1285 | |||
| 1286 | /* | ||
| 1287 | * Returns whether the instruction at the specified offset is a 'sched' instruction. | ||
| 1288 | * Sched instructions always appear before a sequence of 3 instructions. | ||
| 1289 | */ | ||
| 1290 | bool IsSchedInstruction(u32 offset) const { | ||
| 1291 | // sched instructions appear once every 4 instructions. | ||
| 1292 | static constexpr std::size_t SchedPeriod = 4; | ||
| 1293 | u32 absolute_offset = offset - main_offset; | ||
| 1294 | |||
| 1295 | return (absolute_offset % SchedPeriod) == 0; | ||
| 1296 | } | ||
| 1297 | |||
| 1298 | void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a, | ||
| 1299 | const std::string& op_b, | ||
| 1300 | Tegra::Shader::PredicateResultMode predicate_mode, | ||
| 1301 | Tegra::Shader::Pred predicate, const bool set_cc) { | ||
| 1302 | std::string result{}; | ||
| 1303 | switch (logic_op) { | ||
| 1304 | case LogicOperation::And: { | ||
| 1305 | result = '(' + op_a + " & " + op_b + ')'; | ||
| 1306 | break; | ||
| 1307 | } | ||
| 1308 | case LogicOperation::Or: { | ||
| 1309 | result = '(' + op_a + " | " + op_b + ')'; | ||
| 1310 | break; | ||
| 1311 | } | ||
| 1312 | case LogicOperation::Xor: { | ||
| 1313 | result = '(' + op_a + " ^ " + op_b + ')'; | ||
| 1314 | break; | ||
| 1315 | } | ||
| 1316 | case LogicOperation::PassB: { | ||
| 1317 | result = op_b; | ||
| 1318 | break; | ||
| 1319 | } | ||
| 1320 | default: | ||
| 1321 | UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op)); | ||
| 1322 | } | ||
| 1323 | |||
| 1324 | if (dest != Tegra::Shader::Register::ZeroIndex) { | ||
| 1325 | regs.SetRegisterToInteger(dest, true, 0, result, 1, 1, false, set_cc); | ||
| 1326 | } | ||
| 1327 | |||
| 1328 | using Tegra::Shader::PredicateResultMode; | ||
| 1329 | // Write the predicate value depending on the predicate mode. | ||
| 1330 | switch (predicate_mode) { | ||
| 1331 | case PredicateResultMode::None: | ||
| 1332 | // Do nothing. | ||
| 1333 | return; | ||
| 1334 | case PredicateResultMode::NotZero: | ||
| 1335 | // Set the predicate to true if the result is not zero. | ||
| 1336 | SetPredicate(static_cast<u64>(predicate), '(' + result + ") != 0"); | ||
| 1337 | break; | ||
| 1338 | default: | ||
| 1339 | UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", | ||
| 1340 | static_cast<u32>(predicate_mode)); | ||
| 1341 | } | ||
| 1342 | } | ||
| 1343 | |||
| 1344 | void WriteLop3Instruction(Register dest, const std::string& op_a, const std::string& op_b, | ||
| 1345 | const std::string& op_c, const std::string& imm_lut, | ||
| 1346 | const bool set_cc) { | ||
| 1347 | if (dest == Tegra::Shader::Register::ZeroIndex) { | ||
| 1348 | return; | ||
| 1349 | } | ||
| 1350 | |||
| 1351 | static constexpr std::array<const char*, 32> shift_amounts = { | ||
| 1352 | "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", | ||
| 1353 | "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", | ||
| 1354 | "22", "23", "24", "25", "26", "27", "28", "29", "30", "31"}; | ||
| 1355 | |||
| 1356 | std::string result; | ||
| 1357 | result += '('; | ||
| 1358 | |||
| 1359 | for (std::size_t i = 0; i < shift_amounts.size(); ++i) { | ||
| 1360 | if (i) | ||
| 1361 | result += '|'; | ||
| 1362 | result += "(((" + imm_lut + " >> (((" + op_c + " >> " + shift_amounts[i] + | ||
| 1363 | ") & 1) | ((" + op_b + " >> " + shift_amounts[i] + ") & 1) << 1 | ((" + op_a + | ||
| 1364 | " >> " + shift_amounts[i] + ") & 1) << 2)) & 1) << " + shift_amounts[i] + ")"; | ||
| 1365 | } | ||
| 1366 | |||
| 1367 | result += ')'; | ||
| 1368 | |||
| 1369 | regs.SetRegisterToInteger(dest, true, 0, result, 1, 1, false, set_cc); | ||
| 1370 | } | ||
| 1371 | |||
| 1372 | void WriteTexsInstructionFloat(const Instruction& instr, const std::string& texture) { | ||
| 1373 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle | ||
| 1374 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 | ||
| 1375 | |||
| 1376 | std::size_t written_components = 0; | ||
| 1377 | for (u32 component = 0; component < 4; ++component) { | ||
| 1378 | if (!instr.texs.IsComponentEnabled(component)) { | ||
| 1379 | continue; | ||
| 1380 | } | ||
| 1381 | |||
| 1382 | if (written_components < 2) { | ||
| 1383 | // Write the first two swizzle components to gpr0 and gpr0+1 | ||
| 1384 | regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false, false, | ||
| 1385 | written_components % 2); | ||
| 1386 | } else { | ||
| 1387 | ASSERT(instr.texs.HasTwoDestinations()); | ||
| 1388 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | ||
| 1389 | regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false, false, | ||
| 1390 | written_components % 2); | ||
| 1391 | } | ||
| 1392 | |||
| 1393 | ++written_components; | ||
| 1394 | } | ||
| 1395 | } | ||
| 1396 | |||
| 1397 | void WriteTexsInstructionHalfFloat(const Instruction& instr, const std::string& texture) { | ||
| 1398 | // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half | ||
| 1399 | // float instruction). | ||
| 1400 | |||
| 1401 | std::array<std::string, 4> components; | ||
| 1402 | u32 written_components = 0; | ||
| 1403 | |||
| 1404 | for (u32 component = 0; component < 4; ++component) { | ||
| 1405 | if (!instr.texs.IsComponentEnabled(component)) | ||
| 1406 | continue; | ||
| 1407 | components[written_components++] = texture + GetSwizzle(component); | ||
| 1408 | } | ||
| 1409 | if (written_components == 0) | ||
| 1410 | return; | ||
| 1411 | |||
| 1412 | const auto BuildComponent = [&](std::string low, std::string high, bool high_enabled) { | ||
| 1413 | return "vec2(" + low + ", " + (high_enabled ? high : "0") + ')'; | ||
| 1414 | }; | ||
| 1415 | |||
| 1416 | regs.SetRegisterToHalfFloat( | ||
| 1417 | instr.gpr0, 0, BuildComponent(components[0], components[1], written_components > 1), | ||
| 1418 | Tegra::Shader::HalfMerge::H0_H1, 1, 1); | ||
| 1419 | |||
| 1420 | if (written_components > 2) { | ||
| 1421 | ASSERT(instr.texs.HasTwoDestinations()); | ||
| 1422 | regs.SetRegisterToHalfFloat( | ||
| 1423 | instr.gpr28, 0, | ||
| 1424 | BuildComponent(components[2], components[3], written_components > 3), | ||
| 1425 | Tegra::Shader::HalfMerge::H0_H1, 1, 1); | ||
| 1426 | } | ||
| 1427 | } | ||
| 1428 | |||
| 1429 | static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) { | ||
| 1430 | switch (texture_type) { | ||
| 1431 | case Tegra::Shader::TextureType::Texture1D: | ||
| 1432 | return 1; | ||
| 1433 | case Tegra::Shader::TextureType::Texture2D: | ||
| 1434 | return 2; | ||
| 1435 | case Tegra::Shader::TextureType::Texture3D: | ||
| 1436 | case Tegra::Shader::TextureType::TextureCube: | ||
| 1437 | return 3; | ||
| 1438 | default: | ||
| 1439 | UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type)); | ||
| 1440 | return 0; | ||
| 1441 | } | ||
| 1442 | } | ||
| 1443 | |||
| 1444 | /* | ||
| 1445 | * Emits code to push the input target address to the flow address stack, incrementing the stack | ||
| 1446 | * top. | ||
| 1447 | */ | ||
| 1448 | void EmitPushToFlowStack(u32 target) { | ||
| 1449 | const auto scope = shader.Scope(); | ||
| 1450 | |||
| 1451 | shader.AddLine("flow_stack[flow_stack_top] = " + std::to_string(target) + "u;"); | ||
| 1452 | shader.AddLine("flow_stack_top++;"); | ||
| 1453 | } | ||
| 1454 | |||
| 1455 | /* | ||
| 1456 | * Emits code to pop an address from the flow address stack, setting the jump address to the | ||
| 1457 | * popped address and decrementing the stack top. | ||
| 1458 | */ | ||
| 1459 | void EmitPopFromFlowStack() { | ||
| 1460 | const auto scope = shader.Scope(); | ||
| 1461 | |||
| 1462 | shader.AddLine("flow_stack_top--;"); | ||
| 1463 | shader.AddLine("jmp_to = flow_stack[flow_stack_top];"); | ||
| 1464 | shader.AddLine("break;"); | ||
| 1465 | } | ||
| 1466 | |||
| 1467 | /// Writes the output values from a fragment shader to the corresponding GLSL output variables. | ||
| 1468 | void EmitFragmentOutputsWrite() { | ||
| 1469 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); | ||
| 1470 | |||
| 1471 | UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Samplemask write is unimplemented"); | ||
| 1472 | |||
| 1473 | shader.AddLine("if (alpha_test[0] != 0) {"); | ||
| 1474 | ++shader.scope; | ||
| 1475 | // We start on the register containing the alpha value in the first RT. | ||
| 1476 | u32 current_reg = 3; | ||
| 1477 | for (u32 render_target = 0; render_target < Maxwell3D::Regs::NumRenderTargets; | ||
| 1478 | ++render_target) { | ||
| 1479 | // TODO(Blinkhawk): verify the behavior of alpha testing on hardware when | ||
| 1480 | // multiple render targets are used. | ||
| 1481 | if (header.ps.IsColorComponentOutputEnabled(render_target, 0) || | ||
| 1482 | header.ps.IsColorComponentOutputEnabled(render_target, 1) || | ||
| 1483 | header.ps.IsColorComponentOutputEnabled(render_target, 2) || | ||
| 1484 | header.ps.IsColorComponentOutputEnabled(render_target, 3)) { | ||
| 1485 | shader.AddLine(fmt::format("if (!AlphaFunc({})) discard;", | ||
| 1486 | regs.GetRegisterAsFloat(current_reg))); | ||
| 1487 | current_reg += 4; | ||
| 1488 | } | ||
| 1489 | } | ||
| 1490 | --shader.scope; | ||
| 1491 | shader.AddLine('}'); | ||
| 1492 | |||
| 1493 | // Write the color outputs using the data in the shader registers, disabled | ||
| 1494 | // rendertargets/components are skipped in the register assignment. | ||
| 1495 | current_reg = 0; | ||
| 1496 | for (u32 render_target = 0; render_target < Maxwell3D::Regs::NumRenderTargets; | ||
| 1497 | ++render_target) { | ||
| 1498 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. | ||
| 1499 | for (u32 component = 0; component < 4; ++component) { | ||
| 1500 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { | ||
| 1501 | shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component, | ||
| 1502 | regs.GetRegisterAsFloat(current_reg))); | ||
| 1503 | ++current_reg; | ||
| 1504 | } | ||
| 1505 | } | ||
| 1506 | } | ||
| 1507 | |||
| 1508 | if (header.ps.omap.depth) { | ||
| 1509 | // The depth output is always 2 registers after the last color output, and current_reg | ||
| 1510 | // already contains one past the last color register. | ||
| 1511 | |||
| 1512 | shader.AddLine( | ||
| 1513 | "gl_FragDepth = " + | ||
| 1514 | regs.GetRegisterAsFloat(static_cast<Tegra::Shader::Register>(current_reg) + 1) + | ||
| 1515 | ';'); | ||
| 1516 | } | ||
| 1517 | } | ||
| 1518 | |||
| 1519 | /// Unpacks a video instruction operand (e.g. VMAD). | ||
| 1520 | std::string GetVideoOperand(const std::string& op, bool is_chunk, bool is_signed, | ||
| 1521 | Tegra::Shader::VideoType type, u64 byte_height) { | ||
| 1522 | const std::string value = [&]() { | ||
| 1523 | if (!is_chunk) { | ||
| 1524 | const auto offset = static_cast<u32>(byte_height * 8); | ||
| 1525 | return "((" + op + " >> " + std::to_string(offset) + ") & 0xff)"; | ||
| 1526 | } | ||
| 1527 | const std::string zero = "0"; | ||
| 1528 | |||
| 1529 | switch (type) { | ||
| 1530 | case Tegra::Shader::VideoType::Size16_Low: | ||
| 1531 | return '(' + op + " & 0xffff)"; | ||
| 1532 | case Tegra::Shader::VideoType::Size16_High: | ||
| 1533 | return '(' + op + " >> 16)"; | ||
| 1534 | case Tegra::Shader::VideoType::Size32: | ||
| 1535 | // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when | ||
| 1536 | // this type is used (1 * 1 + 0 == 0x5b800000). Until a better | ||
| 1537 | // explanation is found: abort. | ||
| 1538 | UNIMPLEMENTED(); | ||
| 1539 | return zero; | ||
| 1540 | case Tegra::Shader::VideoType::Invalid: | ||
| 1541 | UNREACHABLE_MSG("Invalid instruction encoding"); | ||
| 1542 | return zero; | ||
| 1543 | default: | ||
| 1544 | UNREACHABLE(); | ||
| 1545 | return zero; | ||
| 1546 | } | ||
| 1547 | }(); | ||
| 1548 | |||
| 1549 | if (is_signed) { | ||
| 1550 | return "int(" + value + ')'; | ||
| 1551 | } | ||
| 1552 | return value; | ||
| 1553 | }; | ||
| 1554 | |||
| 1555 | /// Gets the A operand for a video instruction. | ||
| 1556 | std::string GetVideoOperandA(Instruction instr) { | ||
| 1557 | return GetVideoOperand(regs.GetRegisterAsInteger(instr.gpr8, 0, false), | ||
| 1558 | instr.video.is_byte_chunk_a != 0, instr.video.signed_a, | ||
| 1559 | instr.video.type_a, instr.video.byte_height_a); | ||
| 1560 | } | ||
| 1561 | |||
| 1562 | /// Gets the B operand for a video instruction. | ||
| 1563 | std::string GetVideoOperandB(Instruction instr) { | ||
| 1564 | if (instr.video.use_register_b) { | ||
| 1565 | return GetVideoOperand(regs.GetRegisterAsInteger(instr.gpr20, 0, false), | ||
| 1566 | instr.video.is_byte_chunk_b != 0, instr.video.signed_b, | ||
| 1567 | instr.video.type_b, instr.video.byte_height_b); | ||
| 1568 | } else { | ||
| 1569 | return '(' + | ||
| 1570 | std::to_string(instr.video.signed_b ? static_cast<s16>(instr.alu.GetImm20_16()) | ||
| 1571 | : instr.alu.GetImm20_16()) + | ||
| 1572 | ')'; | ||
| 1573 | } | ||
| 1574 | } | ||
| 1575 | |||
| 1576 | std::pair<size_t, std::string> ValidateAndGetCoordinateElement( | ||
| 1577 | const Tegra::Shader::TextureType texture_type, const bool depth_compare, | ||
| 1578 | const bool is_array, const bool lod_bias_enabled, size_t max_coords, size_t max_inputs) { | ||
| 1579 | const size_t coord_count = TextureCoordinates(texture_type); | ||
| 1580 | |||
| 1581 | size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); | ||
| 1582 | const size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); | ||
| 1583 | if (total_coord_count > max_coords || total_reg_count > max_inputs) { | ||
| 1584 | UNIMPLEMENTED_MSG("Unsupported Texture operation"); | ||
| 1585 | total_coord_count = std::min(total_coord_count, max_coords); | ||
| 1586 | } | ||
| 1587 | // 1D.DC opengl is using a vec3 but 2nd component is ignored later. | ||
| 1588 | total_coord_count += | ||
| 1589 | (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D) | ||
| 1590 | ? 1 | ||
| 1591 | : 0; | ||
| 1592 | |||
| 1593 | constexpr std::array<const char*, 5> coord_container{ | ||
| 1594 | {"", "float coord = (", "vec2 coord = vec2(", "vec3 coord = vec3(", | ||
| 1595 | "vec4 coord = vec4("}}; | ||
| 1596 | |||
| 1597 | return std::pair<size_t, std::string>(coord_count, coord_container[total_coord_count]); | ||
| 1598 | } | ||
| 1599 | |||
| 1600 | std::string GetTextureCode(const Tegra::Shader::Instruction& instr, | ||
| 1601 | const Tegra::Shader::TextureType texture_type, | ||
| 1602 | const Tegra::Shader::TextureProcessMode process_mode, | ||
| 1603 | const bool depth_compare, const bool is_array, | ||
| 1604 | const size_t bias_offset) { | ||
| 1605 | |||
| 1606 | if ((texture_type == Tegra::Shader::TextureType::Texture3D && | ||
| 1607 | (is_array || depth_compare)) || | ||
| 1608 | (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && | ||
| 1609 | depth_compare)) { | ||
| 1610 | UNIMPLEMENTED_MSG("This method is not supported."); | ||
| 1611 | } | ||
| 1612 | |||
| 1613 | const std::string sampler = | ||
| 1614 | GetSampler(instr.sampler, texture_type, is_array, depth_compare); | ||
| 1615 | |||
| 1616 | const bool lod_needed = process_mode == Tegra::Shader::TextureProcessMode::LZ || | ||
| 1617 | process_mode == Tegra::Shader::TextureProcessMode::LL || | ||
| 1618 | process_mode == Tegra::Shader::TextureProcessMode::LLA; | ||
| 1619 | |||
| 1620 | // LOD selection (either via bias or explicit textureLod) not supported in GL for | ||
| 1621 | // sampler2DArrayShadow and samplerCubeArrayShadow. | ||
| 1622 | const bool gl_lod_supported = !( | ||
| 1623 | (texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) || | ||
| 1624 | (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare)); | ||
| 1625 | |||
| 1626 | const std::string read_method = lod_needed && gl_lod_supported ? "textureLod(" : "texture("; | ||
| 1627 | std::string texture = read_method + sampler + ", coord"; | ||
| 1628 | |||
| 1629 | UNIMPLEMENTED_IF(process_mode != Tegra::Shader::TextureProcessMode::None && | ||
| 1630 | !gl_lod_supported); | ||
| 1631 | |||
| 1632 | if (process_mode != Tegra::Shader::TextureProcessMode::None && gl_lod_supported) { | ||
| 1633 | if (process_mode == Tegra::Shader::TextureProcessMode::LZ) { | ||
| 1634 | texture += ", 0.0"; | ||
| 1635 | } else { | ||
| 1636 | // If present, lod or bias are always stored in the register indexed by the | ||
| 1637 | // gpr20 | ||
| 1638 | // field with an offset depending on the usage of the other registers | ||
| 1639 | texture += ',' + regs.GetRegisterAsFloat(instr.gpr20.Value() + bias_offset); | ||
| 1640 | } | ||
| 1641 | } | ||
| 1642 | texture += ")"; | ||
| 1643 | return texture; | ||
| 1644 | } | ||
| 1645 | |||
| 1646 | std::pair<std::string, std::string> GetTEXCode( | ||
| 1647 | const Instruction& instr, const Tegra::Shader::TextureType texture_type, | ||
| 1648 | const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare, | ||
| 1649 | const bool is_array) { | ||
| 1650 | const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None && | ||
| 1651 | process_mode != Tegra::Shader::TextureProcessMode::LZ); | ||
| 1652 | |||
| 1653 | const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement( | ||
| 1654 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); | ||
| 1655 | // If enabled arrays index is always stored in the gpr8 field | ||
| 1656 | const u64 array_register = instr.gpr8.Value(); | ||
| 1657 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used | ||
| 1658 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 1659 | |||
| 1660 | std::string coord = coord_dcl; | ||
| 1661 | for (size_t i = 0; i < coord_count;) { | ||
| 1662 | coord += regs.GetRegisterAsFloat(coord_register + i); | ||
| 1663 | ++i; | ||
| 1664 | if (i != coord_count) { | ||
| 1665 | coord += ','; | ||
| 1666 | } | ||
| 1667 | } | ||
| 1668 | // 1D.DC in opengl the 2nd component is ignored. | ||
| 1669 | if (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D) { | ||
| 1670 | coord += ",0.0"; | ||
| 1671 | } | ||
| 1672 | if (is_array) { | ||
| 1673 | coord += ',' + regs.GetRegisterAsInteger(array_register); | ||
| 1674 | } | ||
| 1675 | if (depth_compare) { | ||
| 1676 | // Depth is always stored in the register signaled by gpr20 | ||
| 1677 | // or in the next register if lod or bias are used | ||
| 1678 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | ||
| 1679 | coord += ',' + regs.GetRegisterAsFloat(depth_register); | ||
| 1680 | } | ||
| 1681 | coord += ");"; | ||
| 1682 | return std::make_pair( | ||
| 1683 | coord, GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, 0)); | ||
| 1684 | } | ||
| 1685 | |||
| 1686 | std::pair<std::string, std::string> GetTEXSCode( | ||
| 1687 | const Instruction& instr, const Tegra::Shader::TextureType texture_type, | ||
| 1688 | const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare, | ||
| 1689 | const bool is_array) { | ||
| 1690 | const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None && | ||
| 1691 | process_mode != Tegra::Shader::TextureProcessMode::LZ); | ||
| 1692 | |||
| 1693 | const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement( | ||
| 1694 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4); | ||
| 1695 | // If enabled arrays index is always stored in the gpr8 field | ||
| 1696 | const u64 array_register = instr.gpr8.Value(); | ||
| 1697 | // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used | ||
| 1698 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 1699 | const u64 last_coord_register = | ||
| 1700 | (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) | ||
| 1701 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 1702 | : coord_register + 1; | ||
| 1703 | |||
| 1704 | std::string coord = coord_dcl; | ||
| 1705 | for (size_t i = 0; i < coord_count; ++i) { | ||
| 1706 | const bool last = (i == (coord_count - 1)) && (coord_count > 1); | ||
| 1707 | coord += regs.GetRegisterAsFloat(last ? last_coord_register : coord_register + i); | ||
| 1708 | if (i < coord_count - 1) { | ||
| 1709 | coord += ','; | ||
| 1710 | } | ||
| 1711 | } | ||
| 1712 | |||
| 1713 | if (is_array) { | ||
| 1714 | coord += ',' + regs.GetRegisterAsInteger(array_register); | ||
| 1715 | } | ||
| 1716 | if (depth_compare) { | ||
| 1717 | // Depth is always stored in the register signaled by gpr20 | ||
| 1718 | // or in the next register if lod or bias are used | ||
| 1719 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | ||
| 1720 | coord += ',' + regs.GetRegisterAsFloat(depth_register); | ||
| 1721 | } | ||
| 1722 | coord += ");"; | ||
| 1723 | |||
| 1724 | return std::make_pair(coord, | ||
| 1725 | GetTextureCode(instr, texture_type, process_mode, depth_compare, | ||
| 1726 | is_array, (coord_count > 2 ? 1 : 0))); | ||
| 1727 | } | ||
| 1728 | |||
| 1729 | std::pair<std::string, std::string> GetTLD4Code(const Instruction& instr, | ||
| 1730 | const Tegra::Shader::TextureType texture_type, | ||
| 1731 | const bool depth_compare, const bool is_array) { | ||
| 1732 | |||
| 1733 | const size_t coord_count = TextureCoordinates(texture_type); | ||
| 1734 | const size_t total_coord_count = coord_count + (is_array ? 1 : 0); | ||
| 1735 | const size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); | ||
| 1736 | |||
| 1737 | constexpr std::array<const char*, 5> coord_container{ | ||
| 1738 | {"", "", "vec2 coord = vec2(", "vec3 coord = vec3(", "vec4 coord = vec4("}}; | ||
| 1739 | |||
| 1740 | // If enabled arrays index is always stored in the gpr8 field | ||
| 1741 | const u64 array_register = instr.gpr8.Value(); | ||
| 1742 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used | ||
| 1743 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 1744 | |||
| 1745 | std::string coord = coord_container[total_coord_count]; | ||
| 1746 | for (size_t i = 0; i < coord_count;) { | ||
| 1747 | coord += regs.GetRegisterAsFloat(coord_register + i); | ||
| 1748 | ++i; | ||
| 1749 | if (i != coord_count) { | ||
| 1750 | coord += ','; | ||
| 1751 | } | ||
| 1752 | } | ||
| 1753 | |||
| 1754 | if (is_array) { | ||
| 1755 | coord += ',' + regs.GetRegisterAsInteger(array_register); | ||
| 1756 | } | ||
| 1757 | coord += ");"; | ||
| 1758 | |||
| 1759 | const std::string sampler = | ||
| 1760 | GetSampler(instr.sampler, texture_type, is_array, depth_compare); | ||
| 1761 | |||
| 1762 | std::string texture = "textureGather(" + sampler + ", coord, "; | ||
| 1763 | if (depth_compare) { | ||
| 1764 | // Depth is always stored in the register signaled by gpr20 | ||
| 1765 | texture += regs.GetRegisterAsFloat(instr.gpr20.Value()) + ')'; | ||
| 1766 | } else { | ||
| 1767 | texture += std::to_string(instr.tld4.component) + ')'; | ||
| 1768 | } | ||
| 1769 | return std::make_pair(coord, texture); | ||
| 1770 | } | ||
| 1771 | |||
| 1772 | std::pair<std::string, std::string> GetTLDSCode(const Instruction& instr, | ||
| 1773 | const Tegra::Shader::TextureType texture_type, | ||
| 1774 | const bool is_array) { | ||
| 1775 | |||
| 1776 | const size_t coord_count = TextureCoordinates(texture_type); | ||
| 1777 | const size_t total_coord_count = coord_count + (is_array ? 1 : 0); | ||
| 1778 | const bool lod_enabled = | ||
| 1779 | instr.tlds.GetTextureProcessMode() == Tegra::Shader::TextureProcessMode::LL; | ||
| 1780 | |||
| 1781 | constexpr std::array<const char*, 4> coord_container{ | ||
| 1782 | {"", "int coords = (", "ivec2 coords = ivec2(", "ivec3 coords = ivec3("}}; | ||
| 1783 | |||
| 1784 | std::string coord = coord_container[total_coord_count]; | ||
| 1785 | |||
| 1786 | // If enabled arrays index is always stored in the gpr8 field | ||
| 1787 | const u64 array_register = instr.gpr8.Value(); | ||
| 1788 | |||
| 1789 | // if is array gpr20 is used | ||
| 1790 | const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); | ||
| 1791 | |||
| 1792 | const u64 last_coord_register = | ||
| 1793 | ((coord_count > 2) || (coord_count == 2 && !lod_enabled)) && !is_array | ||
| 1794 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 1795 | : coord_register + 1; | ||
| 1796 | |||
| 1797 | for (size_t i = 0; i < coord_count; ++i) { | ||
| 1798 | const bool last = (i == (coord_count - 1)) && (coord_count > 1); | ||
| 1799 | coord += regs.GetRegisterAsInteger(last ? last_coord_register : coord_register + i); | ||
| 1800 | if (i < coord_count - 1) { | ||
| 1801 | coord += ','; | ||
| 1802 | } | ||
| 1803 | } | ||
| 1804 | if (is_array) { | ||
| 1805 | coord += ',' + regs.GetRegisterAsInteger(array_register); | ||
| 1806 | } | ||
| 1807 | coord += ");"; | ||
| 1808 | |||
| 1809 | const std::string sampler = GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 1810 | |||
| 1811 | std::string texture = "texelFetch(" + sampler + ", coords"; | ||
| 1812 | |||
| 1813 | if (lod_enabled) { | ||
| 1814 | // When lod is used always is in grp20 | ||
| 1815 | texture += ", " + regs.GetRegisterAsInteger(instr.gpr20) + ')'; | ||
| 1816 | } else { | ||
| 1817 | texture += ", 0)"; | ||
| 1818 | } | ||
| 1819 | return std::make_pair(coord, texture); | ||
| 1820 | } | ||
| 1821 | |||
| 1822 | /** | ||
| 1823 | * Compiles a single instruction from Tegra to GLSL. | ||
| 1824 | * @param offset the offset of the Tegra shader instruction. | ||
| 1825 | * @return the offset of the next instruction to execute. Usually it is the current offset | ||
| 1826 | * + 1. If the current instruction always terminates the program, returns PROGRAM_END. | ||
| 1827 | */ | ||
| 1828 | u32 CompileInstr(u32 offset) { | ||
| 1829 | // Ignore sched instructions when generating code. | ||
| 1830 | if (IsSchedInstruction(offset)) { | ||
| 1831 | return offset + 1; | ||
| 1832 | } | ||
| 1833 | |||
| 1834 | const Instruction instr = {program_code[offset]}; | ||
| 1835 | const auto opcode = OpCode::Decode(instr); | ||
| 1836 | |||
| 1837 | // Decoding failure | ||
| 1838 | if (!opcode) { | ||
| 1839 | UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); | ||
| 1840 | return offset + 1; | ||
| 1841 | } | ||
| 1842 | |||
| 1843 | shader.AddLine( | ||
| 1844 | fmt::format("// {}: {} (0x{:016x})", offset, opcode->get().GetName(), instr.value)); | ||
| 1845 | |||
| 1846 | using Tegra::Shader::Pred; | ||
| 1847 | UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, | ||
| 1848 | "NeverExecute predicate not implemented"); | ||
| 1849 | |||
| 1850 | // Some instructions (like SSY) don't have a predicate field, they are always | ||
| 1851 | // unconditionally executed. | ||
| 1852 | bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId()); | ||
| 1853 | |||
| 1854 | if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 1855 | shader.AddLine("if (" + | ||
| 1856 | GetPredicateCondition(instr.pred.pred_index, instr.negate_pred != 0) + | ||
| 1857 | ')'); | ||
| 1858 | shader.AddLine('{'); | ||
| 1859 | ++shader.scope; | ||
| 1860 | } | ||
| 1861 | |||
| 1862 | switch (opcode->get().GetType()) { | ||
| 1863 | case OpCode::Type::Arithmetic: { | ||
| 1864 | std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); | ||
| 1865 | |||
| 1866 | std::string op_b; | ||
| 1867 | |||
| 1868 | if (instr.is_b_imm) { | ||
| 1869 | op_b = GetImmediate19(instr); | ||
| 1870 | } else { | ||
| 1871 | if (instr.is_b_gpr) { | ||
| 1872 | op_b = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 1873 | } else { | ||
| 1874 | op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 1875 | GLSLRegister::Type::Float); | ||
| 1876 | } | ||
| 1877 | } | ||
| 1878 | |||
| 1879 | switch (opcode->get().GetId()) { | ||
| 1880 | case OpCode::Id::MOV_C: | ||
| 1881 | case OpCode::Id::MOV_R: { | ||
| 1882 | // MOV does not have neither 'abs' nor 'neg' bits. | ||
| 1883 | regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1); | ||
| 1884 | break; | ||
| 1885 | } | ||
| 1886 | |||
| 1887 | case OpCode::Id::FMUL_C: | ||
| 1888 | case OpCode::Id::FMUL_R: | ||
| 1889 | case OpCode::Id::FMUL_IMM: { | ||
| 1890 | // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. | ||
| 1891 | UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, | ||
| 1892 | "FMUL tab5cb8_2({}) is not implemented", | ||
| 1893 | instr.fmul.tab5cb8_2.Value()); | ||
| 1894 | UNIMPLEMENTED_IF_MSG( | ||
| 1895 | instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", | ||
| 1896 | instr.fmul.tab5c68_0 | ||
| 1897 | .Value()); // SMO typical sends 1 here which seems to be the default | ||
| 1898 | |||
| 1899 | op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b); | ||
| 1900 | |||
| 1901 | std::string postfactor_op; | ||
| 1902 | if (instr.fmul.postfactor != 0) { | ||
| 1903 | s8 postfactor = static_cast<s8>(instr.fmul.postfactor); | ||
| 1904 | |||
| 1905 | // postfactor encoded as 3-bit 1's complement in instruction, | ||
| 1906 | // interpreted with below logic. | ||
| 1907 | if (postfactor >= 4) { | ||
| 1908 | postfactor = 7 - postfactor; | ||
| 1909 | } else { | ||
| 1910 | postfactor = 0 - postfactor; | ||
| 1911 | } | ||
| 1912 | |||
| 1913 | if (postfactor > 0) { | ||
| 1914 | postfactor_op = " * " + std::to_string(1 << postfactor); | ||
| 1915 | } else { | ||
| 1916 | postfactor_op = " / " + std::to_string(1 << -postfactor); | ||
| 1917 | } | ||
| 1918 | } | ||
| 1919 | |||
| 1920 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + postfactor_op, 1, 1, | ||
| 1921 | instr.alu.saturate_d, instr.generates_cc, 0, true); | ||
| 1922 | break; | ||
| 1923 | } | ||
| 1924 | case OpCode::Id::FADD_C: | ||
| 1925 | case OpCode::Id::FADD_R: | ||
| 1926 | case OpCode::Id::FADD_IMM: { | ||
| 1927 | op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 1928 | op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 1929 | |||
| 1930 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, | ||
| 1931 | instr.alu.saturate_d, instr.generates_cc, 0, true); | ||
| 1932 | break; | ||
| 1933 | } | ||
| 1934 | case OpCode::Id::MUFU: { | ||
| 1935 | op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 1936 | switch (instr.sub_op) { | ||
| 1937 | case SubOp::Cos: | ||
| 1938 | regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, | ||
| 1939 | instr.alu.saturate_d, false, 0, true); | ||
| 1940 | break; | ||
| 1941 | case SubOp::Sin: | ||
| 1942 | regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, | ||
| 1943 | instr.alu.saturate_d, false, 0, true); | ||
| 1944 | break; | ||
| 1945 | case SubOp::Ex2: | ||
| 1946 | regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, | ||
| 1947 | instr.alu.saturate_d, false, 0, true); | ||
| 1948 | break; | ||
| 1949 | case SubOp::Lg2: | ||
| 1950 | regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, | ||
| 1951 | instr.alu.saturate_d, false, 0, true); | ||
| 1952 | break; | ||
| 1953 | case SubOp::Rcp: | ||
| 1954 | regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, | ||
| 1955 | instr.alu.saturate_d, false, 0, true); | ||
| 1956 | break; | ||
| 1957 | case SubOp::Rsq: | ||
| 1958 | regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, | ||
| 1959 | instr.alu.saturate_d, false, 0, true); | ||
| 1960 | break; | ||
| 1961 | case SubOp::Sqrt: | ||
| 1962 | regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1, | ||
| 1963 | instr.alu.saturate_d, false, 0, true); | ||
| 1964 | break; | ||
| 1965 | default: | ||
| 1966 | UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", | ||
| 1967 | static_cast<unsigned>(instr.sub_op.Value())); | ||
| 1968 | } | ||
| 1969 | break; | ||
| 1970 | } | ||
| 1971 | case OpCode::Id::FMNMX_C: | ||
| 1972 | case OpCode::Id::FMNMX_R: | ||
| 1973 | case OpCode::Id::FMNMX_IMM: { | ||
| 1974 | UNIMPLEMENTED_IF_MSG( | ||
| 1975 | instr.generates_cc, | ||
| 1976 | "Condition codes generation in FMNMX is partially implemented"); | ||
| 1977 | |||
| 1978 | op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 1979 | op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 1980 | |||
| 1981 | std::string condition = | ||
| 1982 | GetPredicateCondition(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); | ||
| 1983 | std::string parameters = op_a + ',' + op_b; | ||
| 1984 | regs.SetRegisterToFloat(instr.gpr0, 0, | ||
| 1985 | '(' + condition + ") ? min(" + parameters + ") : max(" + | ||
| 1986 | parameters + ')', | ||
| 1987 | 1, 1, false, instr.generates_cc, 0, true); | ||
| 1988 | break; | ||
| 1989 | } | ||
| 1990 | case OpCode::Id::RRO_C: | ||
| 1991 | case OpCode::Id::RRO_R: | ||
| 1992 | case OpCode::Id::RRO_IMM: { | ||
| 1993 | // Currently RRO is only implemented as a register move. | ||
| 1994 | op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 1995 | regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1); | ||
| 1996 | LOG_WARNING(HW_GPU, "RRO instruction is incomplete"); | ||
| 1997 | break; | ||
| 1998 | } | ||
| 1999 | default: { | ||
| 2000 | UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); | ||
| 2001 | } | ||
| 2002 | } | ||
| 2003 | break; | ||
| 2004 | } | ||
| 2005 | case OpCode::Type::ArithmeticImmediate: { | ||
| 2006 | switch (opcode->get().GetId()) { | ||
| 2007 | case OpCode::Id::MOV32_IMM: { | ||
| 2008 | regs.SetRegisterToFloat(instr.gpr0, 0, GetImmediate32(instr), 1, 1); | ||
| 2009 | break; | ||
| 2010 | } | ||
| 2011 | case OpCode::Id::FMUL32_IMM: { | ||
| 2012 | regs.SetRegisterToFloat( | ||
| 2013 | instr.gpr0, 0, | ||
| 2014 | regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1, | ||
| 2015 | instr.fmul32.saturate, instr.op_32.generates_cc, 0, true); | ||
| 2016 | break; | ||
| 2017 | } | ||
| 2018 | case OpCode::Id::FADD32I: { | ||
| 2019 | UNIMPLEMENTED_IF_MSG( | ||
| 2020 | instr.op_32.generates_cc, | ||
| 2021 | "Condition codes generation in FADD32I is partially implemented"); | ||
| 2022 | |||
| 2023 | std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); | ||
| 2024 | std::string op_b = GetImmediate32(instr); | ||
| 2025 | |||
| 2026 | if (instr.fadd32i.abs_a) { | ||
| 2027 | op_a = "abs(" + op_a + ')'; | ||
| 2028 | } | ||
| 2029 | |||
| 2030 | if (instr.fadd32i.negate_a) { | ||
| 2031 | op_a = "-(" + op_a + ')'; | ||
| 2032 | } | ||
| 2033 | |||
| 2034 | if (instr.fadd32i.abs_b) { | ||
| 2035 | op_b = "abs(" + op_b + ')'; | ||
| 2036 | } | ||
| 2037 | |||
| 2038 | if (instr.fadd32i.negate_b) { | ||
| 2039 | op_b = "-(" + op_b + ')'; | ||
| 2040 | } | ||
| 2041 | |||
| 2042 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, | ||
| 2043 | instr.op_32.generates_cc, 0, true); | ||
| 2044 | break; | ||
| 2045 | } | ||
| 2046 | } | ||
| 2047 | break; | ||
| 2048 | } | ||
| 2049 | case OpCode::Type::Bfe: { | ||
| 2050 | UNIMPLEMENTED_IF(instr.bfe.negate_b); | ||
| 2051 | |||
| 2052 | std::string op_a = instr.bfe.negate_a ? "-" : ""; | ||
| 2053 | op_a += regs.GetRegisterAsInteger(instr.gpr8); | ||
| 2054 | |||
| 2055 | switch (opcode->get().GetId()) { | ||
| 2056 | case OpCode::Id::BFE_IMM: { | ||
| 2057 | std::string inner_shift = | ||
| 2058 | '(' + op_a + " << " + std::to_string(instr.bfe.GetLeftShiftValue()) + ')'; | ||
| 2059 | std::string outer_shift = | ||
| 2060 | '(' + inner_shift + " >> " + | ||
| 2061 | std::to_string(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position) + ')'; | ||
| 2062 | |||
| 2063 | regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1, false, | ||
| 2064 | instr.generates_cc); | ||
| 2065 | break; | ||
| 2066 | } | ||
| 2067 | default: { | ||
| 2068 | UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName()); | ||
| 2069 | } | ||
| 2070 | } | ||
| 2071 | |||
| 2072 | break; | ||
| 2073 | } | ||
| 2074 | case OpCode::Type::Bfi: { | ||
| 2075 | const auto [base, packed_shift] = [&]() -> std::tuple<std::string, std::string> { | ||
| 2076 | switch (opcode->get().GetId()) { | ||
| 2077 | case OpCode::Id::BFI_IMM_R: | ||
| 2078 | return {regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 2079 | std::to_string(instr.alu.GetSignedImm20_20())}; | ||
| 2080 | default: | ||
| 2081 | UNREACHABLE(); | ||
| 2082 | return {regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 2083 | std::to_string(instr.alu.GetSignedImm20_20())}; | ||
| 2084 | } | ||
| 2085 | }(); | ||
| 2086 | const std::string offset = '(' + packed_shift + " & 0xff)"; | ||
| 2087 | const std::string bits = "((" + packed_shift + " >> 8) & 0xff)"; | ||
| 2088 | const std::string insert = regs.GetRegisterAsInteger(instr.gpr8, 0, false); | ||
| 2089 | regs.SetRegisterToInteger(instr.gpr0, false, 0, | ||
| 2090 | "bitfieldInsert(" + base + ", " + insert + ", " + offset + | ||
| 2091 | ", " + bits + ')', | ||
| 2092 | 1, 1, false, instr.generates_cc); | ||
| 2093 | break; | ||
| 2094 | } | ||
| 2095 | case OpCode::Type::Shift: { | ||
| 2096 | std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true); | ||
| 2097 | std::string op_b; | ||
| 2098 | |||
| 2099 | if (instr.is_b_imm) { | ||
| 2100 | op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')'; | ||
| 2101 | } else { | ||
| 2102 | if (instr.is_b_gpr) { | ||
| 2103 | op_b += regs.GetRegisterAsInteger(instr.gpr20); | ||
| 2104 | } else { | ||
| 2105 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2106 | GLSLRegister::Type::Integer); | ||
| 2107 | } | ||
| 2108 | } | ||
| 2109 | |||
| 2110 | switch (opcode->get().GetId()) { | ||
| 2111 | case OpCode::Id::SHR_C: | ||
| 2112 | case OpCode::Id::SHR_R: | ||
| 2113 | case OpCode::Id::SHR_IMM: { | ||
| 2114 | if (!instr.shift.is_signed) { | ||
| 2115 | // Logical shift right | ||
| 2116 | op_a = "uint(" + op_a + ')'; | ||
| 2117 | } | ||
| 2118 | |||
| 2119 | // Cast to int is superfluous for arithmetic shift, it's only for a logical shift | ||
| 2120 | regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(" + op_a + " >> " + op_b + ')', | ||
| 2121 | 1, 1, false, instr.generates_cc); | ||
| 2122 | break; | ||
| 2123 | } | ||
| 2124 | case OpCode::Id::SHL_C: | ||
| 2125 | case OpCode::Id::SHL_R: | ||
| 2126 | case OpCode::Id::SHL_IMM: | ||
| 2127 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 2128 | "Condition codes generation in SHL is not implemented"); | ||
| 2129 | regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1, false, | ||
| 2130 | instr.generates_cc); | ||
| 2131 | break; | ||
| 2132 | default: { | ||
| 2133 | UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); | ||
| 2134 | } | ||
| 2135 | } | ||
| 2136 | break; | ||
| 2137 | } | ||
| 2138 | case OpCode::Type::ArithmeticIntegerImmediate: { | ||
| 2139 | std::string op_a = regs.GetRegisterAsInteger(instr.gpr8); | ||
| 2140 | std::string op_b = std::to_string(instr.alu.imm20_32.Value()); | ||
| 2141 | |||
| 2142 | switch (opcode->get().GetId()) { | ||
| 2143 | case OpCode::Id::IADD32I: | ||
| 2144 | UNIMPLEMENTED_IF_MSG( | ||
| 2145 | instr.op_32.generates_cc, | ||
| 2146 | "Condition codes generation in IADD32I is partially implemented"); | ||
| 2147 | |||
| 2148 | if (instr.iadd32i.negate_a) | ||
| 2149 | op_a = "-(" + op_a + ')'; | ||
| 2150 | |||
| 2151 | regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1, | ||
| 2152 | instr.iadd32i.saturate, instr.op_32.generates_cc); | ||
| 2153 | break; | ||
| 2154 | case OpCode::Id::LOP32I: { | ||
| 2155 | |||
| 2156 | if (instr.alu.lop32i.invert_a) | ||
| 2157 | op_a = "~(" + op_a + ')'; | ||
| 2158 | |||
| 2159 | if (instr.alu.lop32i.invert_b) | ||
| 2160 | op_b = "~(" + op_b + ')'; | ||
| 2161 | |||
| 2162 | WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, | ||
| 2163 | Tegra::Shader::PredicateResultMode::None, | ||
| 2164 | Tegra::Shader::Pred::UnusedIndex, instr.op_32.generates_cc); | ||
| 2165 | break; | ||
| 2166 | } | ||
| 2167 | default: { | ||
| 2168 | UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}", | ||
| 2169 | opcode->get().GetName()); | ||
| 2170 | } | ||
| 2171 | } | ||
| 2172 | break; | ||
| 2173 | } | ||
| 2174 | case OpCode::Type::ArithmeticInteger: { | ||
| 2175 | std::string op_a = regs.GetRegisterAsInteger(instr.gpr8); | ||
| 2176 | std::string op_b; | ||
| 2177 | if (instr.is_b_imm) { | ||
| 2178 | op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')'; | ||
| 2179 | } else { | ||
| 2180 | if (instr.is_b_gpr) { | ||
| 2181 | op_b += regs.GetRegisterAsInteger(instr.gpr20); | ||
| 2182 | } else { | ||
| 2183 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2184 | GLSLRegister::Type::Integer); | ||
| 2185 | } | ||
| 2186 | } | ||
| 2187 | |||
| 2188 | switch (opcode->get().GetId()) { | ||
| 2189 | case OpCode::Id::IADD_C: | ||
| 2190 | case OpCode::Id::IADD_R: | ||
| 2191 | case OpCode::Id::IADD_IMM: { | ||
| 2192 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 2193 | "Condition codes generation in IADD is partially implemented"); | ||
| 2194 | |||
| 2195 | if (instr.alu_integer.negate_a) | ||
| 2196 | op_a = "-(" + op_a + ')'; | ||
| 2197 | |||
| 2198 | if (instr.alu_integer.negate_b) | ||
| 2199 | op_b = "-(" + op_b + ')'; | ||
| 2200 | |||
| 2201 | regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1, | ||
| 2202 | instr.alu.saturate_d, instr.generates_cc); | ||
| 2203 | break; | ||
| 2204 | } | ||
| 2205 | case OpCode::Id::IADD3_C: | ||
| 2206 | case OpCode::Id::IADD3_R: | ||
| 2207 | case OpCode::Id::IADD3_IMM: { | ||
| 2208 | UNIMPLEMENTED_IF_MSG( | ||
| 2209 | instr.generates_cc, | ||
| 2210 | "Condition codes generation in IADD3 is partially implemented"); | ||
| 2211 | |||
| 2212 | std::string op_c = regs.GetRegisterAsInteger(instr.gpr39); | ||
| 2213 | |||
| 2214 | auto apply_height = [](auto height, auto& oprand) { | ||
| 2215 | switch (height) { | ||
| 2216 | case Tegra::Shader::IAdd3Height::None: | ||
| 2217 | break; | ||
| 2218 | case Tegra::Shader::IAdd3Height::LowerHalfWord: | ||
| 2219 | oprand = "((" + oprand + ") & 0xFFFF)"; | ||
| 2220 | break; | ||
| 2221 | case Tegra::Shader::IAdd3Height::UpperHalfWord: | ||
| 2222 | oprand = "((" + oprand + ") >> 16)"; | ||
| 2223 | break; | ||
| 2224 | default: | ||
| 2225 | UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", | ||
| 2226 | static_cast<u32>(height.Value())); | ||
| 2227 | } | ||
| 2228 | }; | ||
| 2229 | |||
| 2230 | if (opcode->get().GetId() == OpCode::Id::IADD3_R) { | ||
| 2231 | apply_height(instr.iadd3.height_a, op_a); | ||
| 2232 | apply_height(instr.iadd3.height_b, op_b); | ||
| 2233 | apply_height(instr.iadd3.height_c, op_c); | ||
| 2234 | } | ||
| 2235 | |||
| 2236 | if (instr.iadd3.neg_a) | ||
| 2237 | op_a = "-(" + op_a + ')'; | ||
| 2238 | |||
| 2239 | if (instr.iadd3.neg_b) | ||
| 2240 | op_b = "-(" + op_b + ')'; | ||
| 2241 | |||
| 2242 | if (instr.iadd3.neg_c) | ||
| 2243 | op_c = "-(" + op_c + ')'; | ||
| 2244 | |||
| 2245 | std::string result; | ||
| 2246 | if (opcode->get().GetId() == OpCode::Id::IADD3_R) { | ||
| 2247 | switch (instr.iadd3.mode) { | ||
| 2248 | case Tegra::Shader::IAdd3Mode::RightShift: | ||
| 2249 | // TODO(tech4me): According to | ||
| 2250 | // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3 | ||
| 2251 | // The addition between op_a and op_b should be done in uint33, more | ||
| 2252 | // investigation required | ||
| 2253 | result = "(((" + op_a + " + " + op_b + ") >> 16) + " + op_c + ')'; | ||
| 2254 | break; | ||
| 2255 | case Tegra::Shader::IAdd3Mode::LeftShift: | ||
| 2256 | result = "(((" + op_a + " + " + op_b + ") << 16) + " + op_c + ')'; | ||
| 2257 | break; | ||
| 2258 | default: | ||
| 2259 | result = '(' + op_a + " + " + op_b + " + " + op_c + ')'; | ||
| 2260 | break; | ||
| 2261 | } | ||
| 2262 | } else { | ||
| 2263 | result = '(' + op_a + " + " + op_b + " + " + op_c + ')'; | ||
| 2264 | } | ||
| 2265 | |||
| 2266 | regs.SetRegisterToInteger(instr.gpr0, true, 0, result, 1, 1, false, | ||
| 2267 | instr.generates_cc); | ||
| 2268 | break; | ||
| 2269 | } | ||
| 2270 | case OpCode::Id::ISCADD_C: | ||
| 2271 | case OpCode::Id::ISCADD_R: | ||
| 2272 | case OpCode::Id::ISCADD_IMM: { | ||
| 2273 | UNIMPLEMENTED_IF_MSG( | ||
| 2274 | instr.generates_cc, | ||
| 2275 | "Condition codes generation in ISCADD is partially implemented"); | ||
| 2276 | |||
| 2277 | if (instr.alu_integer.negate_a) | ||
| 2278 | op_a = "-(" + op_a + ')'; | ||
| 2279 | |||
| 2280 | if (instr.alu_integer.negate_b) | ||
| 2281 | op_b = "-(" + op_b + ')'; | ||
| 2282 | |||
| 2283 | const std::string shift = std::to_string(instr.alu_integer.shift_amount.Value()); | ||
| 2284 | |||
| 2285 | regs.SetRegisterToInteger(instr.gpr0, true, 0, | ||
| 2286 | "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1, | ||
| 2287 | false, instr.generates_cc); | ||
| 2288 | break; | ||
| 2289 | } | ||
| 2290 | case OpCode::Id::POPC_C: | ||
| 2291 | case OpCode::Id::POPC_R: | ||
| 2292 | case OpCode::Id::POPC_IMM: { | ||
| 2293 | if (instr.popc.invert) { | ||
| 2294 | op_b = "~(" + op_b + ')'; | ||
| 2295 | } | ||
| 2296 | regs.SetRegisterToInteger(instr.gpr0, true, 0, "bitCount(" + op_b + ')', 1, 1); | ||
| 2297 | break; | ||
| 2298 | } | ||
| 2299 | case OpCode::Id::SEL_C: | ||
| 2300 | case OpCode::Id::SEL_R: | ||
| 2301 | case OpCode::Id::SEL_IMM: { | ||
| 2302 | const std::string condition = | ||
| 2303 | GetPredicateCondition(instr.sel.pred, instr.sel.neg_pred != 0); | ||
| 2304 | regs.SetRegisterToInteger(instr.gpr0, true, 0, | ||
| 2305 | '(' + condition + ") ? " + op_a + " : " + op_b, 1, 1); | ||
| 2306 | break; | ||
| 2307 | } | ||
| 2308 | case OpCode::Id::LOP_C: | ||
| 2309 | case OpCode::Id::LOP_R: | ||
| 2310 | case OpCode::Id::LOP_IMM: { | ||
| 2311 | |||
| 2312 | if (instr.alu.lop.invert_a) | ||
| 2313 | op_a = "~(" + op_a + ')'; | ||
| 2314 | |||
| 2315 | if (instr.alu.lop.invert_b) | ||
| 2316 | op_b = "~(" + op_b + ')'; | ||
| 2317 | |||
| 2318 | WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b, | ||
| 2319 | instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, | ||
| 2320 | instr.generates_cc); | ||
| 2321 | break; | ||
| 2322 | } | ||
| 2323 | case OpCode::Id::LOP3_C: | ||
| 2324 | case OpCode::Id::LOP3_R: | ||
| 2325 | case OpCode::Id::LOP3_IMM: { | ||
| 2326 | const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39); | ||
| 2327 | std::string lut; | ||
| 2328 | |||
| 2329 | if (opcode->get().GetId() == OpCode::Id::LOP3_R) { | ||
| 2330 | lut = '(' + std::to_string(instr.alu.lop3.GetImmLut28()) + ')'; | ||
| 2331 | } else { | ||
| 2332 | lut = '(' + std::to_string(instr.alu.lop3.GetImmLut48()) + ')'; | ||
| 2333 | } | ||
| 2334 | |||
| 2335 | WriteLop3Instruction(instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); | ||
| 2336 | break; | ||
| 2337 | } | ||
| 2338 | case OpCode::Id::IMNMX_C: | ||
| 2339 | case OpCode::Id::IMNMX_R: | ||
| 2340 | case OpCode::Id::IMNMX_IMM: { | ||
| 2341 | UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); | ||
| 2342 | UNIMPLEMENTED_IF_MSG( | ||
| 2343 | instr.generates_cc, | ||
| 2344 | "Condition codes generation in IMNMX is partially implemented"); | ||
| 2345 | |||
| 2346 | const std::string condition = | ||
| 2347 | GetPredicateCondition(instr.imnmx.pred, instr.imnmx.negate_pred != 0); | ||
| 2348 | const std::string parameters = op_a + ',' + op_b; | ||
| 2349 | regs.SetRegisterToInteger(instr.gpr0, instr.imnmx.is_signed, 0, | ||
| 2350 | '(' + condition + ") ? min(" + parameters + ") : max(" + | ||
| 2351 | parameters + ')', | ||
| 2352 | 1, 1, false, instr.generates_cc); | ||
| 2353 | break; | ||
| 2354 | } | ||
| 2355 | case OpCode::Id::LEA_R2: | ||
| 2356 | case OpCode::Id::LEA_R1: | ||
| 2357 | case OpCode::Id::LEA_IMM: | ||
| 2358 | case OpCode::Id::LEA_RZ: | ||
| 2359 | case OpCode::Id::LEA_HI: { | ||
| 2360 | std::string op_c; | ||
| 2361 | |||
| 2362 | switch (opcode->get().GetId()) { | ||
| 2363 | case OpCode::Id::LEA_R2: { | ||
| 2364 | op_a = regs.GetRegisterAsInteger(instr.gpr20); | ||
| 2365 | op_b = regs.GetRegisterAsInteger(instr.gpr39); | ||
| 2366 | op_c = std::to_string(instr.lea.r2.entry_a); | ||
| 2367 | break; | ||
| 2368 | } | ||
| 2369 | |||
| 2370 | case OpCode::Id::LEA_R1: { | ||
| 2371 | const bool neg = instr.lea.r1.neg != 0; | ||
| 2372 | op_a = regs.GetRegisterAsInteger(instr.gpr8); | ||
| 2373 | if (neg) | ||
| 2374 | op_a = "-(" + op_a + ')'; | ||
| 2375 | op_b = regs.GetRegisterAsInteger(instr.gpr20); | ||
| 2376 | op_c = std::to_string(instr.lea.r1.entry_a); | ||
| 2377 | break; | ||
| 2378 | } | ||
| 2379 | |||
| 2380 | case OpCode::Id::LEA_IMM: { | ||
| 2381 | const bool neg = instr.lea.imm.neg != 0; | ||
| 2382 | op_b = regs.GetRegisterAsInteger(instr.gpr8); | ||
| 2383 | if (neg) | ||
| 2384 | op_b = "-(" + op_b + ')'; | ||
| 2385 | op_a = std::to_string(instr.lea.imm.entry_a); | ||
| 2386 | op_c = std::to_string(instr.lea.imm.entry_b); | ||
| 2387 | break; | ||
| 2388 | } | ||
| 2389 | |||
| 2390 | case OpCode::Id::LEA_RZ: { | ||
| 2391 | const bool neg = instr.lea.rz.neg != 0; | ||
| 2392 | op_b = regs.GetRegisterAsInteger(instr.gpr8); | ||
| 2393 | if (neg) | ||
| 2394 | op_b = "-(" + op_b + ')'; | ||
| 2395 | op_a = regs.GetUniform(instr.lea.rz.cb_index, instr.lea.rz.cb_offset, | ||
| 2396 | GLSLRegister::Type::Integer); | ||
| 2397 | op_c = std::to_string(instr.lea.rz.entry_a); | ||
| 2398 | |||
| 2399 | break; | ||
| 2400 | } | ||
| 2401 | |||
| 2402 | case OpCode::Id::LEA_HI: | ||
| 2403 | default: { | ||
| 2404 | op_b = regs.GetRegisterAsInteger(instr.gpr8); | ||
| 2405 | op_a = std::to_string(instr.lea.imm.entry_a); | ||
| 2406 | op_c = std::to_string(instr.lea.imm.entry_b); | ||
| 2407 | UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); | ||
| 2408 | } | ||
| 2409 | } | ||
| 2410 | UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), | ||
| 2411 | "Unhandled LEA Predicate"); | ||
| 2412 | const std::string value = '(' + op_a + " + (" + op_b + "*(1 << " + op_c + ")))"; | ||
| 2413 | regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1, false, | ||
| 2414 | instr.generates_cc); | ||
| 2415 | |||
| 2416 | break; | ||
| 2417 | } | ||
| 2418 | default: { | ||
| 2419 | UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", | ||
| 2420 | opcode->get().GetName()); | ||
| 2421 | } | ||
| 2422 | } | ||
| 2423 | |||
| 2424 | break; | ||
| 2425 | } | ||
| 2426 | case OpCode::Type::ArithmeticHalf: { | ||
| 2427 | if (opcode->get().GetId() == OpCode::Id::HADD2_C || | ||
| 2428 | opcode->get().GetId() == OpCode::Id::HADD2_R) { | ||
| 2429 | UNIMPLEMENTED_IF(instr.alu_half.ftz != 0); | ||
| 2430 | } | ||
| 2431 | const bool negate_a = | ||
| 2432 | opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; | ||
| 2433 | const bool negate_b = | ||
| 2434 | opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; | ||
| 2435 | |||
| 2436 | const std::string op_a = | ||
| 2437 | GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half.type_a, | ||
| 2438 | instr.alu_half.abs_a != 0, negate_a); | ||
| 2439 | |||
| 2440 | std::string op_b; | ||
| 2441 | switch (opcode->get().GetId()) { | ||
| 2442 | case OpCode::Id::HADD2_C: | ||
| 2443 | case OpCode::Id::HMUL2_C: | ||
| 2444 | op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2445 | GLSLRegister::Type::UnsignedInteger); | ||
| 2446 | break; | ||
| 2447 | case OpCode::Id::HADD2_R: | ||
| 2448 | case OpCode::Id::HMUL2_R: | ||
| 2449 | op_b = regs.GetRegisterAsInteger(instr.gpr20, 0, false); | ||
| 2450 | break; | ||
| 2451 | default: | ||
| 2452 | UNREACHABLE(); | ||
| 2453 | op_b = "0"; | ||
| 2454 | break; | ||
| 2455 | } | ||
| 2456 | op_b = GetHalfFloat(op_b, instr.alu_half.type_b, instr.alu_half.abs_b != 0, negate_b); | ||
| 2457 | |||
| 2458 | const std::string result = [&]() { | ||
| 2459 | switch (opcode->get().GetId()) { | ||
| 2460 | case OpCode::Id::HADD2_C: | ||
| 2461 | case OpCode::Id::HADD2_R: | ||
| 2462 | return '(' + op_a + " + " + op_b + ')'; | ||
| 2463 | case OpCode::Id::HMUL2_C: | ||
| 2464 | case OpCode::Id::HMUL2_R: | ||
| 2465 | return '(' + op_a + " * " + op_b + ')'; | ||
| 2466 | default: | ||
| 2467 | UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", | ||
| 2468 | opcode->get().GetName()); | ||
| 2469 | return std::string("0"); | ||
| 2470 | } | ||
| 2471 | }(); | ||
| 2472 | |||
| 2473 | regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.alu_half.merge, 1, 1, | ||
| 2474 | instr.alu_half.saturate != 0); | ||
| 2475 | break; | ||
| 2476 | } | ||
| 2477 | case OpCode::Type::ArithmeticHalfImmediate: { | ||
| 2478 | if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { | ||
| 2479 | UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0); | ||
| 2480 | } else { | ||
| 2481 | UNIMPLEMENTED_IF(instr.alu_half_imm.precision != | ||
| 2482 | Tegra::Shader::HalfPrecision::None); | ||
| 2483 | } | ||
| 2484 | |||
| 2485 | const std::string op_a = GetHalfFloat( | ||
| 2486 | regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half_imm.type_a, | ||
| 2487 | instr.alu_half_imm.abs_a != 0, instr.alu_half_imm.negate_a != 0); | ||
| 2488 | |||
| 2489 | const std::string op_b = UnpackHalfImmediate(instr, true); | ||
| 2490 | |||
| 2491 | const std::string result = [&]() { | ||
| 2492 | switch (opcode->get().GetId()) { | ||
| 2493 | case OpCode::Id::HADD2_IMM: | ||
| 2494 | return op_a + " + " + op_b; | ||
| 2495 | case OpCode::Id::HMUL2_IMM: | ||
| 2496 | return op_a + " * " + op_b; | ||
| 2497 | default: | ||
| 2498 | UNREACHABLE(); | ||
| 2499 | return std::string("0"); | ||
| 2500 | } | ||
| 2501 | }(); | ||
| 2502 | |||
| 2503 | regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.alu_half_imm.merge, 1, 1, | ||
| 2504 | instr.alu_half_imm.saturate != 0); | ||
| 2505 | break; | ||
| 2506 | } | ||
| 2507 | case OpCode::Type::Ffma: { | ||
| 2508 | const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); | ||
| 2509 | std::string op_b = instr.ffma.negate_b ? "-" : ""; | ||
| 2510 | std::string op_c = instr.ffma.negate_c ? "-" : ""; | ||
| 2511 | |||
| 2512 | UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); | ||
| 2513 | UNIMPLEMENTED_IF_MSG( | ||
| 2514 | instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented", | ||
| 2515 | instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO | ||
| 2516 | UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", | ||
| 2517 | instr.ffma.tab5980_1.Value()); | ||
| 2518 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 2519 | "Condition codes generation in FFMA is partially implemented"); | ||
| 2520 | |||
| 2521 | switch (opcode->get().GetId()) { | ||
| 2522 | case OpCode::Id::FFMA_CR: { | ||
| 2523 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2524 | GLSLRegister::Type::Float); | ||
| 2525 | op_c += regs.GetRegisterAsFloat(instr.gpr39); | ||
| 2526 | break; | ||
| 2527 | } | ||
| 2528 | case OpCode::Id::FFMA_RR: { | ||
| 2529 | op_b += regs.GetRegisterAsFloat(instr.gpr20); | ||
| 2530 | op_c += regs.GetRegisterAsFloat(instr.gpr39); | ||
| 2531 | break; | ||
| 2532 | } | ||
| 2533 | case OpCode::Id::FFMA_RC: { | ||
| 2534 | op_b += regs.GetRegisterAsFloat(instr.gpr39); | ||
| 2535 | op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2536 | GLSLRegister::Type::Float); | ||
| 2537 | break; | ||
| 2538 | } | ||
| 2539 | case OpCode::Id::FFMA_IMM: { | ||
| 2540 | op_b += GetImmediate19(instr); | ||
| 2541 | op_c += regs.GetRegisterAsFloat(instr.gpr39); | ||
| 2542 | break; | ||
| 2543 | } | ||
| 2544 | default: { | ||
| 2545 | UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName()); | ||
| 2546 | } | ||
| 2547 | } | ||
| 2548 | |||
| 2549 | regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')', | ||
| 2550 | 1, 1, instr.alu.saturate_d, instr.generates_cc, 0, true); | ||
| 2551 | break; | ||
| 2552 | } | ||
| 2553 | case OpCode::Type::Hfma2: { | ||
| 2554 | if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { | ||
| 2555 | UNIMPLEMENTED_IF(instr.hfma2.rr.precision != Tegra::Shader::HalfPrecision::None); | ||
| 2556 | } else { | ||
| 2557 | UNIMPLEMENTED_IF(instr.hfma2.precision != Tegra::Shader::HalfPrecision::None); | ||
| 2558 | } | ||
| 2559 | const bool saturate = opcode->get().GetId() == OpCode::Id::HFMA2_RR | ||
| 2560 | ? instr.hfma2.rr.saturate != 0 | ||
| 2561 | : instr.hfma2.saturate != 0; | ||
| 2562 | |||
| 2563 | const std::string op_a = | ||
| 2564 | GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hfma2.type_a); | ||
| 2565 | std::string op_b, op_c; | ||
| 2566 | |||
| 2567 | switch (opcode->get().GetId()) { | ||
| 2568 | case OpCode::Id::HFMA2_CR: | ||
| 2569 | op_b = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2570 | GLSLRegister::Type::UnsignedInteger), | ||
| 2571 | instr.hfma2.type_b, false, instr.hfma2.negate_b); | ||
| 2572 | op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 2573 | instr.hfma2.type_reg39, false, instr.hfma2.negate_c); | ||
| 2574 | break; | ||
| 2575 | case OpCode::Id::HFMA2_RC: | ||
| 2576 | op_b = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 2577 | instr.hfma2.type_reg39, false, instr.hfma2.negate_b); | ||
| 2578 | op_c = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2579 | GLSLRegister::Type::UnsignedInteger), | ||
| 2580 | instr.hfma2.type_b, false, instr.hfma2.negate_c); | ||
| 2581 | break; | ||
| 2582 | case OpCode::Id::HFMA2_RR: | ||
| 2583 | op_b = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false), | ||
| 2584 | instr.hfma2.type_b, false, instr.hfma2.negate_b); | ||
| 2585 | op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 2586 | instr.hfma2.rr.type_c, false, instr.hfma2.rr.negate_c); | ||
| 2587 | break; | ||
| 2588 | case OpCode::Id::HFMA2_IMM_R: | ||
| 2589 | op_b = UnpackHalfImmediate(instr, true); | ||
| 2590 | op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 2591 | instr.hfma2.type_reg39, false, instr.hfma2.negate_c); | ||
| 2592 | break; | ||
| 2593 | default: | ||
| 2594 | UNREACHABLE(); | ||
| 2595 | op_c = op_b = "vec2(0)"; | ||
| 2596 | break; | ||
| 2597 | } | ||
| 2598 | |||
| 2599 | const std::string result = '(' + op_a + " * " + op_b + " + " + op_c + ')'; | ||
| 2600 | |||
| 2601 | regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.hfma2.merge, 1, 1, saturate); | ||
| 2602 | break; | ||
| 2603 | } | ||
| 2604 | case OpCode::Type::Conversion: { | ||
| 2605 | switch (opcode->get().GetId()) { | ||
| 2606 | case OpCode::Id::I2I_R: { | ||
| 2607 | UNIMPLEMENTED_IF(instr.conversion.selector); | ||
| 2608 | |||
| 2609 | std::string op_a = regs.GetRegisterAsInteger( | ||
| 2610 | instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size); | ||
| 2611 | |||
| 2612 | if (instr.conversion.abs_a) { | ||
| 2613 | op_a = "abs(" + op_a + ')'; | ||
| 2614 | } | ||
| 2615 | |||
| 2616 | if (instr.conversion.negate_a) { | ||
| 2617 | op_a = "-(" + op_a + ')'; | ||
| 2618 | } | ||
| 2619 | |||
| 2620 | regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, | ||
| 2621 | 1, instr.alu.saturate_d, instr.generates_cc, 0, | ||
| 2622 | instr.conversion.dest_size); | ||
| 2623 | break; | ||
| 2624 | } | ||
| 2625 | case OpCode::Id::I2F_R: | ||
| 2626 | case OpCode::Id::I2F_C: { | ||
| 2627 | UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); | ||
| 2628 | UNIMPLEMENTED_IF(instr.conversion.selector); | ||
| 2629 | std::string op_a; | ||
| 2630 | |||
| 2631 | if (instr.is_b_gpr) { | ||
| 2632 | op_a = | ||
| 2633 | regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed, | ||
| 2634 | instr.conversion.src_size); | ||
| 2635 | } else { | ||
| 2636 | op_a = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2637 | instr.conversion.is_input_signed | ||
| 2638 | ? GLSLRegister::Type::Integer | ||
| 2639 | : GLSLRegister::Type::UnsignedInteger, | ||
| 2640 | instr.conversion.src_size); | ||
| 2641 | } | ||
| 2642 | |||
| 2643 | if (instr.conversion.abs_a) { | ||
| 2644 | op_a = "abs(" + op_a + ')'; | ||
| 2645 | } | ||
| 2646 | |||
| 2647 | if (instr.conversion.negate_a) { | ||
| 2648 | op_a = "-(" + op_a + ')'; | ||
| 2649 | } | ||
| 2650 | |||
| 2651 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, false, instr.generates_cc); | ||
| 2652 | break; | ||
| 2653 | } | ||
| 2654 | case OpCode::Id::F2F_R: { | ||
| 2655 | UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); | ||
| 2656 | UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); | ||
| 2657 | std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 2658 | |||
| 2659 | if (instr.conversion.abs_a) { | ||
| 2660 | op_a = "abs(" + op_a + ')'; | ||
| 2661 | } | ||
| 2662 | |||
| 2663 | if (instr.conversion.negate_a) { | ||
| 2664 | op_a = "-(" + op_a + ')'; | ||
| 2665 | } | ||
| 2666 | |||
| 2667 | switch (instr.conversion.f2f.rounding) { | ||
| 2668 | case Tegra::Shader::F2fRoundingOp::None: | ||
| 2669 | break; | ||
| 2670 | case Tegra::Shader::F2fRoundingOp::Round: | ||
| 2671 | op_a = "roundEven(" + op_a + ')'; | ||
| 2672 | break; | ||
| 2673 | case Tegra::Shader::F2fRoundingOp::Floor: | ||
| 2674 | op_a = "floor(" + op_a + ')'; | ||
| 2675 | break; | ||
| 2676 | case Tegra::Shader::F2fRoundingOp::Ceil: | ||
| 2677 | op_a = "ceil(" + op_a + ')'; | ||
| 2678 | break; | ||
| 2679 | case Tegra::Shader::F2fRoundingOp::Trunc: | ||
| 2680 | op_a = "trunc(" + op_a + ')'; | ||
| 2681 | break; | ||
| 2682 | default: | ||
| 2683 | UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", | ||
| 2684 | static_cast<u32>(instr.conversion.f2f.rounding.Value())); | ||
| 2685 | break; | ||
| 2686 | } | ||
| 2687 | |||
| 2688 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d, | ||
| 2689 | instr.generates_cc); | ||
| 2690 | break; | ||
| 2691 | } | ||
| 2692 | case OpCode::Id::F2I_R: | ||
| 2693 | case OpCode::Id::F2I_C: { | ||
| 2694 | UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); | ||
| 2695 | std::string op_a{}; | ||
| 2696 | |||
| 2697 | if (instr.is_b_gpr) { | ||
| 2698 | op_a = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 2699 | } else { | ||
| 2700 | op_a = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2701 | GLSLRegister::Type::Float); | ||
| 2702 | } | ||
| 2703 | |||
| 2704 | if (instr.conversion.abs_a) { | ||
| 2705 | op_a = "abs(" + op_a + ')'; | ||
| 2706 | } | ||
| 2707 | |||
| 2708 | if (instr.conversion.negate_a) { | ||
| 2709 | op_a = "-(" + op_a + ')'; | ||
| 2710 | } | ||
| 2711 | |||
| 2712 | switch (instr.conversion.f2i.rounding) { | ||
| 2713 | case Tegra::Shader::F2iRoundingOp::None: | ||
| 2714 | break; | ||
| 2715 | case Tegra::Shader::F2iRoundingOp::Floor: | ||
| 2716 | op_a = "floor(" + op_a + ')'; | ||
| 2717 | break; | ||
| 2718 | case Tegra::Shader::F2iRoundingOp::Ceil: | ||
| 2719 | op_a = "ceil(" + op_a + ')'; | ||
| 2720 | break; | ||
| 2721 | case Tegra::Shader::F2iRoundingOp::Trunc: | ||
| 2722 | op_a = "trunc(" + op_a + ')'; | ||
| 2723 | break; | ||
| 2724 | default: | ||
| 2725 | UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}", | ||
| 2726 | static_cast<u32>(instr.conversion.f2i.rounding.Value())); | ||
| 2727 | break; | ||
| 2728 | } | ||
| 2729 | |||
| 2730 | if (instr.conversion.is_output_signed) { | ||
| 2731 | op_a = "int(" + op_a + ')'; | ||
| 2732 | } else { | ||
| 2733 | op_a = "uint(" + op_a + ')'; | ||
| 2734 | } | ||
| 2735 | |||
| 2736 | regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, | ||
| 2737 | 1, false, instr.generates_cc, 0, | ||
| 2738 | instr.conversion.dest_size); | ||
| 2739 | break; | ||
| 2740 | } | ||
| 2741 | default: { | ||
| 2742 | UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); | ||
| 2743 | } | ||
| 2744 | } | ||
| 2745 | break; | ||
| 2746 | } | ||
| 2747 | case OpCode::Type::Memory: { | ||
| 2748 | switch (opcode->get().GetId()) { | ||
| 2749 | case OpCode::Id::LD_A: { | ||
| 2750 | // Note: Shouldn't this be interp mode flat? As in no interpolation made. | ||
| 2751 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | ||
| 2752 | "Indirect attribute loads are not supported"); | ||
| 2753 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | ||
| 2754 | "Unaligned attribute loads are not supported"); | ||
| 2755 | |||
| 2756 | Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, | ||
| 2757 | Tegra::Shader::IpaSampleMode::Default}; | ||
| 2758 | |||
| 2759 | u64 next_element = instr.attribute.fmt20.element; | ||
| 2760 | u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||
| 2761 | |||
| 2762 | const auto LoadNextElement = [&](u32 reg_offset) { | ||
| 2763 | regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element, | ||
| 2764 | static_cast<Attribute::Index>(next_index), | ||
| 2765 | input_mode, instr.gpr39.Value()); | ||
| 2766 | |||
| 2767 | // Load the next attribute element into the following register. If the element | ||
| 2768 | // to load goes beyond the vec4 size, load the first element of the next | ||
| 2769 | // attribute. | ||
| 2770 | next_element = (next_element + 1) % 4; | ||
| 2771 | next_index = next_index + (next_element == 0 ? 1 : 0); | ||
| 2772 | }; | ||
| 2773 | |||
| 2774 | const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; | ||
| 2775 | for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { | ||
| 2776 | LoadNextElement(reg_offset); | ||
| 2777 | } | ||
| 2778 | break; | ||
| 2779 | } | ||
| 2780 | case OpCode::Id::LD_C: { | ||
| 2781 | UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); | ||
| 2782 | |||
| 2783 | const auto scope = shader.Scope(); | ||
| 2784 | |||
| 2785 | shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + | ||
| 2786 | " / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);"); | ||
| 2787 | |||
| 2788 | const std::string op_a = | ||
| 2789 | regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, "index", | ||
| 2790 | GLSLRegister::Type::Float); | ||
| 2791 | |||
| 2792 | switch (instr.ld_c.type.Value()) { | ||
| 2793 | case Tegra::Shader::UniformType::Single: | ||
| 2794 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); | ||
| 2795 | break; | ||
| 2796 | |||
| 2797 | case Tegra::Shader::UniformType::Double: { | ||
| 2798 | const std::string op_b = | ||
| 2799 | regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, | ||
| 2800 | "index", GLSLRegister::Type::Float); | ||
| 2801 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); | ||
| 2802 | regs.SetRegisterToFloat(instr.gpr0.Value() + 1, 0, op_b, 1, 1); | ||
| 2803 | break; | ||
| 2804 | } | ||
| 2805 | default: | ||
| 2806 | UNIMPLEMENTED_MSG("Unhandled type: {}", | ||
| 2807 | static_cast<unsigned>(instr.ld_c.type.Value())); | ||
| 2808 | } | ||
| 2809 | break; | ||
| 2810 | } | ||
| 2811 | case OpCode::Id::LD_L: { | ||
| 2812 | UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}", | ||
| 2813 | static_cast<unsigned>(instr.ld_l.unknown.Value())); | ||
| 2814 | |||
| 2815 | const auto scope = shader.Scope(); | ||
| 2816 | |||
| 2817 | std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + | ||
| 2818 | std::to_string(instr.smem_imm.Value()) + ')'; | ||
| 2819 | |||
| 2820 | shader.AddLine("uint index = (" + op + " / 4);"); | ||
| 2821 | |||
| 2822 | const std::string op_a = regs.GetLocalMemoryAsFloat("index"); | ||
| 2823 | |||
| 2824 | switch (instr.ldst_sl.type.Value()) { | ||
| 2825 | case Tegra::Shader::StoreType::Bytes32: | ||
| 2826 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); | ||
| 2827 | break; | ||
| 2828 | default: | ||
| 2829 | UNIMPLEMENTED_MSG("LD_L Unhandled type: {}", | ||
| 2830 | static_cast<unsigned>(instr.ldst_sl.type.Value())); | ||
| 2831 | } | ||
| 2832 | break; | ||
| 2833 | } | ||
| 2834 | case OpCode::Id::ST_A: { | ||
| 2835 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | ||
| 2836 | "Indirect attribute loads are not supported"); | ||
| 2837 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | ||
| 2838 | "Unaligned attribute loads are not supported"); | ||
| 2839 | |||
| 2840 | u64 next_element = instr.attribute.fmt20.element; | ||
| 2841 | u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||
| 2842 | |||
| 2843 | const auto StoreNextElement = [&](u32 reg_offset) { | ||
| 2844 | regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index), | ||
| 2845 | next_element, instr.gpr0.Value() + reg_offset, | ||
| 2846 | instr.gpr39.Value()); | ||
| 2847 | |||
| 2848 | // Load the next attribute element into the following register. If the element | ||
| 2849 | // to load goes beyond the vec4 size, load the first element of the next | ||
| 2850 | // attribute. | ||
| 2851 | next_element = (next_element + 1) % 4; | ||
| 2852 | next_index = next_index + (next_element == 0 ? 1 : 0); | ||
| 2853 | }; | ||
| 2854 | |||
| 2855 | const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; | ||
| 2856 | for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { | ||
| 2857 | StoreNextElement(reg_offset); | ||
| 2858 | } | ||
| 2859 | |||
| 2860 | break; | ||
| 2861 | } | ||
| 2862 | case OpCode::Id::ST_L: { | ||
| 2863 | UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}", | ||
| 2864 | static_cast<unsigned>(instr.st_l.unknown.Value())); | ||
| 2865 | |||
| 2866 | const auto scope = shader.Scope(); | ||
| 2867 | |||
| 2868 | std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + | ||
| 2869 | std::to_string(instr.smem_imm.Value()) + ')'; | ||
| 2870 | |||
| 2871 | shader.AddLine("uint index = (" + op + " / 4);"); | ||
| 2872 | |||
| 2873 | switch (instr.ldst_sl.type.Value()) { | ||
| 2874 | case Tegra::Shader::StoreType::Bytes32: | ||
| 2875 | regs.SetLocalMemoryAsFloat("index", regs.GetRegisterAsFloat(instr.gpr0)); | ||
| 2876 | break; | ||
| 2877 | default: | ||
| 2878 | UNIMPLEMENTED_MSG("ST_L Unhandled type: {}", | ||
| 2879 | static_cast<unsigned>(instr.ldst_sl.type.Value())); | ||
| 2880 | } | ||
| 2881 | break; | ||
| 2882 | } | ||
| 2883 | case OpCode::Id::TEX: { | ||
| 2884 | Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; | ||
| 2885 | const bool is_array = instr.tex.array != 0; | ||
| 2886 | const bool depth_compare = | ||
| 2887 | instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); | ||
| 2888 | const auto process_mode = instr.tex.GetTextureProcessMode(); | ||
| 2889 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 2890 | "NODEP is not implemented"); | ||
| 2891 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), | ||
| 2892 | "AOFFI is not implemented"); | ||
| 2893 | |||
| 2894 | const auto [coord, texture] = | ||
| 2895 | GetTEXCode(instr, texture_type, process_mode, depth_compare, is_array); | ||
| 2896 | |||
| 2897 | const auto scope = shader.Scope(); | ||
| 2898 | shader.AddLine(coord); | ||
| 2899 | |||
| 2900 | if (depth_compare) { | ||
| 2901 | regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1); | ||
| 2902 | } else { | ||
| 2903 | shader.AddLine("vec4 texture_tmp = " + texture + ';'); | ||
| 2904 | std::size_t dest_elem{}; | ||
| 2905 | for (std::size_t elem = 0; elem < 4; ++elem) { | ||
| 2906 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 2907 | // Skip disabled components | ||
| 2908 | continue; | ||
| 2909 | } | ||
| 2910 | regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, false, | ||
| 2911 | dest_elem); | ||
| 2912 | ++dest_elem; | ||
| 2913 | } | ||
| 2914 | } | ||
| 2915 | break; | ||
| 2916 | } | ||
| 2917 | case OpCode::Id::TEXS: { | ||
| 2918 | Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()}; | ||
| 2919 | const bool is_array{instr.texs.IsArrayTexture()}; | ||
| 2920 | const bool depth_compare = | ||
| 2921 | instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); | ||
| 2922 | const auto process_mode = instr.texs.GetTextureProcessMode(); | ||
| 2923 | |||
| 2924 | UNIMPLEMENTED_IF_MSG(instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 2925 | "NODEP is not implemented"); | ||
| 2926 | |||
| 2927 | const auto scope = shader.Scope(); | ||
| 2928 | |||
| 2929 | auto [coord, texture] = | ||
| 2930 | GetTEXSCode(instr, texture_type, process_mode, depth_compare, is_array); | ||
| 2931 | |||
| 2932 | shader.AddLine(coord); | ||
| 2933 | |||
| 2934 | if (depth_compare) { | ||
| 2935 | texture = "vec4(" + texture + ')'; | ||
| 2936 | } | ||
| 2937 | shader.AddLine("vec4 texture_tmp = " + texture + ';'); | ||
| 2938 | |||
| 2939 | if (instr.texs.fp32_flag) { | ||
| 2940 | WriteTexsInstructionFloat(instr, "texture_tmp"); | ||
| 2941 | } else { | ||
| 2942 | WriteTexsInstructionHalfFloat(instr, "texture_tmp"); | ||
| 2943 | } | ||
| 2944 | break; | ||
| 2945 | } | ||
| 2946 | case OpCode::Id::TLDS: { | ||
| 2947 | const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; | ||
| 2948 | const bool is_array{instr.tlds.IsArrayTexture()}; | ||
| 2949 | |||
| 2950 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 2951 | "NODEP is not implemented"); | ||
| 2952 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), | ||
| 2953 | "AOFFI is not implemented"); | ||
| 2954 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ), | ||
| 2955 | "MZ is not implemented"); | ||
| 2956 | |||
| 2957 | const auto [coord, texture] = GetTLDSCode(instr, texture_type, is_array); | ||
| 2958 | |||
| 2959 | const auto scope = shader.Scope(); | ||
| 2960 | |||
| 2961 | shader.AddLine(coord); | ||
| 2962 | shader.AddLine("vec4 texture_tmp = " + texture + ';'); | ||
| 2963 | WriteTexsInstructionFloat(instr, "texture_tmp"); | ||
| 2964 | break; | ||
| 2965 | } | ||
| 2966 | case OpCode::Id::TLD4: { | ||
| 2967 | |||
| 2968 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 2969 | "NODEP is not implemented"); | ||
| 2970 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), | ||
| 2971 | "AOFFI is not implemented"); | ||
| 2972 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | ||
| 2973 | "NDV is not implemented"); | ||
| 2974 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP), | ||
| 2975 | "PTP is not implemented"); | ||
| 2976 | |||
| 2977 | auto texture_type = instr.tld4.texture_type.Value(); | ||
| 2978 | const bool depth_compare = | ||
| 2979 | instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); | ||
| 2980 | const bool is_array = instr.tld4.array != 0; | ||
| 2981 | |||
| 2982 | const auto [coord, texture] = | ||
| 2983 | GetTLD4Code(instr, texture_type, depth_compare, is_array); | ||
| 2984 | |||
| 2985 | const auto scope = shader.Scope(); | ||
| 2986 | |||
| 2987 | shader.AddLine(coord); | ||
| 2988 | std::size_t dest_elem{}; | ||
| 2989 | |||
| 2990 | shader.AddLine("vec4 texture_tmp = " + texture + ';'); | ||
| 2991 | for (std::size_t elem = 0; elem < 4; ++elem) { | ||
| 2992 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 2993 | // Skip disabled components | ||
| 2994 | continue; | ||
| 2995 | } | ||
| 2996 | regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, false, | ||
| 2997 | dest_elem); | ||
| 2998 | ++dest_elem; | ||
| 2999 | } | ||
| 3000 | break; | ||
| 3001 | } | ||
| 3002 | case OpCode::Id::TLD4S: { | ||
| 3003 | UNIMPLEMENTED_IF_MSG( | ||
| 3004 | instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 3005 | "NODEP is not implemented"); | ||
| 3006 | UNIMPLEMENTED_IF_MSG( | ||
| 3007 | instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), | ||
| 3008 | "AOFFI is not implemented"); | ||
| 3009 | |||
| 3010 | const auto scope = shader.Scope(); | ||
| 3011 | |||
| 3012 | std::string coords; | ||
| 3013 | |||
| 3014 | const bool depth_compare = | ||
| 3015 | instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); | ||
| 3016 | |||
| 3017 | const std::string sampler = GetSampler( | ||
| 3018 | instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare); | ||
| 3019 | |||
| 3020 | const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); | ||
| 3021 | coords = "vec2 coords = vec2(" + op_a + ", "; | ||
| 3022 | std::string texture = "textureGather(" + sampler + ", coords, "; | ||
| 3023 | |||
| 3024 | if (!depth_compare) { | ||
| 3025 | const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 3026 | coords += op_b + ");"; | ||
| 3027 | texture += std::to_string(instr.tld4s.component) + ')'; | ||
| 3028 | } else { | ||
| 3029 | const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | ||
| 3030 | const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 3031 | coords += op_b + ");"; | ||
| 3032 | texture += op_c + ')'; | ||
| 3033 | } | ||
| 3034 | shader.AddLine(coords); | ||
| 3035 | shader.AddLine("vec4 texture_tmp = " + texture + ';'); | ||
| 3036 | WriteTexsInstructionFloat(instr, "texture_tmp"); | ||
| 3037 | break; | ||
| 3038 | } | ||
| 3039 | case OpCode::Id::TXQ: { | ||
| 3040 | UNIMPLEMENTED_IF_MSG(instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 3041 | "NODEP is not implemented"); | ||
| 3042 | |||
| 3043 | const auto scope = shader.Scope(); | ||
| 3044 | |||
| 3045 | // TODO: The new commits on the texture refactor, change the way samplers work. | ||
| 3046 | // Sadly, not all texture instructions specify the type of texture their sampler | ||
| 3047 | // uses. This must be fixed at a later instance. | ||
| 3048 | const std::string sampler = | ||
| 3049 | GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); | ||
| 3050 | switch (instr.txq.query_type) { | ||
| 3051 | case Tegra::Shader::TextureQueryType::Dimension: { | ||
| 3052 | const std::string texture = "textureSize(" + sampler + ", " + | ||
| 3053 | regs.GetRegisterAsInteger(instr.gpr8) + ')'; | ||
| 3054 | const std::string mip_level = "textureQueryLevels(" + sampler + ')'; | ||
| 3055 | shader.AddLine("ivec2 sizes = " + texture + ';'); | ||
| 3056 | |||
| 3057 | regs.SetRegisterToInteger(instr.gpr0.Value() + 0, true, 0, "sizes.x", 1, 1); | ||
| 3058 | regs.SetRegisterToInteger(instr.gpr0.Value() + 1, true, 0, "sizes.y", 1, 1); | ||
| 3059 | regs.SetRegisterToInteger(instr.gpr0.Value() + 2, true, 0, "0", 1, 1); | ||
| 3060 | regs.SetRegisterToInteger(instr.gpr0.Value() + 3, true, 0, mip_level, 1, 1); | ||
| 3061 | break; | ||
| 3062 | } | ||
| 3063 | default: { | ||
| 3064 | UNIMPLEMENTED_MSG("Unhandled texture query type: {}", | ||
| 3065 | static_cast<u32>(instr.txq.query_type.Value())); | ||
| 3066 | } | ||
| 3067 | } | ||
| 3068 | break; | ||
| 3069 | } | ||
| 3070 | case OpCode::Id::TMML: { | ||
| 3071 | UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 3072 | "NODEP is not implemented"); | ||
| 3073 | UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | ||
| 3074 | "NDV is not implemented"); | ||
| 3075 | |||
| 3076 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8); | ||
| 3077 | const bool is_array = instr.tmml.array != 0; | ||
| 3078 | auto texture_type = instr.tmml.texture_type.Value(); | ||
| 3079 | const std::string sampler = | ||
| 3080 | GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 3081 | |||
| 3082 | const auto scope = shader.Scope(); | ||
| 3083 | |||
| 3084 | // TODO: Add coordinates for different samplers once other texture types are | ||
| 3085 | // implemented. | ||
| 3086 | switch (texture_type) { | ||
| 3087 | case Tegra::Shader::TextureType::Texture1D: { | ||
| 3088 | shader.AddLine("float coords = " + x + ';'); | ||
| 3089 | break; | ||
| 3090 | } | ||
| 3091 | case Tegra::Shader::TextureType::Texture2D: { | ||
| 3092 | const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | ||
| 3093 | shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); | ||
| 3094 | break; | ||
| 3095 | } | ||
| 3096 | default: | ||
| 3097 | UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type)); | ||
| 3098 | |||
| 3099 | // Fallback to interpreting as a 2D texture for now | ||
| 3100 | const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | ||
| 3101 | shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); | ||
| 3102 | texture_type = Tegra::Shader::TextureType::Texture2D; | ||
| 3103 | } | ||
| 3104 | |||
| 3105 | const std::string texture = "textureQueryLod(" + sampler + ", coords)"; | ||
| 3106 | shader.AddLine("vec2 tmp = " + texture + " * vec2(256.0, 256.0);"); | ||
| 3107 | |||
| 3108 | regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(tmp.y)", 1, 1); | ||
| 3109 | regs.SetRegisterToInteger(instr.gpr0.Value() + 1, false, 0, "uint(tmp.x)", 1, 1); | ||
| 3110 | break; | ||
| 3111 | } | ||
| 3112 | default: { | ||
| 3113 | UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); | ||
| 3114 | } | ||
| 3115 | } | ||
| 3116 | break; | ||
| 3117 | } | ||
| 3118 | case OpCode::Type::FloatSetPredicate: { | ||
| 3119 | const std::string op_a = | ||
| 3120 | GetOperandAbsNeg(regs.GetRegisterAsFloat(instr.gpr8), instr.fsetp.abs_a != 0, | ||
| 3121 | instr.fsetp.neg_a != 0); | ||
| 3122 | |||
| 3123 | std::string op_b; | ||
| 3124 | |||
| 3125 | if (instr.is_b_imm) { | ||
| 3126 | op_b += '(' + GetImmediate19(instr) + ')'; | ||
| 3127 | } else { | ||
| 3128 | if (instr.is_b_gpr) { | ||
| 3129 | op_b += regs.GetRegisterAsFloat(instr.gpr20); | ||
| 3130 | } else { | ||
| 3131 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 3132 | GLSLRegister::Type::Float); | ||
| 3133 | } | ||
| 3134 | } | ||
| 3135 | |||
| 3136 | if (instr.fsetp.abs_b) { | ||
| 3137 | op_b = "abs(" + op_b + ')'; | ||
| 3138 | } | ||
| 3139 | |||
| 3140 | // We can't use the constant predicate as destination. | ||
| 3141 | ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 3142 | |||
| 3143 | const std::string second_pred = | ||
| 3144 | GetPredicateCondition(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); | ||
| 3145 | |||
| 3146 | const std::string combiner = GetPredicateCombiner(instr.fsetp.op); | ||
| 3147 | |||
| 3148 | const std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b); | ||
| 3149 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 3150 | SetPredicate(instr.fsetp.pred3, | ||
| 3151 | '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3152 | |||
| 3153 | if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 3154 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||
| 3155 | // if enabled | ||
| 3156 | SetPredicate(instr.fsetp.pred0, | ||
| 3157 | "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3158 | } | ||
| 3159 | break; | ||
| 3160 | } | ||
| 3161 | case OpCode::Type::IntegerSetPredicate: { | ||
| 3162 | const std::string op_a = | ||
| 3163 | regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed); | ||
| 3164 | std::string op_b; | ||
| 3165 | |||
| 3166 | if (instr.is_b_imm) { | ||
| 3167 | op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')'; | ||
| 3168 | } else { | ||
| 3169 | if (instr.is_b_gpr) { | ||
| 3170 | op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.isetp.is_signed); | ||
| 3171 | } else { | ||
| 3172 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 3173 | GLSLRegister::Type::Integer); | ||
| 3174 | } | ||
| 3175 | } | ||
| 3176 | |||
| 3177 | // We can't use the constant predicate as destination. | ||
| 3178 | ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 3179 | |||
| 3180 | const std::string second_pred = | ||
| 3181 | GetPredicateCondition(instr.isetp.pred39, instr.isetp.neg_pred != 0); | ||
| 3182 | |||
| 3183 | const std::string combiner = GetPredicateCombiner(instr.isetp.op); | ||
| 3184 | |||
| 3185 | const std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b); | ||
| 3186 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 3187 | SetPredicate(instr.isetp.pred3, | ||
| 3188 | '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3189 | |||
| 3190 | if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 3191 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||
| 3192 | // if enabled | ||
| 3193 | SetPredicate(instr.isetp.pred0, | ||
| 3194 | "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3195 | } | ||
| 3196 | break; | ||
| 3197 | } | ||
| 3198 | case OpCode::Type::HalfSetPredicate: { | ||
| 3199 | UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); | ||
| 3200 | |||
| 3201 | const std::string op_a = | ||
| 3202 | GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hsetp2.type_a, | ||
| 3203 | instr.hsetp2.abs_a, instr.hsetp2.negate_a); | ||
| 3204 | |||
| 3205 | const std::string op_b = [&]() { | ||
| 3206 | switch (opcode->get().GetId()) { | ||
| 3207 | case OpCode::Id::HSETP2_R: | ||
| 3208 | return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false), | ||
| 3209 | instr.hsetp2.type_b, instr.hsetp2.abs_a, | ||
| 3210 | instr.hsetp2.negate_b); | ||
| 3211 | default: | ||
| 3212 | UNREACHABLE(); | ||
| 3213 | return std::string("vec2(0)"); | ||
| 3214 | } | ||
| 3215 | }(); | ||
| 3216 | |||
| 3217 | // We can't use the constant predicate as destination. | ||
| 3218 | ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 3219 | |||
| 3220 | const std::string second_pred = | ||
| 3221 | GetPredicateCondition(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); | ||
| 3222 | |||
| 3223 | const std::string combiner = GetPredicateCombiner(instr.hsetp2.op); | ||
| 3224 | |||
| 3225 | const std::string component_combiner = instr.hsetp2.h_and ? "&&" : "||"; | ||
| 3226 | const std::string predicate = | ||
| 3227 | '(' + GetPredicateComparison(instr.hsetp2.cond, op_a + ".x", op_b + ".x") + ' ' + | ||
| 3228 | component_combiner + ' ' + | ||
| 3229 | GetPredicateComparison(instr.hsetp2.cond, op_a + ".y", op_b + ".y") + ')'; | ||
| 3230 | |||
| 3231 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 3232 | SetPredicate(instr.hsetp2.pred3, | ||
| 3233 | '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3234 | |||
| 3235 | if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 3236 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||
| 3237 | // if enabled | ||
| 3238 | SetPredicate(instr.hsetp2.pred0, | ||
| 3239 | "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3240 | } | ||
| 3241 | break; | ||
| 3242 | } | ||
| 3243 | case OpCode::Type::PredicateSetRegister: { | ||
| 3244 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 3245 | "Condition codes generation in PSET is partially implemented"); | ||
| 3246 | |||
| 3247 | const std::string op_a = | ||
| 3248 | GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0); | ||
| 3249 | const std::string op_b = | ||
| 3250 | GetPredicateCondition(instr.pset.pred29, instr.pset.neg_pred29 != 0); | ||
| 3251 | |||
| 3252 | const std::string second_pred = | ||
| 3253 | GetPredicateCondition(instr.pset.pred39, instr.pset.neg_pred39 != 0); | ||
| 3254 | |||
| 3255 | const std::string combiner = GetPredicateCombiner(instr.pset.op); | ||
| 3256 | |||
| 3257 | const std::string predicate = | ||
| 3258 | '(' + op_a + ") " + GetPredicateCombiner(instr.pset.cond) + " (" + op_b + ')'; | ||
| 3259 | const std::string result = '(' + predicate + ") " + combiner + " (" + second_pred + ')'; | ||
| 3260 | if (instr.pset.bf == 0) { | ||
| 3261 | const std::string value = '(' + result + ") ? 0xFFFFFFFF : 0"; | ||
| 3262 | regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1, false, | ||
| 3263 | instr.generates_cc); | ||
| 3264 | } else { | ||
| 3265 | const std::string value = '(' + result + ") ? 1.0 : 0.0"; | ||
| 3266 | regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1, false, instr.generates_cc); | ||
| 3267 | } | ||
| 3268 | break; | ||
| 3269 | } | ||
| 3270 | case OpCode::Type::PredicateSetPredicate: { | ||
| 3271 | switch (opcode->get().GetId()) { | ||
| 3272 | case OpCode::Id::PSETP: { | ||
| 3273 | const std::string op_a = | ||
| 3274 | GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); | ||
| 3275 | const std::string op_b = | ||
| 3276 | GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); | ||
| 3277 | |||
| 3278 | // We can't use the constant predicate as destination. | ||
| 3279 | ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 3280 | |||
| 3281 | const std::string second_pred = | ||
| 3282 | GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); | ||
| 3283 | |||
| 3284 | const std::string combiner = GetPredicateCombiner(instr.psetp.op); | ||
| 3285 | |||
| 3286 | const std::string predicate = | ||
| 3287 | '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')'; | ||
| 3288 | |||
| 3289 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 3290 | SetPredicate(instr.psetp.pred3, | ||
| 3291 | '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3292 | |||
| 3293 | if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 3294 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||
| 3295 | // if enabled | ||
| 3296 | SetPredicate(instr.psetp.pred0, | ||
| 3297 | "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3298 | } | ||
| 3299 | break; | ||
| 3300 | } | ||
| 3301 | case OpCode::Id::CSETP: { | ||
| 3302 | const std::string pred = | ||
| 3303 | GetPredicateCondition(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); | ||
| 3304 | const std::string combiner = GetPredicateCombiner(instr.csetp.op); | ||
| 3305 | const std::string condition_code = regs.GetConditionCode(instr.csetp.cc); | ||
| 3306 | if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 3307 | SetPredicate(instr.csetp.pred3, | ||
| 3308 | '(' + condition_code + ") " + combiner + " (" + pred + ')'); | ||
| 3309 | } | ||
| 3310 | if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 3311 | SetPredicate(instr.csetp.pred0, | ||
| 3312 | "!(" + condition_code + ") " + combiner + " (" + pred + ')'); | ||
| 3313 | } | ||
| 3314 | break; | ||
| 3315 | } | ||
| 3316 | default: { | ||
| 3317 | UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName()); | ||
| 3318 | } | ||
| 3319 | } | ||
| 3320 | break; | ||
| 3321 | } | ||
| 3322 | case OpCode::Type::RegisterSetPredicate: { | ||
| 3323 | UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr); | ||
| 3324 | |||
| 3325 | const std::string apply_mask = [&]() { | ||
| 3326 | switch (opcode->get().GetId()) { | ||
| 3327 | case OpCode::Id::R2P_IMM: | ||
| 3328 | return std::to_string(instr.r2p.immediate_mask); | ||
| 3329 | default: | ||
| 3330 | UNREACHABLE(); | ||
| 3331 | return std::to_string(instr.r2p.immediate_mask); | ||
| 3332 | } | ||
| 3333 | }(); | ||
| 3334 | const std::string mask = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + | ||
| 3335 | " >> " + std::to_string(instr.r2p.byte) + ')'; | ||
| 3336 | |||
| 3337 | constexpr u64 programmable_preds = 7; | ||
| 3338 | for (u64 pred = 0; pred < programmable_preds; ++pred) { | ||
| 3339 | const auto shift = std::to_string(1 << pred); | ||
| 3340 | |||
| 3341 | shader.AddLine("if ((" + apply_mask + " & " + shift + ") != 0) {"); | ||
| 3342 | ++shader.scope; | ||
| 3343 | |||
| 3344 | SetPredicate(pred, '(' + mask + " & " + shift + ") != 0"); | ||
| 3345 | |||
| 3346 | --shader.scope; | ||
| 3347 | shader.AddLine('}'); | ||
| 3348 | } | ||
| 3349 | break; | ||
| 3350 | } | ||
| 3351 | case OpCode::Type::FloatSet: { | ||
| 3352 | const std::string op_a = GetOperandAbsNeg(regs.GetRegisterAsFloat(instr.gpr8), | ||
| 3353 | instr.fset.abs_a != 0, instr.fset.neg_a != 0); | ||
| 3354 | |||
| 3355 | std::string op_b; | ||
| 3356 | |||
| 3357 | if (instr.is_b_imm) { | ||
| 3358 | const std::string imm = GetImmediate19(instr); | ||
| 3359 | op_b = imm; | ||
| 3360 | } else { | ||
| 3361 | if (instr.is_b_gpr) { | ||
| 3362 | op_b = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 3363 | } else { | ||
| 3364 | op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 3365 | GLSLRegister::Type::Float); | ||
| 3366 | } | ||
| 3367 | } | ||
| 3368 | |||
| 3369 | op_b = GetOperandAbsNeg(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0); | ||
| 3370 | |||
| 3371 | // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the | ||
| 3372 | // condition is true, and to 0 otherwise. | ||
| 3373 | const std::string second_pred = | ||
| 3374 | GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0); | ||
| 3375 | |||
| 3376 | const std::string combiner = GetPredicateCombiner(instr.fset.op); | ||
| 3377 | |||
| 3378 | const std::string predicate = "((" + | ||
| 3379 | GetPredicateComparison(instr.fset.cond, op_a, op_b) + | ||
| 3380 | ") " + combiner + " (" + second_pred + "))"; | ||
| 3381 | |||
| 3382 | if (instr.fset.bf) { | ||
| 3383 | regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1, false, | ||
| 3384 | instr.generates_cc); | ||
| 3385 | } else { | ||
| 3386 | regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1, | ||
| 3387 | 1, false, instr.generates_cc); | ||
| 3388 | } | ||
| 3389 | break; | ||
| 3390 | } | ||
| 3391 | case OpCode::Type::IntegerSet: { | ||
| 3392 | const std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed); | ||
| 3393 | |||
| 3394 | std::string op_b; | ||
| 3395 | |||
| 3396 | if (instr.is_b_imm) { | ||
| 3397 | op_b = std::to_string(instr.alu.GetSignedImm20_20()); | ||
| 3398 | } else { | ||
| 3399 | if (instr.is_b_gpr) { | ||
| 3400 | op_b = regs.GetRegisterAsInteger(instr.gpr20, 0, instr.iset.is_signed); | ||
| 3401 | } else { | ||
| 3402 | op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 3403 | GLSLRegister::Type::Integer); | ||
| 3404 | } | ||
| 3405 | } | ||
| 3406 | |||
| 3407 | // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the | ||
| 3408 | // condition is true, and to 0 otherwise. | ||
| 3409 | const std::string second_pred = | ||
| 3410 | GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0); | ||
| 3411 | |||
| 3412 | const std::string combiner = GetPredicateCombiner(instr.iset.op); | ||
| 3413 | |||
| 3414 | const std::string predicate = "((" + | ||
| 3415 | GetPredicateComparison(instr.iset.cond, op_a, op_b) + | ||
| 3416 | ") " + combiner + " (" + second_pred + "))"; | ||
| 3417 | |||
| 3418 | if (instr.iset.bf) { | ||
| 3419 | regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1); | ||
| 3420 | } else { | ||
| 3421 | regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1, | ||
| 3422 | 1); | ||
| 3423 | } | ||
| 3424 | break; | ||
| 3425 | } | ||
| 3426 | case OpCode::Type::HalfSet: { | ||
| 3427 | UNIMPLEMENTED_IF(instr.hset2.ftz != 0); | ||
| 3428 | |||
| 3429 | const std::string op_a = | ||
| 3430 | GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hset2.type_a, | ||
| 3431 | instr.hset2.abs_a != 0, instr.hset2.negate_a != 0); | ||
| 3432 | |||
| 3433 | const std::string op_b = [&]() { | ||
| 3434 | switch (opcode->get().GetId()) { | ||
| 3435 | case OpCode::Id::HSET2_R: | ||
| 3436 | return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false), | ||
| 3437 | instr.hset2.type_b, instr.hset2.abs_b != 0, | ||
| 3438 | instr.hset2.negate_b != 0); | ||
| 3439 | default: | ||
| 3440 | UNREACHABLE(); | ||
| 3441 | return std::string("vec2(0)"); | ||
| 3442 | } | ||
| 3443 | }(); | ||
| 3444 | |||
| 3445 | const std::string second_pred = | ||
| 3446 | GetPredicateCondition(instr.hset2.pred39, instr.hset2.neg_pred != 0); | ||
| 3447 | |||
| 3448 | const std::string combiner = GetPredicateCombiner(instr.hset2.op); | ||
| 3449 | |||
| 3450 | // HSET2 operates on each half float in the pack. | ||
| 3451 | std::string result; | ||
| 3452 | for (int i = 0; i < 2; ++i) { | ||
| 3453 | const std::string float_value = i == 0 ? "0x00003c00" : "0x3c000000"; | ||
| 3454 | const std::string integer_value = i == 0 ? "0x0000ffff" : "0xffff0000"; | ||
| 3455 | const std::string value = instr.hset2.bf == 1 ? float_value : integer_value; | ||
| 3456 | |||
| 3457 | const std::string comp = std::string(".") + "xy"[i]; | ||
| 3458 | const std::string predicate = | ||
| 3459 | "((" + GetPredicateComparison(instr.hset2.cond, op_a + comp, op_b + comp) + | ||
| 3460 | ") " + combiner + " (" + second_pred + "))"; | ||
| 3461 | |||
| 3462 | result += '(' + predicate + " ? " + value + " : 0)"; | ||
| 3463 | if (i == 0) { | ||
| 3464 | result += " | "; | ||
| 3465 | } | ||
| 3466 | } | ||
| 3467 | regs.SetRegisterToInteger(instr.gpr0, false, 0, '(' + result + ')', 1, 1); | ||
| 3468 | break; | ||
| 3469 | } | ||
| 3470 | case OpCode::Type::Xmad: { | ||
| 3471 | UNIMPLEMENTED_IF(instr.xmad.sign_a); | ||
| 3472 | UNIMPLEMENTED_IF(instr.xmad.sign_b); | ||
| 3473 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 3474 | "Condition codes generation in XMAD is partially implemented"); | ||
| 3475 | |||
| 3476 | std::string op_a{regs.GetRegisterAsInteger(instr.gpr8, 0, instr.xmad.sign_a)}; | ||
| 3477 | std::string op_b; | ||
| 3478 | std::string op_c; | ||
| 3479 | |||
| 3480 | // TODO(bunnei): Needs to be fixed once op_a or op_b is signed | ||
| 3481 | UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b); | ||
| 3482 | const bool is_signed{instr.xmad.sign_a == 1}; | ||
| 3483 | |||
| 3484 | bool is_merge{}; | ||
| 3485 | switch (opcode->get().GetId()) { | ||
| 3486 | case OpCode::Id::XMAD_CR: { | ||
| 3487 | is_merge = instr.xmad.merge_56; | ||
| 3488 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 3489 | instr.xmad.sign_b ? GLSLRegister::Type::Integer | ||
| 3490 | : GLSLRegister::Type::UnsignedInteger); | ||
| 3491 | op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); | ||
| 3492 | break; | ||
| 3493 | } | ||
| 3494 | case OpCode::Id::XMAD_RR: { | ||
| 3495 | is_merge = instr.xmad.merge_37; | ||
| 3496 | op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.xmad.sign_b); | ||
| 3497 | op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); | ||
| 3498 | break; | ||
| 3499 | } | ||
| 3500 | case OpCode::Id::XMAD_RC: { | ||
| 3501 | op_b += regs.GetRegisterAsInteger(instr.gpr39, 0, instr.xmad.sign_b); | ||
| 3502 | op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 3503 | is_signed ? GLSLRegister::Type::Integer | ||
| 3504 | : GLSLRegister::Type::UnsignedInteger); | ||
| 3505 | break; | ||
| 3506 | } | ||
| 3507 | case OpCode::Id::XMAD_IMM: { | ||
| 3508 | is_merge = instr.xmad.merge_37; | ||
| 3509 | op_b += std::to_string(instr.xmad.imm20_16); | ||
| 3510 | op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); | ||
| 3511 | break; | ||
| 3512 | } | ||
| 3513 | default: { | ||
| 3514 | UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); | ||
| 3515 | } | ||
| 3516 | } | ||
| 3517 | |||
| 3518 | // TODO(bunnei): Ensure this is right with signed operands | ||
| 3519 | if (instr.xmad.high_a) { | ||
| 3520 | op_a = "((" + op_a + ") >> 16)"; | ||
| 3521 | } else { | ||
| 3522 | op_a = "((" + op_a + ") & 0xFFFF)"; | ||
| 3523 | } | ||
| 3524 | |||
| 3525 | std::string src2 = '(' + op_b + ')'; // Preserve original source 2 | ||
| 3526 | if (instr.xmad.high_b) { | ||
| 3527 | op_b = '(' + src2 + " >> 16)"; | ||
| 3528 | } else { | ||
| 3529 | op_b = '(' + src2 + " & 0xFFFF)"; | ||
| 3530 | } | ||
| 3531 | |||
| 3532 | std::string product = '(' + op_a + " * " + op_b + ')'; | ||
| 3533 | if (instr.xmad.product_shift_left) { | ||
| 3534 | product = '(' + product + " << 16)"; | ||
| 3535 | } | ||
| 3536 | |||
| 3537 | switch (instr.xmad.mode) { | ||
| 3538 | case Tegra::Shader::XmadMode::None: | ||
| 3539 | break; | ||
| 3540 | case Tegra::Shader::XmadMode::CLo: | ||
| 3541 | op_c = "((" + op_c + ") & 0xFFFF)"; | ||
| 3542 | break; | ||
| 3543 | case Tegra::Shader::XmadMode::CHi: | ||
| 3544 | op_c = "((" + op_c + ") >> 16)"; | ||
| 3545 | break; | ||
| 3546 | case Tegra::Shader::XmadMode::CBcc: | ||
| 3547 | op_c = "((" + op_c + ") + (" + src2 + "<< 16))"; | ||
| 3548 | break; | ||
| 3549 | default: { | ||
| 3550 | UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", | ||
| 3551 | static_cast<u32>(instr.xmad.mode.Value())); | ||
| 3552 | } | ||
| 3553 | } | ||
| 3554 | |||
| 3555 | std::string sum{'(' + product + " + " + op_c + ')'}; | ||
| 3556 | if (is_merge) { | ||
| 3557 | sum = "((" + sum + " & 0xFFFF) | (" + src2 + "<< 16))"; | ||
| 3558 | } | ||
| 3559 | |||
| 3560 | regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1, false, | ||
| 3561 | instr.generates_cc); | ||
| 3562 | break; | ||
| 3563 | } | ||
| 3564 | default: { | ||
| 3565 | switch (opcode->get().GetId()) { | ||
| 3566 | case OpCode::Id::EXIT: { | ||
| 3567 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||
| 3568 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, | ||
| 3569 | "EXIT condition code used: {}", static_cast<u32>(cc)); | ||
| 3570 | |||
| 3571 | if (stage == Maxwell3D::Regs::ShaderStage::Fragment) { | ||
| 3572 | EmitFragmentOutputsWrite(); | ||
| 3573 | } | ||
| 3574 | |||
| 3575 | switch (instr.flow.cond) { | ||
| 3576 | case Tegra::Shader::FlowCondition::Always: | ||
| 3577 | shader.AddLine("return true;"); | ||
| 3578 | if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 3579 | // If this is an unconditional exit then just end processing here, | ||
| 3580 | // otherwise we have to account for the possibility of the condition | ||
| 3581 | // not being met, so continue processing the next instruction. | ||
| 3582 | offset = PROGRAM_END - 1; | ||
| 3583 | } | ||
| 3584 | break; | ||
| 3585 | |||
| 3586 | case Tegra::Shader::FlowCondition::Fcsm_Tr: | ||
| 3587 | // TODO(bunnei): What is this used for? If we assume this condition is not | ||
| 3588 | // satisfied, dual vertex shaders in Farming Simulator make more sense | ||
| 3589 | UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr"); | ||
| 3590 | break; | ||
| 3591 | |||
| 3592 | default: | ||
| 3593 | UNIMPLEMENTED_MSG("Unhandled flow condition: {}", | ||
| 3594 | static_cast<u32>(instr.flow.cond.Value())); | ||
| 3595 | } | ||
| 3596 | break; | ||
| 3597 | } | ||
| 3598 | case OpCode::Id::KIL: { | ||
| 3599 | UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always); | ||
| 3600 | |||
| 3601 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||
| 3602 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, | ||
| 3603 | "KIL condition code used: {}", static_cast<u32>(cc)); | ||
| 3604 | |||
| 3605 | // Enclose "discard" in a conditional, so that GLSL compilation does not complain | ||
| 3606 | // about unexecuted instructions that may follow this. | ||
| 3607 | shader.AddLine("if (true) {"); | ||
| 3608 | ++shader.scope; | ||
| 3609 | shader.AddLine("discard;"); | ||
| 3610 | --shader.scope; | ||
| 3611 | shader.AddLine("}"); | ||
| 3612 | |||
| 3613 | break; | ||
| 3614 | } | ||
| 3615 | case OpCode::Id::OUT_R: { | ||
| 3616 | UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex, | ||
| 3617 | "Stream buffer is not supported"); | ||
| 3618 | ASSERT_MSG(stage == Maxwell3D::Regs::ShaderStage::Geometry, | ||
| 3619 | "OUT is expected to be used in a geometry shader."); | ||
| 3620 | |||
| 3621 | if (instr.out.emit) { | ||
| 3622 | // gpr0 is used to store the next address. Hardware returns a pointer but | ||
| 3623 | // we just return the next index with a cyclic cap. | ||
| 3624 | const std::string current{regs.GetRegisterAsInteger(instr.gpr8, 0, false)}; | ||
| 3625 | const std::string next = "((" + current + " + 1" + ") % " + | ||
| 3626 | std::to_string(MAX_GEOMETRY_BUFFERS) + ')'; | ||
| 3627 | shader.AddLine("emit_vertex(" + current + ");"); | ||
| 3628 | regs.SetRegisterToInteger(instr.gpr0, false, 0, next, 1, 1); | ||
| 3629 | } | ||
| 3630 | if (instr.out.cut) { | ||
| 3631 | shader.AddLine("EndPrimitive();"); | ||
| 3632 | } | ||
| 3633 | |||
| 3634 | break; | ||
| 3635 | } | ||
| 3636 | case OpCode::Id::MOV_SYS: { | ||
| 3637 | switch (instr.sys20) { | ||
| 3638 | case Tegra::Shader::SystemVariable::InvocationInfo: { | ||
| 3639 | LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); | ||
| 3640 | regs.SetRegisterToInteger(instr.gpr0, false, 0, "0u", 1, 1); | ||
| 3641 | break; | ||
| 3642 | } | ||
| 3643 | case Tegra::Shader::SystemVariable::Ydirection: { | ||
| 3644 | // Config pack's third value is Y_NEGATE's state. | ||
| 3645 | regs.SetRegisterToFloat(instr.gpr0, 0, "uintBitsToFloat(config_pack[2])", 1, 1); | ||
| 3646 | break; | ||
| 3647 | } | ||
| 3648 | default: { | ||
| 3649 | UNIMPLEMENTED_MSG("Unhandled system move: {}", | ||
| 3650 | static_cast<u32>(instr.sys20.Value())); | ||
| 3651 | } | ||
| 3652 | } | ||
| 3653 | break; | ||
| 3654 | } | ||
| 3655 | case OpCode::Id::ISBERD: { | ||
| 3656 | UNIMPLEMENTED_IF(instr.isberd.o != 0); | ||
| 3657 | UNIMPLEMENTED_IF(instr.isberd.skew != 0); | ||
| 3658 | UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None); | ||
| 3659 | UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None); | ||
| 3660 | ASSERT_MSG(stage == Maxwell3D::Regs::ShaderStage::Geometry, | ||
| 3661 | "ISBERD is expected to be used in a geometry shader."); | ||
| 3662 | LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete"); | ||
| 3663 | regs.SetRegisterToFloat(instr.gpr0, 0, regs.GetRegisterAsFloat(instr.gpr8), 1, 1); | ||
| 3664 | break; | ||
| 3665 | } | ||
| 3666 | case OpCode::Id::BRA: { | ||
| 3667 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 3668 | "BRA with constant buffers are not implemented"); | ||
| 3669 | |||
| 3670 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||
| 3671 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 3672 | if (cc != Tegra::Shader::ConditionCode::T) { | ||
| 3673 | const std::string condition_code = regs.GetConditionCode(cc); | ||
| 3674 | shader.AddLine("if (" + condition_code + "){"); | ||
| 3675 | shader.scope++; | ||
| 3676 | shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }"); | ||
| 3677 | shader.scope--; | ||
| 3678 | shader.AddLine('}'); | ||
| 3679 | } else { | ||
| 3680 | shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }"); | ||
| 3681 | } | ||
| 3682 | break; | ||
| 3683 | } | ||
| 3684 | case OpCode::Id::IPA: { | ||
| 3685 | const auto& attribute = instr.attribute.fmt28; | ||
| 3686 | const auto& reg = instr.gpr0; | ||
| 3687 | |||
| 3688 | Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(), | ||
| 3689 | instr.ipa.sample_mode.Value()}; | ||
| 3690 | regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index, | ||
| 3691 | input_mode); | ||
| 3692 | |||
| 3693 | if (instr.ipa.saturate) { | ||
| 3694 | regs.SetRegisterToFloat(reg, 0, regs.GetRegisterAsFloat(reg), 1, 1, true); | ||
| 3695 | } | ||
| 3696 | break; | ||
| 3697 | } | ||
| 3698 | case OpCode::Id::SSY: { | ||
| 3699 | // The SSY opcode tells the GPU where to re-converge divergent execution paths, it | ||
| 3700 | // sets the target of the jump that the SYNC instruction will make. The SSY opcode | ||
| 3701 | // has a similar structure to the BRA opcode. | ||
| 3702 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 3703 | "Constant buffer flow is not supported"); | ||
| 3704 | |||
| 3705 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 3706 | EmitPushToFlowStack(target); | ||
| 3707 | break; | ||
| 3708 | } | ||
| 3709 | case OpCode::Id::PBK: { | ||
| 3710 | // PBK pushes to a stack the address where BRK will jump to. This shares stack with | ||
| 3711 | // SSY but using SYNC on a PBK address will kill the shader execution. We don't | ||
| 3712 | // emulate this because it's very unlikely a driver will emit such invalid shader. | ||
| 3713 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 3714 | "Constant buffer PBK is not supported"); | ||
| 3715 | |||
| 3716 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 3717 | EmitPushToFlowStack(target); | ||
| 3718 | break; | ||
| 3719 | } | ||
| 3720 | case OpCode::Id::SYNC: { | ||
| 3721 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||
| 3722 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, | ||
| 3723 | "SYNC condition code used: {}", static_cast<u32>(cc)); | ||
| 3724 | |||
| 3725 | // The SYNC opcode jumps to the address previously set by the SSY opcode | ||
| 3726 | EmitPopFromFlowStack(); | ||
| 3727 | break; | ||
| 3728 | } | ||
| 3729 | case OpCode::Id::BRK: { | ||
| 3730 | // The BRK opcode jumps to the address previously set by the PBK opcode | ||
| 3731 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||
| 3732 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, | ||
| 3733 | "BRK condition code used: {}", static_cast<u32>(cc)); | ||
| 3734 | |||
| 3735 | EmitPopFromFlowStack(); | ||
| 3736 | break; | ||
| 3737 | } | ||
| 3738 | case OpCode::Id::DEPBAR: { | ||
| 3739 | // TODO(Subv): Find out if we actually have to care about this instruction or if | ||
| 3740 | // the GLSL compiler takes care of that for us. | ||
| 3741 | LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed"); | ||
| 3742 | break; | ||
| 3743 | } | ||
| 3744 | case OpCode::Id::VMAD: { | ||
| 3745 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 3746 | "Condition codes generation in VMAD is not implemented"); | ||
| 3747 | |||
| 3748 | const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; | ||
| 3749 | const std::string op_a = GetVideoOperandA(instr); | ||
| 3750 | const std::string op_b = GetVideoOperandB(instr); | ||
| 3751 | const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39, 0, result_signed); | ||
| 3752 | |||
| 3753 | std::string result = '(' + op_a + " * " + op_b + " + " + op_c + ')'; | ||
| 3754 | |||
| 3755 | switch (instr.vmad.shr) { | ||
| 3756 | case Tegra::Shader::VmadShr::Shr7: | ||
| 3757 | result = '(' + result + " >> 7)"; | ||
| 3758 | break; | ||
| 3759 | case Tegra::Shader::VmadShr::Shr15: | ||
| 3760 | result = '(' + result + " >> 15)"; | ||
| 3761 | break; | ||
| 3762 | } | ||
| 3763 | |||
| 3764 | regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1, | ||
| 3765 | instr.vmad.saturate, instr.vmad.cc); | ||
| 3766 | break; | ||
| 3767 | } | ||
| 3768 | case OpCode::Id::VSETP: { | ||
| 3769 | const std::string op_a = GetVideoOperandA(instr); | ||
| 3770 | const std::string op_b = GetVideoOperandB(instr); | ||
| 3771 | |||
| 3772 | // We can't use the constant predicate as destination. | ||
| 3773 | ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 3774 | |||
| 3775 | const std::string second_pred = GetPredicateCondition(instr.vsetp.pred39, false); | ||
| 3776 | |||
| 3777 | const std::string combiner = GetPredicateCombiner(instr.vsetp.op); | ||
| 3778 | |||
| 3779 | const std::string predicate = GetPredicateComparison(instr.vsetp.cond, op_a, op_b); | ||
| 3780 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 3781 | SetPredicate(instr.vsetp.pred3, | ||
| 3782 | '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3783 | |||
| 3784 | if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 3785 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||
| 3786 | // if enabled | ||
| 3787 | SetPredicate(instr.vsetp.pred0, | ||
| 3788 | "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3789 | } | ||
| 3790 | break; | ||
| 3791 | } | ||
| 3792 | default: { | ||
| 3793 | UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); | ||
| 3794 | break; | ||
| 3795 | } | ||
| 3796 | } | ||
| 3797 | |||
| 3798 | break; | ||
| 3799 | } | ||
| 3800 | } | ||
| 3801 | |||
| 3802 | // Close the predicate condition scope. | ||
| 3803 | if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 3804 | --shader.scope; | ||
| 3805 | shader.AddLine('}'); | ||
| 3806 | } | ||
| 3807 | |||
| 3808 | return offset + 1; | ||
| 3809 | } | ||
| 3810 | |||
| 3811 | /** | ||
| 3812 | * Compiles a range of instructions from Tegra to GLSL. | ||
| 3813 | * @param begin the offset of the starting instruction. | ||
| 3814 | * @param end the offset where the compilation should stop (exclusive). | ||
| 3815 | * @return the offset of the next instruction to compile. PROGRAM_END if the program | ||
| 3816 | * terminates. | ||
| 3817 | */ | ||
| 3818 | u32 CompileRange(u32 begin, u32 end) { | ||
| 3819 | u32 program_counter; | ||
| 3820 | for (program_counter = begin; program_counter < (begin > end ? PROGRAM_END : end);) { | ||
| 3821 | program_counter = CompileInstr(program_counter); | ||
| 3822 | } | ||
| 3823 | return program_counter; | ||
| 3824 | } | ||
| 3825 | |||
| 3826 | void Generate(const std::string& suffix) { | ||
| 3827 | // Add declarations for all subroutines | ||
| 3828 | for (const auto& subroutine : subroutines) { | ||
| 3829 | shader.AddLine("bool " + subroutine.GetName() + "();"); | ||
| 3830 | } | ||
| 3831 | shader.AddNewLine(); | ||
| 3832 | |||
| 3833 | // Add the main entry point | ||
| 3834 | shader.AddLine("bool exec_" + suffix + "() {"); | ||
| 3835 | ++shader.scope; | ||
| 3836 | CallSubroutine(GetSubroutine(main_offset, PROGRAM_END)); | ||
| 3837 | --shader.scope; | ||
| 3838 | shader.AddLine("}\n"); | ||
| 3839 | |||
| 3840 | // Add definitions for all subroutines | ||
| 3841 | for (const auto& subroutine : subroutines) { | ||
| 3842 | std::set<u32> labels = subroutine.labels; | ||
| 3843 | |||
| 3844 | shader.AddLine("bool " + subroutine.GetName() + "() {"); | ||
| 3845 | ++shader.scope; | ||
| 3846 | |||
| 3847 | if (labels.empty()) { | ||
| 3848 | if (CompileRange(subroutine.begin, subroutine.end) != PROGRAM_END) { | ||
| 3849 | shader.AddLine("return false;"); | ||
| 3850 | } | ||
| 3851 | } else { | ||
| 3852 | labels.insert(subroutine.begin); | ||
| 3853 | shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;"); | ||
| 3854 | |||
| 3855 | // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems | ||
| 3856 | // unlikely that shaders will use 20 nested SSYs and PBKs. | ||
| 3857 | constexpr u32 FLOW_STACK_SIZE = 20; | ||
| 3858 | shader.AddLine("uint flow_stack[" + std::to_string(FLOW_STACK_SIZE) + "];"); | ||
| 3859 | shader.AddLine("uint flow_stack_top = 0u;"); | ||
| 3860 | |||
| 3861 | shader.AddLine("while (true) {"); | ||
| 3862 | ++shader.scope; | ||
| 3863 | |||
| 3864 | shader.AddLine("switch (jmp_to) {"); | ||
| 3865 | |||
| 3866 | for (auto label : labels) { | ||
| 3867 | shader.AddLine("case " + std::to_string(label) + "u: {"); | ||
| 3868 | ++shader.scope; | ||
| 3869 | |||
| 3870 | const auto next_it = labels.lower_bound(label + 1); | ||
| 3871 | const u32 next_label = next_it == labels.end() ? subroutine.end : *next_it; | ||
| 3872 | |||
| 3873 | const u32 compile_end = CompileRange(label, next_label); | ||
| 3874 | if (compile_end > next_label && compile_end != PROGRAM_END) { | ||
| 3875 | // This happens only when there is a label inside a IF/LOOP block | ||
| 3876 | shader.AddLine(" jmp_to = " + std::to_string(compile_end) + "u; break; }"); | ||
| 3877 | labels.emplace(compile_end); | ||
| 3878 | } | ||
| 3879 | |||
| 3880 | --shader.scope; | ||
| 3881 | shader.AddLine('}'); | ||
| 3882 | } | ||
| 3883 | |||
| 3884 | shader.AddLine("default: return false;"); | ||
| 3885 | shader.AddLine('}'); | ||
| 3886 | |||
| 3887 | --shader.scope; | ||
| 3888 | shader.AddLine('}'); | ||
| 3889 | |||
| 3890 | shader.AddLine("return false;"); | ||
| 3891 | } | ||
| 3892 | |||
| 3893 | --shader.scope; | ||
| 3894 | shader.AddLine("}\n"); | ||
| 3895 | |||
| 3896 | DEBUG_ASSERT(shader.scope == 0); | ||
| 3897 | } | ||
| 3898 | |||
| 3899 | GenerateDeclarations(); | ||
| 3900 | } | ||
| 3901 | |||
| 3902 | /// Add declarations for registers | ||
| 3903 | void GenerateDeclarations() { | ||
| 3904 | regs.GenerateDeclarations(suffix); | ||
| 3905 | |||
| 3906 | for (const auto& pred : declr_predicates) { | ||
| 3907 | declarations.AddLine("bool " + pred + " = false;"); | ||
| 3908 | } | ||
| 3909 | declarations.AddNewLine(); | ||
| 3910 | } | ||
| 3911 | |||
| 3912 | private: | ||
| 3913 | const std::set<Subroutine>& subroutines; | ||
| 3914 | const ProgramCode& program_code; | ||
| 3915 | Tegra::Shader::Header header; | ||
| 3916 | const u32 main_offset; | ||
| 3917 | Maxwell3D::Regs::ShaderStage stage; | ||
| 3918 | const std::string& suffix; | ||
| 3919 | u64 local_memory_size; | ||
| 3920 | std::size_t shader_length; | ||
| 3921 | |||
| 3922 | ShaderWriter shader; | ||
| 3923 | ShaderWriter declarations; | ||
| 3924 | GLSLRegisterManager regs{shader, declarations, stage, suffix, header}; | ||
| 3925 | |||
| 3926 | // Declarations | ||
| 3927 | std::set<std::string> declr_predicates; | ||
| 3928 | }; // class GLSLGenerator | ||
| 3929 | |||
| 3930 | std::string GetCommonDeclarations() { | ||
| 3931 | return fmt::format("#define MAX_CONSTBUFFER_ELEMENTS {}\n", | ||
| 3932 | RasterizerOpenGL::MaxConstbufferSize / sizeof(GLvec4)); | ||
| 3933 | } | ||
| 3934 | |||
| 3935 | std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, | ||
| 3936 | Maxwell3D::Regs::ShaderStage stage, | ||
| 3937 | const std::string& suffix) { | ||
| 3938 | try { | ||
| 3939 | ControlFlowAnalyzer analyzer(program_code, main_offset, suffix); | ||
| 3940 | const auto subroutines = analyzer.GetSubroutines(); | ||
| 3941 | GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix, | ||
| 3942 | analyzer.GetShaderLength()); | ||
| 3943 | return ProgramResult{generator.GetShaderCode(), generator.GetEntries()}; | ||
| 3944 | } catch (const DecompileFail& exception) { | ||
| 3945 | LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what()); | ||
| 3946 | } | ||
| 3947 | return {}; | ||
| 3948 | } | ||
| 3949 | |||
| 3950 | } // namespace OpenGL::GLShader::Decompiler | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h deleted file mode 100644 index d01a4a7ee..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ /dev/null | |||
| @@ -1,25 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <functional> | ||
| 9 | #include <optional> | ||
| 10 | #include <string> | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/engines/maxwell_3d.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||
| 14 | |||
// Public interface of the Tegra-to-GLSL shader decompiler.
| 15 | namespace OpenGL::GLShader::Decompiler { | ||
| 16 | |||
| 17 | using Tegra::Engines::Maxwell3D; | ||
| 18 | |||
/// Returns the GLSL line defining MAX_CONSTBUFFER_ELEMENTS.
| 19 | std::string GetCommonDeclarations(); | ||
| 20 | |||
/// Decompiles a Tegra shader program into GLSL.
/// @param program_code the program to decompile.
/// @param main_offset offset of the program's entry point.
/// @param stage shader stage the program belongs to.
/// @param suffix suffix used when naming the generated entry point.
/// @return the generated shader code and its entries, or an empty optional if
///         decompilation fails.
| 21 | std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, | ||
| 22 | Maxwell3D::Regs::ShaderStage stage, | ||
| 23 | const std::string& suffix); | ||
| 24 | |||
| 25 | } // namespace OpenGL::GLShader::Decompiler | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 5d0819dc5..59f45cde3 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -5,24 +5,27 @@ | |||
| 5 | #include <fmt/format.h> | 5 | #include <fmt/format.h> |
| 6 | #include "common/assert.h" | 6 | #include "common/assert.h" |
| 7 | #include "video_core/engines/maxwell_3d.h" | 7 | #include "video_core/engines/maxwell_3d.h" |
| 8 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 9 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 8 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| 9 | #include "video_core/shader/glsl_decompiler.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 10 | 11 | ||
| 11 | namespace OpenGL::GLShader { | 12 | namespace OpenGL::GLShader { |
| 12 | 13 | ||
| 13 | using Tegra::Engines::Maxwell3D; | 14 | using Tegra::Engines::Maxwell3D; |
| 15 | using VideoCommon::Shader::ProgramCode; | ||
| 16 | using VideoCommon::Shader::ShaderIR; | ||
| 14 | 17 | ||
| 15 | static constexpr u32 PROGRAM_OFFSET{10}; | 18 | static constexpr u32 PROGRAM_OFFSET{10}; |
| 16 | 19 | ||
| 17 | ProgramResult GenerateVertexShader(const ShaderSetup& setup) { | 20 | ProgramResult GenerateVertexShader(const ShaderSetup& setup) { |
| 18 | std::string out = "#version 430 core\n"; | ||
| 19 | out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||
| 20 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 21 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |
| 21 | out += "// Shader Unique Id: VS" + id + "\n\n"; | ||
| 22 | out += Decompiler::GetCommonDeclarations(); | ||
| 23 | 22 | ||
| 24 | out += R"( | 23 | std::string out = "#version 430 core\n"; |
| 24 | out += "// Shader Unique Id: VS" + id + '\n'; | ||
| 25 | out += "#extension GL_ARB_separate_shader_objects : enable\n"; | ||
| 26 | out += GetCommonDeclarations(); | ||
| 25 | 27 | ||
| 28 | out += R"( | ||
| 26 | layout (location = 0) out vec4 position; | 29 | layout (location = 0) out vec4 position; |
| 27 | 30 | ||
| 28 | layout(std140) uniform vs_config { | 31 | layout(std140) uniform vs_config { |
| @@ -31,39 +34,30 @@ layout(std140) uniform vs_config { | |||
| 31 | uvec4 alpha_test; | 34 | uvec4 alpha_test; |
| 32 | }; | 35 | }; |
| 33 | )"; | 36 | )"; |
| 34 | 37 | ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | |
| 35 | if (setup.IsDualProgram()) { | 38 | ProgramResult program = Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); |
| 36 | out += "bool exec_vertex_b();\n"; | ||
| 37 | } | ||
| 38 | |||
| 39 | ProgramResult program = | ||
| 40 | Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, | ||
| 41 | Maxwell3D::Regs::ShaderStage::Vertex, "vertex") | ||
| 42 | .value_or(ProgramResult()); | ||
| 43 | 39 | ||
| 44 | out += program.first; | 40 | out += program.first; |
| 45 | 41 | ||
| 46 | if (setup.IsDualProgram()) { | 42 | if (setup.IsDualProgram()) { |
| 43 | ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET); | ||
| 47 | ProgramResult program_b = | 44 | ProgramResult program_b = |
| 48 | Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET, | 45 | Decompile(program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); |
| 49 | Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b") | 46 | |
| 50 | .value_or(ProgramResult()); | ||
| 51 | out += program_b.first; | 47 | out += program_b.first; |
| 52 | } | 48 | } |
| 53 | 49 | ||
| 54 | out += R"( | 50 | out += R"( |
| 55 | |||
| 56 | void main() { | 51 | void main() { |
| 57 | position = vec4(0.0, 0.0, 0.0, 0.0); | 52 | position = vec4(0.0, 0.0, 0.0, 0.0); |
| 58 | exec_vertex(); | 53 | execute_vertex(); |
| 59 | )"; | 54 | )"; |
| 60 | 55 | ||
| 61 | if (setup.IsDualProgram()) { | 56 | if (setup.IsDualProgram()) { |
| 62 | out += " exec_vertex_b();"; | 57 | out += " execute_vertex_b();"; |
| 63 | } | 58 | } |
| 64 | 59 | ||
| 65 | out += R"( | 60 | out += R"( |
| 66 | |||
| 67 | // Check if the flip stage is VertexB | 61 | // Check if the flip stage is VertexB |
| 68 | // Config pack's second value is flip_stage | 62 | // Config pack's second value is flip_stage |
| 69 | if (config_pack[1] == 1) { | 63 | if (config_pack[1] == 1) { |
| @@ -77,25 +71,23 @@ void main() { | |||
| 77 | if (config_pack[1] == 1) { | 71 | if (config_pack[1] == 1) { |
| 78 | position.w = 1.0; | 72 | position.w = 1.0; |
| 79 | } | 73 | } |
| 80 | } | 74 | })"; |
| 81 | |||
| 82 | )"; | ||
| 83 | 75 | ||
| 84 | return {out, program.second}; | 76 | return {out, program.second}; |
| 85 | } | 77 | } |
| 86 | 78 | ||
| 87 | ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { | 79 | ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { |
| 88 | // Version is intentionally skipped in shader generation, it's added by the lazy compilation. | 80 | // Version is intentionally skipped in shader generation, it's added by the lazy compilation. |
| 89 | std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||
| 90 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 81 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |
| 91 | out += "// Shader Unique Id: GS" + id + "\n\n"; | ||
| 92 | out += Decompiler::GetCommonDeclarations(); | ||
| 93 | out += "bool exec_geometry();\n"; | ||
| 94 | 82 | ||
| 83 | std::string out = out += "// Shader Unique Id: GS" + id + '\n'; | ||
| 84 | out += "#extension GL_ARB_separate_shader_objects : enable\n"; | ||
| 85 | out += GetCommonDeclarations(); | ||
| 86 | |||
| 87 | ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | ||
| 95 | ProgramResult program = | 88 | ProgramResult program = |
| 96 | Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, | 89 | Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); |
| 97 | Maxwell3D::Regs::ShaderStage::Geometry, "geometry") | 90 | |
| 98 | .value_or(ProgramResult()); | ||
| 99 | out += R"( | 91 | out += R"( |
| 100 | out gl_PerVertex { | 92 | out gl_PerVertex { |
| 101 | vec4 gl_Position; | 93 | vec4 gl_Position; |
| @@ -109,28 +101,26 @@ layout (std140) uniform gs_config { | |||
| 109 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding | 101 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding |
| 110 | uvec4 alpha_test; | 102 | uvec4 alpha_test; |
| 111 | }; | 103 | }; |
| 104 | )"; | ||
| 105 | |||
| 106 | out += program.first; | ||
| 112 | 107 | ||
| 108 | out = R"( | ||
| 113 | void main() { | 109 | void main() { |
| 114 | exec_geometry(); | 110 | execute_geometry(); |
| 115 | } | 111 | };)"; |
| 116 | 112 | ||
| 117 | )"; | ||
| 118 | out += program.first; | ||
| 119 | return {out, program.second}; | 113 | return {out, program.second}; |
| 120 | } | 114 | } |
| 121 | 115 | ||
| 122 | ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { | 116 | ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { |
| 123 | std::string out = "#version 430 core\n"; | ||
| 124 | out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||
| 125 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 117 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |
| 126 | out += "// Shader Unique Id: FS" + id + "\n\n"; | ||
| 127 | out += Decompiler::GetCommonDeclarations(); | ||
| 128 | out += "bool exec_fragment();\n"; | ||
| 129 | 118 | ||
| 130 | ProgramResult program = | 119 | std::string out = "#version 430 core\n"; |
| 131 | Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, | 120 | out += "// Shader Unique Id: FS" + id + '\n'; |
| 132 | Maxwell3D::Regs::ShaderStage::Fragment, "fragment") | 121 | out += "#extension GL_ARB_separate_shader_objects : enable\n"; |
| 133 | .value_or(ProgramResult()); | 122 | out += GetCommonDeclarations(); |
| 123 | |||
| 134 | out += R"( | 124 | out += R"( |
| 135 | layout(location = 0) out vec4 FragColor0; | 125 | layout(location = 0) out vec4 FragColor0; |
| 136 | layout(location = 1) out vec4 FragColor1; | 126 | layout(location = 1) out vec4 FragColor1; |
| @@ -171,14 +161,20 @@ bool AlphaFunc(in float value) { | |||
| 171 | default: | 161 | default: |
| 172 | return false; | 162 | return false; |
| 173 | } | 163 | } |
| 174 | } | 164 | })"; |
| 165 | |||
| 166 | ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | ||
| 167 | ProgramResult program = | ||
| 168 | Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); | ||
| 175 | 169 | ||
| 170 | out += program.first; | ||
| 171 | |||
| 172 | out += R"( | ||
| 176 | void main() { | 173 | void main() { |
| 177 | exec_fragment(); | 174 | execute_fragment(); |
| 178 | } | 175 | } |
| 179 | 176 | ||
| 180 | )"; | 177 | )"; |
| 181 | out += program.first; | ||
| 182 | return {out, program.second}; | 178 | return {out, program.second}; |
| 183 | } | 179 | } |
| 184 | } // namespace OpenGL::GLShader | 180 | } // namespace OpenGL::GLShader \ No newline at end of file |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index fcc20d3b4..b14bdb29c 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h | |||
| @@ -10,164 +10,12 @@ | |||
| 10 | 10 | ||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/shader_bytecode.h" | 12 | #include "video_core/engines/shader_bytecode.h" |
| 13 | #include "video_core/shader/glsl_decompiler.h" | ||
| 14 | #include "video_core/shader/shader_ir.h" | ||
| 13 | 15 | ||
| 14 | namespace OpenGL::GLShader { | 16 | namespace OpenGL::GLShader { |
| 15 | 17 | ||
| 16 | constexpr std::size_t MAX_PROGRAM_CODE_LENGTH{0x1000}; | 18 | using VideoCommon::Shader::ProgramCode; |
| 17 | using ProgramCode = std::vector<u64>; | ||
| 18 | |||
| 19 | enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; | ||
| 20 | |||
| 21 | class ConstBufferEntry { | ||
| 22 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 23 | |||
| 24 | public: | ||
| 25 | void MarkAsUsed(u64 index, u64 offset, Maxwell::ShaderStage stage) { | ||
| 26 | is_used = true; | ||
| 27 | this->index = static_cast<unsigned>(index); | ||
| 28 | this->stage = stage; | ||
| 29 | max_offset = std::max(max_offset, static_cast<unsigned>(offset)); | ||
| 30 | } | ||
| 31 | |||
| 32 | void MarkAsUsedIndirect(u64 index, Maxwell::ShaderStage stage) { | ||
| 33 | is_used = true; | ||
| 34 | is_indirect = true; | ||
| 35 | this->index = static_cast<unsigned>(index); | ||
| 36 | this->stage = stage; | ||
| 37 | } | ||
| 38 | |||
| 39 | bool IsUsed() const { | ||
| 40 | return is_used; | ||
| 41 | } | ||
| 42 | |||
| 43 | bool IsIndirect() const { | ||
| 44 | return is_indirect; | ||
| 45 | } | ||
| 46 | |||
| 47 | unsigned GetIndex() const { | ||
| 48 | return index; | ||
| 49 | } | ||
| 50 | |||
| 51 | unsigned GetSize() const { | ||
| 52 | return max_offset + 1; | ||
| 53 | } | ||
| 54 | |||
| 55 | std::string GetName() const { | ||
| 56 | return BufferBaseNames[static_cast<std::size_t>(stage)] + std::to_string(index); | ||
| 57 | } | ||
| 58 | |||
| 59 | u32 GetHash() const { | ||
| 60 | return (static_cast<u32>(stage) << 16) | index; | ||
| 61 | } | ||
| 62 | |||
| 63 | private: | ||
| 64 | static constexpr std::array<const char*, Maxwell::MaxShaderStage> BufferBaseNames = { | ||
| 65 | "buffer_vs_c", "buffer_tessc_c", "buffer_tesse_c", "buffer_gs_c", "buffer_fs_c", | ||
| 66 | }; | ||
| 67 | |||
| 68 | bool is_used{}; | ||
| 69 | bool is_indirect{}; | ||
| 70 | unsigned index{}; | ||
| 71 | unsigned max_offset{}; | ||
| 72 | Maxwell::ShaderStage stage; | ||
| 73 | }; | ||
| 74 | |||
| 75 | class SamplerEntry { | ||
| 76 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 77 | |||
| 78 | public: | ||
| 79 | SamplerEntry(Maxwell::ShaderStage stage, std::size_t offset, std::size_t index, | ||
| 80 | Tegra::Shader::TextureType type, bool is_array, bool is_shadow) | ||
| 81 | : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array), | ||
| 82 | is_shadow(is_shadow) {} | ||
| 83 | |||
| 84 | std::size_t GetOffset() const { | ||
| 85 | return offset; | ||
| 86 | } | ||
| 87 | |||
| 88 | std::size_t GetIndex() const { | ||
| 89 | return sampler_index; | ||
| 90 | } | ||
| 91 | |||
| 92 | Maxwell::ShaderStage GetStage() const { | ||
| 93 | return stage; | ||
| 94 | } | ||
| 95 | |||
| 96 | std::string GetName() const { | ||
| 97 | return std::string(TextureSamplerNames[static_cast<std::size_t>(stage)]) + '_' + | ||
| 98 | std::to_string(sampler_index); | ||
| 99 | } | ||
| 100 | |||
| 101 | std::string GetTypeString() const { | ||
| 102 | using Tegra::Shader::TextureType; | ||
| 103 | std::string glsl_type; | ||
| 104 | |||
| 105 | switch (type) { | ||
| 106 | case TextureType::Texture1D: | ||
| 107 | glsl_type = "sampler1D"; | ||
| 108 | break; | ||
| 109 | case TextureType::Texture2D: | ||
| 110 | glsl_type = "sampler2D"; | ||
| 111 | break; | ||
| 112 | case TextureType::Texture3D: | ||
| 113 | glsl_type = "sampler3D"; | ||
| 114 | break; | ||
| 115 | case TextureType::TextureCube: | ||
| 116 | glsl_type = "samplerCube"; | ||
| 117 | break; | ||
| 118 | default: | ||
| 119 | UNIMPLEMENTED(); | ||
| 120 | } | ||
| 121 | if (is_array) | ||
| 122 | glsl_type += "Array"; | ||
| 123 | if (is_shadow) | ||
| 124 | glsl_type += "Shadow"; | ||
| 125 | return glsl_type; | ||
| 126 | } | ||
| 127 | |||
| 128 | Tegra::Shader::TextureType GetType() const { | ||
| 129 | return type; | ||
| 130 | } | ||
| 131 | |||
| 132 | bool IsArray() const { | ||
| 133 | return is_array; | ||
| 134 | } | ||
| 135 | |||
| 136 | bool IsShadow() const { | ||
| 137 | return is_shadow; | ||
| 138 | } | ||
| 139 | |||
| 140 | u32 GetHash() const { | ||
| 141 | return (static_cast<u32>(stage) << 16) | static_cast<u32>(sampler_index); | ||
| 142 | } | ||
| 143 | |||
| 144 | static std::string GetArrayName(Maxwell::ShaderStage stage) { | ||
| 145 | return TextureSamplerNames[static_cast<std::size_t>(stage)]; | ||
| 146 | } | ||
| 147 | |||
| 148 | private: | ||
| 149 | static constexpr std::array<const char*, Maxwell::MaxShaderStage> TextureSamplerNames = { | ||
| 150 | "tex_vs", "tex_tessc", "tex_tesse", "tex_gs", "tex_fs", | ||
| 151 | }; | ||
| 152 | |||
| 153 | /// Offset in TSC memory from which to read the sampler object, as specified by the sampling | ||
| 154 | /// instruction. | ||
| 155 | std::size_t offset; | ||
| 156 | Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used. | ||
| 157 | std::size_t sampler_index; ///< Value used to index into the generated GLSL sampler array. | ||
| 158 | Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc) | ||
| 159 | bool is_array; ///< Whether the texture is being sampled as an array texture or not. | ||
| 160 | bool is_shadow; ///< Whether the texture is being sampled as a depth texture or not. | ||
| 161 | }; | ||
| 162 | |||
| 163 | struct ShaderEntries { | ||
| 164 | std::vector<ConstBufferEntry> const_buffer_entries; | ||
| 165 | std::vector<SamplerEntry> texture_samplers; | ||
| 166 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> clip_distances; | ||
| 167 | std::size_t shader_length; | ||
| 168 | }; | ||
| 169 | |||
| 170 | using ProgramResult = std::pair<std::string, ShaderEntries>; | ||
| 171 | 19 | ||
| 172 | struct ShaderSetup { | 20 | struct ShaderSetup { |
| 173 | explicit ShaderSetup(ProgramCode program_code) { | 21 | explicit ShaderSetup(ProgramCode program_code) { |