diff options
| author | 2020-06-03 18:07:35 -0300 | |
|---|---|---|
| committer | 2020-06-11 22:12:07 -0300 | |
| commit | a63a0daa5e773574019ec521c0a07096efbdcd36 (patch) | |
| tree | bbf61edb6185f0d5cd7c2bc79d91e39fa47cb45b | |
| parent | yuzu/configuration: Show assembly shaders check box (diff) | |
| download | yuzu-a63a0daa5e773574019ec521c0a07096efbdcd36.tar.gz yuzu-a63a0daa5e773574019ec521c0a07096efbdcd36.tar.xz yuzu-a63a0daa5e773574019ec521c0a07096efbdcd36.zip | |
gl_arb_decompiler: Implement an assembly shader decompiler
Emit code compatible with NV_gpu_program5.
This should emit code compatible with Fermi, but it wasn't tested on
that architecture. Pascal has some issues not present on Turing GPUs.
Diffstat (limited to '')
| -rw-r--r-- | CMakeModules/GenerateSCMRev.cmake | 2 | ||||
| -rw-r--r-- | src/common/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_arb_decompiler.cpp | 2051 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_arb_decompiler.h | 29 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 4 |
8 files changed, 2095 insertions, 1 deletions
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 83e4e9df2..311ba1c2e 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake | |||
| @@ -51,6 +51,8 @@ endif() | |||
| 51 | # The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR) | 51 | # The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR) |
| 52 | set(VIDEO_CORE "${SRC_DIR}/src/video_core") | 52 | set(VIDEO_CORE "${SRC_DIR}/src/video_core") |
| 53 | set(HASH_FILES | 53 | set(HASH_FILES |
| 54 | "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" | ||
| 55 | "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" | ||
| 54 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" | 56 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" |
| 55 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" | 57 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" |
| 56 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" | 58 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" |
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 24b7a083c..0a3e2f4d1 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -32,6 +32,8 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 32 | DEPENDS | 32 | DEPENDS |
| 33 | # WARNING! It was too much work to try and make a common location for this list, | 33 | # WARNING! It was too much work to try and make a common location for this list, |
| 34 | # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well | 34 | # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well |
| 35 | "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" | ||
| 36 | "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" | ||
| 35 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" | 37 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" |
| 36 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" | 38 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" |
| 37 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" | 39 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 2bf8d68ce..2af713af2 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -54,6 +54,8 @@ add_library(video_core STATIC | |||
| 54 | rasterizer_interface.h | 54 | rasterizer_interface.h |
| 55 | renderer_base.cpp | 55 | renderer_base.cpp |
| 56 | renderer_base.h | 56 | renderer_base.h |
| 57 | renderer_opengl/gl_arb_decompiler.cpp | ||
| 58 | renderer_opengl/gl_arb_decompiler.h | ||
| 57 | renderer_opengl/gl_buffer_cache.cpp | 59 | renderer_opengl/gl_buffer_cache.cpp |
| 58 | renderer_opengl/gl_buffer_cache.h | 60 | renderer_opengl/gl_buffer_cache.h |
| 59 | renderer_opengl/gl_device.cpp | 61 | renderer_opengl/gl_device.cpp |
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp new file mode 100644 index 000000000..6a23221bb --- /dev/null +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp | |||
| @@ -0,0 +1,2051 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <string> | ||
| 9 | #include <string_view> | ||
| 10 | #include <utility> | ||
| 11 | #include <variant> | ||
| 12 | |||
| 13 | #include <fmt/format.h> | ||
| 14 | |||
| 15 | #include "common/alignment.h" | ||
| 16 | #include "common/assert.h" | ||
| 17 | #include "common/common_types.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_arb_decompiler.h" | ||
| 19 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 20 | #include "video_core/shader/registry.h" | ||
| 21 | #include "video_core/shader/shader_ir.h" | ||
| 22 | |||
| 23 | // Predicates in the decompiled code follow the convention that -1 means true and 0 means false. | ||
| 24 | // GLASM lacks booleans, so they have to be implemented as integers. | ||
| 25 | // Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to | ||
| 26 | // select between two values, because -1 will be evaluated as true and 0 as false. | ||
| 27 | |||
| 28 | namespace OpenGL { | ||
| 29 | |||
| 30 | namespace { | ||
| 31 | |||
| 32 | using Tegra::Engines::ShaderType; | ||
| 33 | using Tegra::Shader::Attribute; | ||
| 34 | using Tegra::Shader::PixelImap; | ||
| 35 | using Tegra::Shader::Register; | ||
| 36 | using namespace VideoCommon::Shader; | ||
| 37 | using Operation = const OperationNode&; | ||
| 38 | |||
| 39 | constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"}; | ||
| 40 | |||
| 41 | char Swizzle(std::size_t component) { | ||
| 42 | ASSERT(component < 4); | ||
| 43 | return component["xyzw"]; | ||
| 44 | } | ||
| 45 | |||
| 46 | constexpr bool IsGenericAttribute(Attribute::Index index) { | ||
| 47 | return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; | ||
| 48 | } | ||
| 49 | |||
| 50 | u32 GetGenericAttributeIndex(Attribute::Index index) { | ||
| 51 | ASSERT(IsGenericAttribute(index)); | ||
| 52 | return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); | ||
| 53 | } | ||
| 54 | |||
| 55 | std::string_view Modifiers(Operation operation) { | ||
| 56 | const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta()); | ||
| 57 | if (meta && meta->precise) { | ||
| 58 | return ".PREC"; | ||
| 59 | } | ||
| 60 | return ""; | ||
| 61 | } | ||
| 62 | |||
| 63 | std::string_view GetInputFlags(PixelImap attribute) { | ||
| 64 | switch (attribute) { | ||
| 65 | case PixelImap::Perspective: | ||
| 66 | return ""; | ||
| 67 | case PixelImap::Constant: | ||
| 68 | return "FLAT "; | ||
| 69 | case PixelImap::ScreenLinear: | ||
| 70 | return "NOPERSPECTIVE "; | ||
| 71 | case PixelImap::Unused: | ||
| 72 | break; | ||
| 73 | } | ||
| 74 | UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute)); | ||
| 75 | return {}; | ||
| 76 | } | ||
| 77 | |||
| 78 | std::string_view ImageType(Tegra::Shader::ImageType image_type) { | ||
| 79 | switch (image_type) { | ||
| 80 | case Tegra::Shader::ImageType::Texture1D: | ||
| 81 | return "1D"; | ||
| 82 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 83 | return "BUFFER"; | ||
| 84 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 85 | return "ARRAY1D"; | ||
| 86 | case Tegra::Shader::ImageType::Texture2D: | ||
| 87 | return "2D"; | ||
| 88 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 89 | return "ARRAY2D"; | ||
| 90 | case Tegra::Shader::ImageType::Texture3D: | ||
| 91 | return "3D"; | ||
| 92 | } | ||
| 93 | UNREACHABLE(); | ||
| 94 | return {}; | ||
| 95 | } | ||
| 96 | |||
| 97 | std::string_view StackName(MetaStackClass stack) { | ||
| 98 | switch (stack) { | ||
| 99 | case MetaStackClass::Ssy: | ||
| 100 | return "SSY"; | ||
| 101 | case MetaStackClass::Pbk: | ||
| 102 | return "PBK"; | ||
| 103 | } | ||
| 104 | UNREACHABLE(); | ||
| 105 | return ""; | ||
| 106 | }; | ||
| 107 | |||
| 108 | std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { | ||
| 109 | switch (topology) { | ||
| 110 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: | ||
| 111 | return "POINTS"; | ||
| 112 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: | ||
| 113 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: | ||
| 114 | return "LINES"; | ||
| 115 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: | ||
| 116 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: | ||
| 117 | return "LINES_ADJACENCY"; | ||
| 118 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: | ||
| 119 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: | ||
| 120 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: | ||
| 121 | return "TRIANGLES"; | ||
| 122 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: | ||
| 123 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: | ||
| 124 | return "TRIANGLES_ADJACENCY"; | ||
| 125 | default: | ||
| 126 | UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology)); | ||
| 127 | return "POINTS"; | ||
| 128 | } | ||
| 129 | } | ||
| 130 | |||
| 131 | std::string_view TopologyName(Tegra::Shader::OutputTopology topology) { | ||
| 132 | switch (topology) { | ||
| 133 | case Tegra::Shader::OutputTopology::PointList: | ||
| 134 | return "POINTS"; | ||
| 135 | case Tegra::Shader::OutputTopology::LineStrip: | ||
| 136 | return "LINE_STRIP"; | ||
| 137 | case Tegra::Shader::OutputTopology::TriangleStrip: | ||
| 138 | return "TRIANGLE_STRIP"; | ||
| 139 | default: | ||
| 140 | UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology)); | ||
| 141 | return "points"; | ||
| 142 | } | ||
| 143 | } | ||
| 144 | |||
| 145 | std::string_view StageInputName(ShaderType stage) { | ||
| 146 | switch (stage) { | ||
| 147 | case ShaderType::Vertex: | ||
| 148 | case ShaderType::Geometry: | ||
| 149 | return "vertex"; | ||
| 150 | case ShaderType::Fragment: | ||
| 151 | return "fragment"; | ||
| 152 | case ShaderType::Compute: | ||
| 153 | return "invocation"; | ||
| 154 | default: | ||
| 155 | UNREACHABLE(); | ||
| 156 | return ""; | ||
| 157 | } | ||
| 158 | } | ||
| 159 | |||
| 160 | std::string TextureType(const MetaTexture& meta) { | ||
| 161 | if (meta.sampler.is_buffer) { | ||
| 162 | return "BUFFER"; | ||
| 163 | } | ||
| 164 | std::string type; | ||
| 165 | if (meta.sampler.is_shadow) { | ||
| 166 | type += "SHADOW"; | ||
| 167 | } | ||
| 168 | if (meta.sampler.is_array) { | ||
| 169 | type += "ARRAY"; | ||
| 170 | } | ||
| 171 | type += [&meta] { | ||
| 172 | switch (meta.sampler.type) { | ||
| 173 | case Tegra::Shader::TextureType::Texture1D: | ||
| 174 | return "1D"; | ||
| 175 | case Tegra::Shader::TextureType::Texture2D: | ||
| 176 | return "2D"; | ||
| 177 | case Tegra::Shader::TextureType::Texture3D: | ||
| 178 | return "3D"; | ||
| 179 | case Tegra::Shader::TextureType::TextureCube: | ||
| 180 | return "CUBE"; | ||
| 181 | } | ||
| 182 | UNREACHABLE(); | ||
| 183 | return "2D"; | ||
| 184 | }(); | ||
| 185 | return type; | ||
| 186 | } | ||
| 187 | |||
| 188 | std::string GlobalMemoryName(const GlobalMemoryBase& base) { | ||
| 189 | return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset); | ||
| 190 | } | ||
| 191 | |||
| 192 | class ARBDecompiler final { | ||
| 193 | public: | ||
| 194 | explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, | ||
| 195 | ShaderType stage, std::string_view identifier); | ||
| 196 | |||
| 197 | std::string Code() const { | ||
| 198 | return shader_source; | ||
| 199 | } | ||
| 200 | |||
| 201 | private: | ||
| 202 | void DeclareHeader(); | ||
| 203 | void DeclareVertex(); | ||
| 204 | void DeclareGeometry(); | ||
| 205 | void DeclareFragment(); | ||
| 206 | void DeclareCompute(); | ||
| 207 | void DeclareInputAttributes(); | ||
| 208 | void DeclareOutputAttributes(); | ||
| 209 | void DeclareLocalMemory(); | ||
| 210 | void DeclareGlobalMemory(); | ||
| 211 | void DeclareConstantBuffers(); | ||
| 212 | void DeclareRegisters(); | ||
| 213 | void DeclareTemporaries(); | ||
| 214 | void DeclarePredicates(); | ||
| 215 | void DeclareInternalFlags(); | ||
| 216 | |||
| 217 | void InitializeVariables(); | ||
| 218 | |||
| 219 | void DecompileAST(); | ||
| 220 | void DecompileBranchMode(); | ||
| 221 | |||
| 222 | void VisitAST(const ASTNode& node); | ||
| 223 | std::string VisitExpression(const Expr& node); | ||
| 224 | |||
| 225 | void VisitBlock(const NodeBlock& bb); | ||
| 226 | |||
| 227 | std::string Visit(const Node& node); | ||
| 228 | |||
| 229 | std::pair<std::string, std::size_t> BuildCoords(Operation); | ||
| 230 | std::string BuildAoffi(Operation); | ||
| 231 | void Exit(); | ||
| 232 | |||
| 233 | std::string Assign(Operation); | ||
| 234 | std::string Select(Operation); | ||
| 235 | std::string FClamp(Operation); | ||
| 236 | std::string FCastHalf0(Operation); | ||
| 237 | std::string FCastHalf1(Operation); | ||
| 238 | std::string FSqrt(Operation); | ||
| 239 | std::string FSwizzleAdd(Operation); | ||
| 240 | std::string HAdd2(Operation); | ||
| 241 | std::string HMul2(Operation); | ||
| 242 | std::string HFma2(Operation); | ||
| 243 | std::string HAbsolute(Operation); | ||
| 244 | std::string HNegate(Operation); | ||
| 245 | std::string HClamp(Operation); | ||
| 246 | std::string HCastFloat(Operation); | ||
| 247 | std::string HUnpack(Operation); | ||
| 248 | std::string HMergeF32(Operation); | ||
| 249 | std::string HMergeH0(Operation); | ||
| 250 | std::string HMergeH1(Operation); | ||
| 251 | std::string HPack2(Operation); | ||
| 252 | std::string LogicalAssign(Operation); | ||
| 253 | std::string LogicalPick2(Operation); | ||
| 254 | std::string LogicalAnd2(Operation); | ||
| 255 | std::string FloatOrdered(Operation); | ||
| 256 | std::string FloatUnordered(Operation); | ||
| 257 | std::string LogicalAddCarry(Operation); | ||
| 258 | std::string Texture(Operation); | ||
| 259 | std::string TextureGather(Operation); | ||
| 260 | std::string TextureQueryDimensions(Operation); | ||
| 261 | std::string TextureQueryLod(Operation); | ||
| 262 | std::string TexelFetch(Operation); | ||
| 263 | std::string TextureGradient(Operation); | ||
| 264 | std::string ImageLoad(Operation); | ||
| 265 | std::string ImageStore(Operation); | ||
| 266 | std::string Branch(Operation); | ||
| 267 | std::string BranchIndirect(Operation); | ||
| 268 | std::string PushFlowStack(Operation); | ||
| 269 | std::string PopFlowStack(Operation); | ||
| 270 | std::string Exit(Operation); | ||
| 271 | std::string Discard(Operation); | ||
| 272 | std::string EmitVertex(Operation); | ||
| 273 | std::string EndPrimitive(Operation); | ||
| 274 | std::string InvocationId(Operation); | ||
| 275 | std::string YNegate(Operation); | ||
| 276 | std::string ThreadId(Operation); | ||
| 277 | std::string ShuffleIndexed(Operation); | ||
| 278 | std::string Barrier(Operation); | ||
| 279 | std::string MemoryBarrierGroup(Operation); | ||
| 280 | std::string MemoryBarrierGlobal(Operation); | ||
| 281 | |||
| 282 | template <const std::string_view& op> | ||
| 283 | std::string Unary(Operation operation) { | ||
| 284 | const std::string temporary = AllocTemporary(); | ||
| 285 | AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0])); | ||
| 286 | return temporary; | ||
| 287 | } | ||
| 288 | |||
| 289 | template <const std::string_view& op> | ||
| 290 | std::string Binary(Operation operation) { | ||
| 291 | const std::string temporary = AllocTemporary(); | ||
| 292 | AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), | ||
| 293 | Visit(operation[1])); | ||
| 294 | return temporary; | ||
| 295 | } | ||
| 296 | |||
| 297 | template <const std::string_view& op> | ||
| 298 | std::string Trinary(Operation operation) { | ||
| 299 | const std::string temporary = AllocTemporary(); | ||
| 300 | AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), | ||
| 301 | Visit(operation[1]), Visit(operation[2])); | ||
| 302 | return temporary; | ||
| 303 | } | ||
| 304 | |||
| 305 | template <const std::string_view& op, bool unordered> | ||
| 306 | std::string FloatComparison(Operation operation) { | ||
| 307 | const std::string temporary = AllocTemporary(); | ||
| 308 | AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation)); | ||
| 309 | AddLine("MOV.S {}, 0;", temporary); | ||
| 310 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 311 | |||
| 312 | const std::string op_a = Visit(operation[0]); | ||
| 313 | const std::string op_b = Visit(operation[1]); | ||
| 314 | if constexpr (unordered) { | ||
| 315 | AddLine("SNE.F RC.x, {}, {};", op_a, op_a); | ||
| 316 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 317 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 318 | AddLine("SNE.F RC.x, {}, {};", op_b, op_b); | ||
| 319 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 320 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 321 | } else if (op == SNE_F) { | ||
| 322 | AddLine("SNE.F RC.x, {}, {};", op_a, op_a); | ||
| 323 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 324 | AddLine("MOV.S {} (NE.x), 0;", temporary); | ||
| 325 | AddLine("SNE.F RC.x, {}, {};", op_b, op_b); | ||
| 326 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 327 | AddLine("MOV.S {} (NE.x), 0;", temporary); | ||
| 328 | } | ||
| 329 | return temporary; | ||
| 330 | } | ||
| 331 | |||
| 332 | template <const std::string_view& op, bool is_nan> | ||
| 333 | std::string HalfComparison(Operation operation) { | ||
| 334 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 335 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 336 | const std::string op_a = Visit(operation[0]); | ||
| 337 | const std::string op_b = Visit(operation[1]); | ||
| 338 | AddLine("UP2H.F {}, {};", tmp1, op_a); | ||
| 339 | AddLine("UP2H.F {}, {};", tmp2, op_b); | ||
| 340 | AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2); | ||
| 341 | AddLine("TRUNC.U.CC RC.xy, {};", tmp1); | ||
| 342 | AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1); | ||
| 343 | AddLine("MOV.S {}.x (NE.x), -1;", tmp1); | ||
| 344 | AddLine("MOV.S {}.y (NE.y), -1;", tmp1); | ||
| 345 | if constexpr (is_nan) { | ||
| 346 | AddLine("MOVC.F RC.x, {};", op_a); | ||
| 347 | AddLine("MOV.S {}.x (NAN.x), -1;", tmp1); | ||
| 348 | AddLine("MOVC.F RC.x, {};", op_b); | ||
| 349 | AddLine("MOV.S {}.y (NAN.x), -1;", tmp1); | ||
| 350 | } | ||
| 351 | return tmp1; | ||
| 352 | } | ||
| 353 | |||
| 354 | template <const std::string_view& op, const std::string_view& type> | ||
| 355 | std::string AtomicImage(Operation operation) { | ||
| 356 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 357 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 358 | const std::size_t num_coords = operation.GetOperandsCount(); | ||
| 359 | const std::size_t num_values = meta.values.size(); | ||
| 360 | |||
| 361 | const std::string coord = AllocVectorTemporary(); | ||
| 362 | const std::string value = AllocVectorTemporary(); | ||
| 363 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 364 | AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); | ||
| 365 | } | ||
| 366 | for (std::size_t i = 0; i < num_values; ++i) { | ||
| 367 | AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); | ||
| 368 | } | ||
| 369 | |||
| 370 | const std::string result = coord; | ||
| 371 | AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, result, value, coord, | ||
| 372 | image_id, ImageType(meta.image.type)); | ||
| 373 | return fmt::format("{}.x", result); | ||
| 374 | } | ||
| 375 | |||
| 376 | template <const std::string_view& op, const std::string_view& type> | ||
| 377 | std::string Atomic(Operation operation) { | ||
| 378 | const std::string temporary = AllocTemporary(); | ||
| 379 | std::string address; | ||
| 380 | std::string_view opname; | ||
| 381 | if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { | ||
| 382 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), | ||
| 383 | Visit(gmem->GetBaseAddress())); | ||
| 384 | address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary); | ||
| 385 | opname = "ATOMB"; | ||
| 386 | } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { | ||
| 387 | address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); | ||
| 388 | opname = "ATOMS"; | ||
| 389 | } else { | ||
| 390 | UNREACHABLE(); | ||
| 391 | return "{0, 0, 0, 0}"; | ||
| 392 | } | ||
| 393 | AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); | ||
| 394 | return temporary; | ||
| 395 | } | ||
| 396 | |||
| 397 | template <char type> | ||
| 398 | std::string Negate(Operation operation) { | ||
| 399 | const std::string temporary = AllocTemporary(); | ||
| 400 | if constexpr (type == 'F') { | ||
| 401 | AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0])); | ||
| 402 | } else { | ||
| 403 | AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0])); | ||
| 404 | } | ||
| 405 | return temporary; | ||
| 406 | } | ||
| 407 | |||
| 408 | template <char type> | ||
| 409 | std::string Absolute(Operation operation) { | ||
| 410 | const std::string temporary = AllocTemporary(); | ||
| 411 | AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0])); | ||
| 412 | return temporary; | ||
| 413 | } | ||
| 414 | |||
| 415 | template <char type> | ||
| 416 | std::string BitfieldInsert(Operation operation) { | ||
| 417 | const std::string temporary = AllocVectorTemporary(); | ||
| 418 | AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3])); | ||
| 419 | AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2])); | ||
| 420 | AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]), | ||
| 421 | Visit(operation[0])); | ||
| 422 | return fmt::format("{}.x", temporary); | ||
| 423 | } | ||
| 424 | |||
| 425 | template <char type> | ||
| 426 | std::string BitfieldExtract(Operation operation) { | ||
| 427 | const std::string temporary = AllocVectorTemporary(); | ||
| 428 | AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2])); | ||
| 429 | AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1])); | ||
| 430 | AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0])); | ||
| 431 | return fmt::format("{}.x", temporary); | ||
| 432 | } | ||
| 433 | |||
| 434 | template <char swizzle> | ||
| 435 | std::string LocalInvocationId(Operation) { | ||
| 436 | return fmt::format("invocation.localid.{}", swizzle); | ||
| 437 | } | ||
| 438 | |||
| 439 | template <char swizzle> | ||
| 440 | std::string WorkGroupId(Operation) { | ||
| 441 | return fmt::format("invocation.groupid.{}", swizzle); | ||
| 442 | } | ||
| 443 | |||
| 444 | template <char c1, char c2> | ||
| 445 | std::string ThreadMask(Operation) { | ||
| 446 | return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2); | ||
| 447 | } | ||
| 448 | |||
| 449 | template <typename... Args> | ||
| 450 | void AddExpression(std::string_view text, Args&&... args) { | ||
| 451 | shader_source += fmt::format(text, std::forward<Args>(args)...); | ||
| 452 | } | ||
| 453 | |||
| 454 | template <typename... Args> | ||
| 455 | void AddLine(std::string_view text, Args&&... args) { | ||
| 456 | AddExpression(text, std::forward<Args>(args)...); | ||
| 457 | shader_source += '\n'; | ||
| 458 | } | ||
| 459 | |||
| 460 | std::string AllocTemporary() { | ||
| 461 | max_temporaries = std::max(max_temporaries, num_temporaries + 1); | ||
| 462 | return fmt::format("T{}.x", num_temporaries++); | ||
| 463 | } | ||
| 464 | |||
| 465 | std::string AllocVectorTemporary() { | ||
| 466 | max_temporaries = std::max(max_temporaries, num_temporaries + 1); | ||
| 467 | return fmt::format("T{}", num_temporaries++); | ||
| 468 | } | ||
| 469 | |||
| 470 | void ResetTemporaries() noexcept { | ||
| 471 | num_temporaries = 0; | ||
| 472 | } | ||
| 473 | |||
| 474 | const Device& device; | ||
| 475 | const ShaderIR& ir; | ||
| 476 | const Registry& registry; | ||
| 477 | const ShaderType stage; | ||
| 478 | |||
| 479 | std::size_t num_temporaries = 0; | ||
| 480 | std::size_t max_temporaries = 0; | ||
| 481 | |||
| 482 | std::string shader_source; | ||
| 483 | |||
| 484 | static constexpr std::string_view ADD_F32 = "ADD.F32"; | ||
| 485 | static constexpr std::string_view ADD_S = "ADD.S"; | ||
| 486 | static constexpr std::string_view ADD_U = "ADD.U"; | ||
| 487 | static constexpr std::string_view MUL_F32 = "MUL.F32"; | ||
| 488 | static constexpr std::string_view MUL_S = "MUL.S"; | ||
| 489 | static constexpr std::string_view MUL_U = "MUL.U"; | ||
| 490 | static constexpr std::string_view DIV_F32 = "DIV.F32"; | ||
| 491 | static constexpr std::string_view DIV_S = "DIV.S"; | ||
| 492 | static constexpr std::string_view DIV_U = "DIV.U"; | ||
| 493 | static constexpr std::string_view MAD_F32 = "MAD.F32"; | ||
| 494 | static constexpr std::string_view RSQ_F32 = "RSQ.F32"; | ||
| 495 | static constexpr std::string_view COS_F32 = "COS.F32"; | ||
| 496 | static constexpr std::string_view SIN_F32 = "SIN.F32"; | ||
| 497 | static constexpr std::string_view EX2_F32 = "EX2.F32"; | ||
| 498 | static constexpr std::string_view LG2_F32 = "LG2.F32"; | ||
| 499 | static constexpr std::string_view SLT_F = "SLT.F32"; | ||
| 500 | static constexpr std::string_view SLT_S = "SLT.S"; | ||
| 501 | static constexpr std::string_view SLT_U = "SLT.U"; | ||
| 502 | static constexpr std::string_view SEQ_F = "SEQ.F32"; | ||
| 503 | static constexpr std::string_view SEQ_S = "SEQ.S"; | ||
| 504 | static constexpr std::string_view SEQ_U = "SEQ.U"; | ||
| 505 | static constexpr std::string_view SLE_F = "SLE.F32"; | ||
| 506 | static constexpr std::string_view SLE_S = "SLE.S"; | ||
| 507 | static constexpr std::string_view SLE_U = "SLE.U"; | ||
| 508 | static constexpr std::string_view SGT_F = "SGT.F32"; | ||
| 509 | static constexpr std::string_view SGT_S = "SGT.S"; | ||
| 510 | static constexpr std::string_view SGT_U = "SGT.U"; | ||
| 511 | static constexpr std::string_view SNE_F = "SNE.F32"; | ||
| 512 | static constexpr std::string_view SNE_S = "SNE.S"; | ||
| 513 | static constexpr std::string_view SNE_U = "SNE.U"; | ||
| 514 | static constexpr std::string_view SGE_F = "SGE.F32"; | ||
| 515 | static constexpr std::string_view SGE_S = "SGE.S"; | ||
| 516 | static constexpr std::string_view SGE_U = "SGE.U"; | ||
| 517 | static constexpr std::string_view AND_S = "AND.S"; | ||
| 518 | static constexpr std::string_view AND_U = "AND.U"; | ||
| 519 | static constexpr std::string_view TRUNC_F = "TRUNC.F"; | ||
| 520 | static constexpr std::string_view TRUNC_S = "TRUNC.S"; | ||
| 521 | static constexpr std::string_view TRUNC_U = "TRUNC.U"; | ||
| 522 | static constexpr std::string_view SHL_S = "SHL.S"; | ||
| 523 | static constexpr std::string_view SHL_U = "SHL.U"; | ||
| 524 | static constexpr std::string_view SHR_S = "SHR.S"; | ||
| 525 | static constexpr std::string_view SHR_U = "SHR.U"; | ||
| 526 | static constexpr std::string_view OR_S = "OR.S"; | ||
| 527 | static constexpr std::string_view OR_U = "OR.U"; | ||
| 528 | static constexpr std::string_view XOR_S = "XOR.S"; | ||
| 529 | static constexpr std::string_view XOR_U = "XOR.U"; | ||
| 530 | static constexpr std::string_view NOT_S = "NOT.S"; | ||
| 531 | static constexpr std::string_view NOT_U = "NOT.U"; | ||
| 532 | static constexpr std::string_view BTC_S = "BTC.S"; | ||
| 533 | static constexpr std::string_view BTC_U = "BTC.U"; | ||
| 534 | static constexpr std::string_view BTFM_S = "BTFM.S"; | ||
| 535 | static constexpr std::string_view BTFM_U = "BTFM.U"; | ||
| 536 | static constexpr std::string_view ROUND_F = "ROUND.F"; | ||
| 537 | static constexpr std::string_view CEIL_F = "CEIL.F"; | ||
| 538 | static constexpr std::string_view FLR_F = "FLR.F"; | ||
| 539 | static constexpr std::string_view I2F_S = "I2F.S"; | ||
| 540 | static constexpr std::string_view I2F_U = "I2F.U"; | ||
| 541 | static constexpr std::string_view MIN_F = "MIN.F"; | ||
| 542 | static constexpr std::string_view MIN_S = "MIN.S"; | ||
| 543 | static constexpr std::string_view MIN_U = "MIN.U"; | ||
| 544 | static constexpr std::string_view MAX_F = "MAX.F"; | ||
| 545 | static constexpr std::string_view MAX_S = "MAX.S"; | ||
| 546 | static constexpr std::string_view MAX_U = "MAX.U"; | ||
| 547 | static constexpr std::string_view MOV_U = "MOV.U"; | ||
| 548 | static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U"; | ||
| 549 | static constexpr std::string_view TGALL_U = "TGALL.U"; | ||
| 550 | static constexpr std::string_view TGANY_U = "TGANY.U"; | ||
| 551 | static constexpr std::string_view TGEQ_U = "TGEQ.U"; | ||
| 552 | static constexpr std::string_view EXCH = "EXCH"; | ||
| 553 | static constexpr std::string_view ADD = "ADD"; | ||
| 554 | static constexpr std::string_view MIN = "MIN"; | ||
| 555 | static constexpr std::string_view MAX = "MAX"; | ||
| 556 | static constexpr std::string_view AND = "AND"; | ||
| 557 | static constexpr std::string_view OR = "OR"; | ||
| 558 | static constexpr std::string_view XOR = "XOR"; | ||
| 559 | static constexpr std::string_view U32 = "U32"; | ||
| 560 | static constexpr std::string_view S32 = "S32"; | ||
| 561 | |||
| 562 | static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount); | ||
| 563 | using DecompilerType = std::string (ARBDecompiler::*)(Operation); | ||
| 564 | static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = { | ||
| 565 | &ARBDecompiler::Assign, | ||
| 566 | |||
| 567 | &ARBDecompiler::Select, | ||
| 568 | |||
| 569 | &ARBDecompiler::Binary<ADD_F32>, | ||
| 570 | &ARBDecompiler::Binary<MUL_F32>, | ||
| 571 | &ARBDecompiler::Binary<DIV_F32>, | ||
| 572 | &ARBDecompiler::Trinary<MAD_F32>, | ||
| 573 | &ARBDecompiler::Negate<'F'>, | ||
| 574 | &ARBDecompiler::Absolute<'F'>, | ||
| 575 | &ARBDecompiler::FClamp, | ||
| 576 | &ARBDecompiler::FCastHalf0, | ||
| 577 | &ARBDecompiler::FCastHalf1, | ||
| 578 | &ARBDecompiler::Binary<MIN_F>, | ||
| 579 | &ARBDecompiler::Binary<MAX_F>, | ||
| 580 | &ARBDecompiler::Unary<COS_F32>, | ||
| 581 | &ARBDecompiler::Unary<SIN_F32>, | ||
| 582 | &ARBDecompiler::Unary<EX2_F32>, | ||
| 583 | &ARBDecompiler::Unary<LG2_F32>, | ||
| 584 | &ARBDecompiler::Unary<RSQ_F32>, | ||
| 585 | &ARBDecompiler::FSqrt, | ||
| 586 | &ARBDecompiler::Unary<ROUND_F>, | ||
| 587 | &ARBDecompiler::Unary<FLR_F>, | ||
| 588 | &ARBDecompiler::Unary<CEIL_F>, | ||
| 589 | &ARBDecompiler::Unary<TRUNC_F>, | ||
| 590 | &ARBDecompiler::Unary<I2F_S>, | ||
| 591 | &ARBDecompiler::Unary<I2F_U>, | ||
| 592 | &ARBDecompiler::FSwizzleAdd, | ||
| 593 | |||
| 594 | &ARBDecompiler::Binary<ADD_S>, | ||
| 595 | &ARBDecompiler::Binary<MUL_S>, | ||
| 596 | &ARBDecompiler::Binary<DIV_S>, | ||
| 597 | &ARBDecompiler::Negate<'S'>, | ||
| 598 | &ARBDecompiler::Absolute<'S'>, | ||
| 599 | &ARBDecompiler::Binary<MIN_S>, | ||
| 600 | &ARBDecompiler::Binary<MAX_S>, | ||
| 601 | |||
| 602 | &ARBDecompiler::Unary<TRUNC_S>, | ||
| 603 | &ARBDecompiler::Unary<MOV_U>, | ||
| 604 | &ARBDecompiler::Binary<SHL_S>, | ||
| 605 | &ARBDecompiler::Binary<SHR_U>, | ||
| 606 | &ARBDecompiler::Binary<SHR_S>, | ||
| 607 | &ARBDecompiler::Binary<AND_S>, | ||
| 608 | &ARBDecompiler::Binary<OR_S>, | ||
| 609 | &ARBDecompiler::Binary<XOR_S>, | ||
| 610 | &ARBDecompiler::Unary<NOT_S>, | ||
| 611 | &ARBDecompiler::BitfieldInsert<'S'>, | ||
| 612 | &ARBDecompiler::BitfieldExtract<'S'>, | ||
| 613 | &ARBDecompiler::Unary<BTC_S>, | ||
| 614 | &ARBDecompiler::Unary<BTFM_S>, | ||
| 615 | |||
| 616 | &ARBDecompiler::Binary<ADD_U>, | ||
| 617 | &ARBDecompiler::Binary<MUL_U>, | ||
| 618 | &ARBDecompiler::Binary<DIV_U>, | ||
| 619 | &ARBDecompiler::Binary<MIN_U>, | ||
| 620 | &ARBDecompiler::Binary<MAX_U>, | ||
| 621 | &ARBDecompiler::Unary<TRUNC_U>, | ||
| 622 | &ARBDecompiler::Unary<MOV_U>, | ||
| 623 | &ARBDecompiler::Binary<SHL_U>, | ||
| 624 | &ARBDecompiler::Binary<SHR_U>, | ||
| 625 | &ARBDecompiler::Binary<SHR_U>, | ||
| 626 | &ARBDecompiler::Binary<AND_U>, | ||
| 627 | &ARBDecompiler::Binary<OR_U>, | ||
| 628 | &ARBDecompiler::Binary<XOR_U>, | ||
| 629 | &ARBDecompiler::Unary<NOT_U>, | ||
| 630 | &ARBDecompiler::BitfieldInsert<'U'>, | ||
| 631 | &ARBDecompiler::BitfieldExtract<'U'>, | ||
| 632 | &ARBDecompiler::Unary<BTC_U>, | ||
| 633 | &ARBDecompiler::Unary<BTFM_U>, | ||
| 634 | |||
| 635 | &ARBDecompiler::HAdd2, | ||
| 636 | &ARBDecompiler::HMul2, | ||
| 637 | &ARBDecompiler::HFma2, | ||
| 638 | &ARBDecompiler::HAbsolute, | ||
| 639 | &ARBDecompiler::HNegate, | ||
| 640 | &ARBDecompiler::HClamp, | ||
| 641 | &ARBDecompiler::HCastFloat, | ||
| 642 | &ARBDecompiler::HUnpack, | ||
| 643 | &ARBDecompiler::HMergeF32, | ||
| 644 | &ARBDecompiler::HMergeH0, | ||
| 645 | &ARBDecompiler::HMergeH1, | ||
| 646 | &ARBDecompiler::HPack2, | ||
| 647 | |||
| 648 | &ARBDecompiler::LogicalAssign, | ||
| 649 | &ARBDecompiler::Binary<AND_U>, | ||
| 650 | &ARBDecompiler::Binary<OR_U>, | ||
| 651 | &ARBDecompiler::Binary<XOR_U>, | ||
| 652 | &ARBDecompiler::Unary<NOT_U>, | ||
| 653 | &ARBDecompiler::LogicalPick2, | ||
| 654 | &ARBDecompiler::LogicalAnd2, | ||
| 655 | |||
| 656 | &ARBDecompiler::FloatComparison<SLT_F, false>, | ||
| 657 | &ARBDecompiler::FloatComparison<SEQ_F, false>, | ||
| 658 | &ARBDecompiler::FloatComparison<SLE_F, false>, | ||
| 659 | &ARBDecompiler::FloatComparison<SGT_F, false>, | ||
| 660 | &ARBDecompiler::FloatComparison<SNE_F, false>, | ||
| 661 | &ARBDecompiler::FloatComparison<SGE_F, false>, | ||
| 662 | &ARBDecompiler::FloatOrdered, | ||
| 663 | &ARBDecompiler::FloatUnordered, | ||
| 664 | &ARBDecompiler::FloatComparison<SLT_F, true>, | ||
| 665 | &ARBDecompiler::FloatComparison<SEQ_F, true>, | ||
| 666 | &ARBDecompiler::FloatComparison<SLE_F, true>, | ||
| 667 | &ARBDecompiler::FloatComparison<SGT_F, true>, | ||
| 668 | &ARBDecompiler::FloatComparison<SNE_F, true>, | ||
| 669 | &ARBDecompiler::FloatComparison<SGE_F, true>, | ||
| 670 | |||
| 671 | &ARBDecompiler::Binary<SLT_S>, | ||
| 672 | &ARBDecompiler::Binary<SEQ_S>, | ||
| 673 | &ARBDecompiler::Binary<SLE_S>, | ||
| 674 | &ARBDecompiler::Binary<SGT_S>, | ||
| 675 | &ARBDecompiler::Binary<SNE_S>, | ||
| 676 | &ARBDecompiler::Binary<SGE_S>, | ||
| 677 | |||
| 678 | &ARBDecompiler::Binary<SLT_U>, | ||
| 679 | &ARBDecompiler::Binary<SEQ_U>, | ||
| 680 | &ARBDecompiler::Binary<SLE_U>, | ||
| 681 | &ARBDecompiler::Binary<SGT_U>, | ||
| 682 | &ARBDecompiler::Binary<SNE_U>, | ||
| 683 | &ARBDecompiler::Binary<SGE_U>, | ||
| 684 | |||
| 685 | &ARBDecompiler::LogicalAddCarry, | ||
| 686 | |||
| 687 | &ARBDecompiler::HalfComparison<SLT_F, false>, | ||
| 688 | &ARBDecompiler::HalfComparison<SEQ_F, false>, | ||
| 689 | &ARBDecompiler::HalfComparison<SLE_F, false>, | ||
| 690 | &ARBDecompiler::HalfComparison<SGT_F, false>, | ||
| 691 | &ARBDecompiler::HalfComparison<SNE_F, false>, | ||
| 692 | &ARBDecompiler::HalfComparison<SGE_F, false>, | ||
| 693 | &ARBDecompiler::HalfComparison<SLT_F, true>, | ||
| 694 | &ARBDecompiler::HalfComparison<SEQ_F, true>, | ||
| 695 | &ARBDecompiler::HalfComparison<SLE_F, true>, | ||
| 696 | &ARBDecompiler::HalfComparison<SGT_F, true>, | ||
| 697 | &ARBDecompiler::HalfComparison<SNE_F, true>, | ||
| 698 | &ARBDecompiler::HalfComparison<SGE_F, true>, | ||
| 699 | |||
| 700 | &ARBDecompiler::Texture, | ||
| 701 | &ARBDecompiler::Texture, | ||
| 702 | &ARBDecompiler::TextureGather, | ||
| 703 | &ARBDecompiler::TextureQueryDimensions, | ||
| 704 | &ARBDecompiler::TextureQueryLod, | ||
| 705 | &ARBDecompiler::TexelFetch, | ||
| 706 | &ARBDecompiler::TextureGradient, | ||
| 707 | |||
| 708 | &ARBDecompiler::ImageLoad, | ||
| 709 | &ARBDecompiler::ImageStore, | ||
| 710 | |||
| 711 | &ARBDecompiler::AtomicImage<ADD, U32>, | ||
| 712 | &ARBDecompiler::AtomicImage<AND, U32>, | ||
| 713 | &ARBDecompiler::AtomicImage<OR, U32>, | ||
| 714 | &ARBDecompiler::AtomicImage<XOR, U32>, | ||
| 715 | &ARBDecompiler::AtomicImage<EXCH, U32>, | ||
| 716 | |||
| 717 | &ARBDecompiler::Atomic<EXCH, U32>, | ||
| 718 | &ARBDecompiler::Atomic<ADD, U32>, | ||
| 719 | &ARBDecompiler::Atomic<MIN, U32>, | ||
| 720 | &ARBDecompiler::Atomic<MAX, U32>, | ||
| 721 | &ARBDecompiler::Atomic<AND, U32>, | ||
| 722 | &ARBDecompiler::Atomic<OR, U32>, | ||
| 723 | &ARBDecompiler::Atomic<XOR, U32>, | ||
| 724 | |||
| 725 | &ARBDecompiler::Atomic<EXCH, S32>, | ||
| 726 | &ARBDecompiler::Atomic<ADD, S32>, | ||
| 727 | &ARBDecompiler::Atomic<MIN, S32>, | ||
| 728 | &ARBDecompiler::Atomic<MAX, S32>, | ||
| 729 | &ARBDecompiler::Atomic<AND, S32>, | ||
| 730 | &ARBDecompiler::Atomic<OR, S32>, | ||
| 731 | &ARBDecompiler::Atomic<XOR, S32>, | ||
| 732 | |||
| 733 | &ARBDecompiler::Atomic<ADD, U32>, | ||
| 734 | &ARBDecompiler::Atomic<MIN, U32>, | ||
| 735 | &ARBDecompiler::Atomic<MAX, U32>, | ||
| 736 | &ARBDecompiler::Atomic<AND, U32>, | ||
| 737 | &ARBDecompiler::Atomic<OR, U32>, | ||
| 738 | &ARBDecompiler::Atomic<XOR, U32>, | ||
| 739 | |||
| 740 | &ARBDecompiler::Atomic<ADD, S32>, | ||
| 741 | &ARBDecompiler::Atomic<MIN, S32>, | ||
| 742 | &ARBDecompiler::Atomic<MAX, S32>, | ||
| 743 | &ARBDecompiler::Atomic<AND, S32>, | ||
| 744 | &ARBDecompiler::Atomic<OR, S32>, | ||
| 745 | &ARBDecompiler::Atomic<XOR, S32>, | ||
| 746 | |||
| 747 | &ARBDecompiler::Branch, | ||
| 748 | &ARBDecompiler::BranchIndirect, | ||
| 749 | &ARBDecompiler::PushFlowStack, | ||
| 750 | &ARBDecompiler::PopFlowStack, | ||
| 751 | &ARBDecompiler::Exit, | ||
| 752 | &ARBDecompiler::Discard, | ||
| 753 | |||
| 754 | &ARBDecompiler::EmitVertex, | ||
| 755 | &ARBDecompiler::EndPrimitive, | ||
| 756 | |||
| 757 | &ARBDecompiler::InvocationId, | ||
| 758 | &ARBDecompiler::YNegate, | ||
| 759 | &ARBDecompiler::LocalInvocationId<'x'>, | ||
| 760 | &ARBDecompiler::LocalInvocationId<'y'>, | ||
| 761 | &ARBDecompiler::LocalInvocationId<'z'>, | ||
| 762 | &ARBDecompiler::WorkGroupId<'x'>, | ||
| 763 | &ARBDecompiler::WorkGroupId<'y'>, | ||
| 764 | &ARBDecompiler::WorkGroupId<'z'>, | ||
| 765 | |||
| 766 | &ARBDecompiler::Unary<TGBALLOT_U>, | ||
| 767 | &ARBDecompiler::Unary<TGALL_U>, | ||
| 768 | &ARBDecompiler::Unary<TGANY_U>, | ||
| 769 | &ARBDecompiler::Unary<TGEQ_U>, | ||
| 770 | |||
| 771 | &ARBDecompiler::ThreadId, | ||
| 772 | &ARBDecompiler::ThreadMask<'e', 'q'>, | ||
| 773 | &ARBDecompiler::ThreadMask<'g', 'e'>, | ||
| 774 | &ARBDecompiler::ThreadMask<'g', 't'>, | ||
| 775 | &ARBDecompiler::ThreadMask<'l', 'e'>, | ||
| 776 | &ARBDecompiler::ThreadMask<'l', 't'>, | ||
| 777 | &ARBDecompiler::ShuffleIndexed, | ||
| 778 | |||
| 779 | &ARBDecompiler::Barrier, | ||
| 780 | &ARBDecompiler::MemoryBarrierGroup, | ||
| 781 | &ARBDecompiler::MemoryBarrierGlobal, | ||
| 782 | }; | ||
| 783 | }; | ||
| 784 | |||
| 785 | ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, | ||
| 786 | ShaderType stage, std::string_view identifier) | ||
| 787 | : device{device}, ir{ir}, registry{registry}, stage{stage} { | ||
| 788 | AddLine("TEMP RC;"); | ||
| 789 | if (ir.IsDecompiled()) { | ||
| 790 | DecompileAST(); | ||
| 791 | } else { | ||
| 792 | DecompileBranchMode(); | ||
| 793 | } | ||
| 794 | AddLine("END"); | ||
| 795 | |||
| 796 | const std::string code = std::move(shader_source); | ||
| 797 | DeclareHeader(); | ||
| 798 | DeclareVertex(); | ||
| 799 | DeclareGeometry(); | ||
| 800 | DeclareFragment(); | ||
| 801 | DeclareCompute(); | ||
| 802 | DeclareInputAttributes(); | ||
| 803 | DeclareOutputAttributes(); | ||
| 804 | DeclareLocalMemory(); | ||
| 805 | DeclareGlobalMemory(); | ||
| 806 | DeclareConstantBuffers(); | ||
| 807 | DeclareRegisters(); | ||
| 808 | DeclareTemporaries(); | ||
| 809 | DeclarePredicates(); | ||
| 810 | DeclareInternalFlags(); | ||
| 811 | |||
| 812 | shader_source += code; | ||
| 813 | } | ||
| 814 | |||
| 815 | std::string_view HeaderStageName(ShaderType stage) { | ||
| 816 | switch (stage) { | ||
| 817 | case ShaderType::Vertex: | ||
| 818 | return "vp"; | ||
| 819 | case ShaderType::Geometry: | ||
| 820 | return "gp"; | ||
| 821 | case ShaderType::Fragment: | ||
| 822 | return "fp"; | ||
| 823 | case ShaderType::Compute: | ||
| 824 | return "cp"; | ||
| 825 | default: | ||
| 826 | UNREACHABLE(); | ||
| 827 | return ""; | ||
| 828 | } | ||
| 829 | } | ||
| 830 | |||
| 831 | void ARBDecompiler::DeclareHeader() { | ||
| 832 | AddLine("!!NV{}5.0", HeaderStageName(stage)); | ||
| 833 | // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D | ||
| 834 | AddLine("OPTION NV_internal;"); | ||
| 835 | AddLine("OPTION NV_gpu_program_fp64;"); | ||
| 836 | AddLine("OPTION NV_shader_storage_buffer;"); | ||
| 837 | AddLine("OPTION NV_shader_thread_group;"); | ||
| 838 | if (ir.UsesWarps() && device.HasWarpIntrinsics()) { | ||
| 839 | AddLine("OPTION NV_shader_thread_shuffle;"); | ||
| 840 | } | ||
| 841 | if (stage == ShaderType::Vertex) { | ||
| 842 | if (device.HasNvViewportArray2()) { | ||
| 843 | AddLine("OPTION NV_viewport_array2;"); | ||
| 844 | } | ||
| 845 | } | ||
| 846 | if (stage == ShaderType::Fragment) { | ||
| 847 | AddLine("OPTION ARB_draw_buffers;"); | ||
| 848 | } | ||
| 849 | if (device.HasImageLoadFormatted()) { | ||
| 850 | AddLine("OPTION EXT_shader_image_load_formatted;"); | ||
| 851 | } | ||
| 852 | } | ||
| 853 | |||
| 854 | void ARBDecompiler::DeclareVertex() { | ||
| 855 | if (stage != ShaderType::Vertex) { | ||
| 856 | return; | ||
| 857 | } | ||
| 858 | AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};"); | ||
| 859 | } | ||
| 860 | |||
| 861 | void ARBDecompiler::DeclareGeometry() { | ||
| 862 | if (stage != ShaderType::Geometry) { | ||
| 863 | return; | ||
| 864 | } | ||
| 865 | const auto& info = registry.GetGraphicsInfo(); | ||
| 866 | const auto& header = ir.GetHeader(); | ||
| 867 | AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology)); | ||
| 868 | AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology)); | ||
| 869 | AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value()); | ||
| 870 | AddLine("ATTRIB vertex_position = vertex.position;"); | ||
| 871 | } | ||
| 872 | |||
| 873 | void ARBDecompiler::DeclareFragment() { | ||
| 874 | if (stage != ShaderType::Fragment) { | ||
| 875 | return; | ||
| 876 | } | ||
| 877 | AddLine("OUTPUT result_color7 = result.color[7];"); | ||
| 878 | AddLine("OUTPUT result_color6 = result.color[6];"); | ||
| 879 | AddLine("OUTPUT result_color5 = result.color[5];"); | ||
| 880 | AddLine("OUTPUT result_color4 = result.color[4];"); | ||
| 881 | AddLine("OUTPUT result_color3 = result.color[3];"); | ||
| 882 | AddLine("OUTPUT result_color2 = result.color[2];"); | ||
| 883 | AddLine("OUTPUT result_color1 = result.color[1];"); | ||
| 884 | AddLine("OUTPUT result_color0 = result.color;"); | ||
| 885 | } | ||
| 886 | |||
| 887 | void ARBDecompiler::DeclareCompute() { | ||
| 888 | if (stage != ShaderType::Compute) { | ||
| 889 | return; | ||
| 890 | } | ||
| 891 | const ComputeInfo& info = registry.GetComputeInfo(); | ||
| 892 | AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], | ||
| 893 | info.workgroup_size[2]); | ||
| 894 | if (info.shared_memory_size_in_words > 0) { | ||
| 895 | const u32 size_in_bytes = info.shared_memory_size_in_words * 4; | ||
| 896 | AddLine("SHARED_MEMORY {};", size_in_bytes); | ||
| 897 | AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); | ||
| 898 | } | ||
| 899 | } | ||
| 900 | |||
| 901 | void ARBDecompiler::DeclareInputAttributes() { | ||
| 902 | if (stage == ShaderType::Compute) { | ||
| 903 | return; | ||
| 904 | } | ||
| 905 | const std::string_view stage_name = StageInputName(stage); | ||
| 906 | for (const auto attribute : ir.GetInputAttributes()) { | ||
| 907 | if (!IsGenericAttribute(attribute)) { | ||
| 908 | continue; | ||
| 909 | } | ||
| 910 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 911 | |||
| 912 | std::string_view suffix; | ||
| 913 | if (stage == ShaderType::Fragment) { | ||
| 914 | const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)}; | ||
| 915 | if (input_mode == PixelImap::Unused) { | ||
| 916 | return; | ||
| 917 | } | ||
| 918 | suffix = GetInputFlags(input_mode); | ||
| 919 | } | ||
| 920 | AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index, | ||
| 921 | index); | ||
| 922 | } | ||
| 923 | } | ||
| 924 | |||
| 925 | void ARBDecompiler::DeclareOutputAttributes() { | ||
| 926 | if (stage == ShaderType::Compute) { | ||
| 927 | return; | ||
| 928 | } | ||
| 929 | for (const auto attribute : ir.GetOutputAttributes()) { | ||
| 930 | if (!IsGenericAttribute(attribute)) { | ||
| 931 | continue; | ||
| 932 | } | ||
| 933 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 934 | AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index); | ||
| 935 | } | ||
| 936 | } | ||
| 937 | |||
| 938 | void ARBDecompiler::DeclareLocalMemory() { | ||
| 939 | u64 size = 0; | ||
| 940 | if (stage == ShaderType::Compute) { | ||
| 941 | size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; | ||
| 942 | } else { | ||
| 943 | size = ir.GetHeader().GetLocalMemorySize(); | ||
| 944 | } | ||
| 945 | if (size == 0) { | ||
| 946 | return; | ||
| 947 | } | ||
| 948 | const u64 element_count = Common::AlignUp(size, 4) / 4; | ||
| 949 | AddLine("TEMP lmem[{}];", element_count); | ||
| 950 | } | ||
| 951 | |||
| 952 | void ARBDecompiler::DeclareGlobalMemory() { | ||
| 953 | u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer; | ||
| 954 | for (const auto& pair : ir.GetGlobalMemory()) { | ||
| 955 | const auto& base = pair.first; | ||
| 956 | AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding); | ||
| 957 | ++binding; | ||
| 958 | } | ||
| 959 | } | ||
| 960 | |||
| 961 | void ARBDecompiler::DeclareConstantBuffers() { | ||
| 962 | u32 binding = 0; | ||
| 963 | for (const auto& cbuf : ir.GetConstantBuffers()) { | ||
| 964 | AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding); | ||
| 965 | ++binding; | ||
| 966 | } | ||
| 967 | } | ||
| 968 | |||
| 969 | void ARBDecompiler::DeclareRegisters() { | ||
| 970 | for (const u32 gpr : ir.GetRegisters()) { | ||
| 971 | AddLine("TEMP R{};", gpr); | ||
| 972 | } | ||
| 973 | } | ||
| 974 | |||
| 975 | void ARBDecompiler::DeclareTemporaries() { | ||
| 976 | for (std::size_t i = 0; i < max_temporaries; ++i) { | ||
| 977 | AddLine("TEMP T{};", i); | ||
| 978 | } | ||
| 979 | } | ||
| 980 | |||
| 981 | void ARBDecompiler::DeclarePredicates() { | ||
| 982 | for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { | ||
| 983 | AddLine("TEMP P{};", static_cast<u64>(pred)); | ||
| 984 | } | ||
| 985 | } | ||
| 986 | |||
| 987 | void ARBDecompiler::DeclareInternalFlags() { | ||
| 988 | for (const char* name : INTERNAL_FLAG_NAMES) { | ||
| 989 | AddLine("TEMP {};", name); | ||
| 990 | } | ||
| 991 | } | ||
| 992 | |||
| 993 | void ARBDecompiler::InitializeVariables() { | ||
| 994 | if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) { | ||
| 995 | AddLine("MOV.F result.position, {{0, 0, 0, 1}};"); | ||
| 996 | } | ||
| 997 | for (const auto attribute : ir.GetOutputAttributes()) { | ||
| 998 | if (!IsGenericAttribute(attribute)) { | ||
| 999 | continue; | ||
| 1000 | } | ||
| 1001 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 1002 | AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index); | ||
| 1003 | } | ||
| 1004 | for (const u32 gpr : ir.GetRegisters()) { | ||
| 1005 | AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr); | ||
| 1006 | } | ||
| 1007 | for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { | ||
| 1008 | AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred)); | ||
| 1009 | } | ||
| 1010 | } | ||
| 1011 | |||
| 1012 | void ARBDecompiler::DecompileAST() { | ||
| 1013 | const u32 num_flow_variables = ir.GetASTNumVariables(); | ||
| 1014 | for (u32 i = 0; i < num_flow_variables; ++i) { | ||
| 1015 | AddLine("TEMP F{};", i); | ||
| 1016 | } | ||
| 1017 | for (u32 i = 0; i < num_flow_variables; ++i) { | ||
| 1018 | AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i); | ||
| 1019 | } | ||
| 1020 | |||
| 1021 | InitializeVariables(); | ||
| 1022 | |||
| 1023 | VisitAST(ir.GetASTProgram()); | ||
| 1024 | } | ||
| 1025 | |||
| 1026 | void ARBDecompiler::DecompileBranchMode() { | ||
| 1027 | static constexpr u32 FLOW_STACK_SIZE = 20; | ||
| 1028 | if (!ir.IsFlowStackDisabled()) { | ||
| 1029 | AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE); | ||
| 1030 | AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE); | ||
| 1031 | AddLine("TEMP SSY_TOP;"); | ||
| 1032 | AddLine("TEMP PBK_TOP;"); | ||
| 1033 | } | ||
| 1034 | |||
| 1035 | AddLine("TEMP PC;"); | ||
| 1036 | |||
| 1037 | if (!ir.IsFlowStackDisabled()) { | ||
| 1038 | AddLine("MOV.U SSY_TOP.x, 0;"); | ||
| 1039 | AddLine("MOV.U PBK_TOP.x, 0;"); | ||
| 1040 | } | ||
| 1041 | |||
| 1042 | InitializeVariables(); | ||
| 1043 | |||
| 1044 | const auto basic_block_end = ir.GetBasicBlocks().end(); | ||
| 1045 | auto basic_block_it = ir.GetBasicBlocks().begin(); | ||
| 1046 | const u32 first_address = basic_block_it->first; | ||
| 1047 | AddLine("MOV.U PC.x, {};", first_address); | ||
| 1048 | |||
| 1049 | AddLine("REP;"); | ||
| 1050 | |||
| 1051 | std::size_t num_blocks = 0; | ||
| 1052 | while (basic_block_it != basic_block_end) { | ||
| 1053 | const auto& [address, bb] = *basic_block_it; | ||
| 1054 | ++num_blocks; | ||
| 1055 | |||
| 1056 | AddLine("SEQ.S.CC RC.x, PC.x, {};", address); | ||
| 1057 | AddLine("IF NE.x;"); | ||
| 1058 | |||
| 1059 | VisitBlock(bb); | ||
| 1060 | |||
| 1061 | ++basic_block_it; | ||
| 1062 | |||
| 1063 | if (basic_block_it != basic_block_end) { | ||
| 1064 | const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]); | ||
| 1065 | if (!op || op->GetCode() != OperationCode::Branch) { | ||
| 1066 | const u32 next_address = basic_block_it->first; | ||
| 1067 | AddLine("MOV.U PC.x, {};", next_address); | ||
| 1068 | AddLine("CONT;"); | ||
| 1069 | } | ||
| 1070 | } | ||
| 1071 | |||
| 1072 | AddLine("ELSE;"); | ||
| 1073 | } | ||
| 1074 | AddLine("RET;"); | ||
| 1075 | while (num_blocks--) { | ||
| 1076 | AddLine("ENDIF;"); | ||
| 1077 | } | ||
| 1078 | |||
| 1079 | AddLine("ENDREP;"); | ||
| 1080 | } | ||
| 1081 | |||
| 1082 | void ARBDecompiler::VisitAST(const ASTNode& node) { | ||
| 1083 | if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) { | ||
| 1084 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1085 | VisitAST(current); | ||
| 1086 | } | ||
| 1087 | } else if (const auto ast = std::get_if<ASTIfThen>(&*node->GetInnerData())) { | ||
| 1088 | const std::string condition = VisitExpression(ast->condition); | ||
| 1089 | ResetTemporaries(); | ||
| 1090 | |||
| 1091 | AddLine("MOVC.U RC.x, {};", condition); | ||
| 1092 | AddLine("IF NE.x;"); | ||
| 1093 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1094 | VisitAST(current); | ||
| 1095 | } | ||
| 1096 | AddLine("ENDIF;"); | ||
| 1097 | } else if (const auto ast = std::get_if<ASTIfElse>(&*node->GetInnerData())) { | ||
| 1098 | AddLine("ELSE;"); | ||
| 1099 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1100 | VisitAST(current); | ||
| 1101 | } | ||
| 1102 | } else if (const auto ast = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) { | ||
| 1103 | VisitBlock(ast->nodes); | ||
| 1104 | } else if (const auto ast = std::get_if<ASTVarSet>(&*node->GetInnerData())) { | ||
| 1105 | AddLine("MOV.U F{}, {};", ast->index, VisitExpression(ast->condition)); | ||
| 1106 | ResetTemporaries(); | ||
| 1107 | } else if (const auto ast = std::get_if<ASTDoWhile>(&*node->GetInnerData())) { | ||
| 1108 | const std::string condition = VisitExpression(ast->condition); | ||
| 1109 | ResetTemporaries(); | ||
| 1110 | AddLine("REP;"); | ||
| 1111 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1112 | VisitAST(current); | ||
| 1113 | } | ||
| 1114 | AddLine("MOVC.U RC.x, {};", condition); | ||
| 1115 | AddLine("BRK (NE.x);"); | ||
| 1116 | AddLine("ENDREP;"); | ||
| 1117 | } else if (const auto ast = std::get_if<ASTReturn>(&*node->GetInnerData())) { | ||
| 1118 | const bool is_true = ExprIsTrue(ast->condition); | ||
| 1119 | if (!is_true) { | ||
| 1120 | AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition)); | ||
| 1121 | AddLine("IF NE.x;"); | ||
| 1122 | ResetTemporaries(); | ||
| 1123 | } | ||
| 1124 | if (ast->kills) { | ||
| 1125 | AddLine("KIL TR;"); | ||
| 1126 | } else { | ||
| 1127 | Exit(); | ||
| 1128 | } | ||
| 1129 | if (!is_true) { | ||
| 1130 | AddLine("ENDIF;"); | ||
| 1131 | } | ||
| 1132 | } else if (const auto ast = std::get_if<ASTBreak>(&*node->GetInnerData())) { | ||
| 1133 | if (ExprIsTrue(ast->condition)) { | ||
| 1134 | AddLine("BRK;"); | ||
| 1135 | } else { | ||
| 1136 | AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition)); | ||
| 1137 | AddLine("BRK (NE.x);"); | ||
| 1138 | ResetTemporaries(); | ||
| 1139 | } | ||
| 1140 | } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) { | ||
| 1141 | // Nothing to do | ||
| 1142 | } else { | ||
| 1143 | UNREACHABLE(); | ||
| 1144 | } | ||
| 1145 | } | ||
| 1146 | |||
| 1147 | std::string ARBDecompiler::VisitExpression(const Expr& node) { | ||
| 1148 | const std::string result = AllocTemporary(); | ||
| 1149 | if (const auto expr = std::get_if<ExprAnd>(&*node)) { | ||
| 1150 | AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1), | ||
| 1151 | VisitExpression(expr->operand2)); | ||
| 1152 | return result; | ||
| 1153 | } | ||
| 1154 | if (const auto expr = std::get_if<ExprOr>(&*node)) { | ||
| 1155 | const std::string result = AllocTemporary(); | ||
| 1156 | AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1), | ||
| 1157 | VisitExpression(expr->operand2)); | ||
| 1158 | return result; | ||
| 1159 | } | ||
| 1160 | if (const auto expr = std::get_if<ExprNot>(&*node)) { | ||
| 1161 | const std::string result = AllocTemporary(); | ||
| 1162 | AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1)); | ||
| 1163 | return result; | ||
| 1164 | } | ||
| 1165 | if (const auto expr = std::get_if<ExprPredicate>(&*node)) { | ||
| 1166 | return fmt::format("P{}.x", static_cast<u64>(expr->predicate)); | ||
| 1167 | } | ||
| 1168 | if (const auto expr = std::get_if<ExprCondCode>(&*node)) { | ||
| 1169 | return Visit(ir.GetConditionCode(expr->cc)); | ||
| 1170 | } | ||
| 1171 | if (const auto expr = std::get_if<ExprVar>(&*node)) { | ||
| 1172 | return fmt::format("F{}.x", expr->var_index); | ||
| 1173 | } | ||
| 1174 | if (const auto expr = std::get_if<ExprBoolean>(&*node)) { | ||
| 1175 | return expr->value ? "0xffffffff" : "0"; | ||
| 1176 | } | ||
| 1177 | if (const auto expr = std::get_if<ExprGprEqual>(&*node)) { | ||
| 1178 | const std::string result = AllocTemporary(); | ||
| 1179 | AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value); | ||
| 1180 | return result; | ||
| 1181 | } | ||
| 1182 | UNREACHABLE(); | ||
| 1183 | return "0"; | ||
| 1184 | } | ||
| 1185 | |||
| 1186 | void ARBDecompiler::VisitBlock(const NodeBlock& bb) { | ||
| 1187 | for (const auto& node : bb) { | ||
| 1188 | Visit(node); | ||
| 1189 | } | ||
| 1190 | } | ||
| 1191 | |||
| 1192 | std::string ARBDecompiler::Visit(const Node& node) { | ||
| 1193 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 1194 | if (const auto amend_index = operation->GetAmendIndex()) { | ||
| 1195 | Visit(ir.GetAmendNode(*amend_index)); | ||
| 1196 | } | ||
| 1197 | const std::size_t index = static_cast<std::size_t>(operation->GetCode()); | ||
| 1198 | if (index >= OPERATION_DECOMPILERS.size()) { | ||
| 1199 | UNREACHABLE_MSG("Out of bounds operation: {}", index); | ||
| 1200 | return {}; | ||
| 1201 | } | ||
| 1202 | const auto decompiler = OPERATION_DECOMPILERS[index]; | ||
| 1203 | if (decompiler == nullptr) { | ||
| 1204 | UNREACHABLE_MSG("Undefined operation: {}", index); | ||
| 1205 | return {}; | ||
| 1206 | } | ||
| 1207 | return (this->*decompiler)(*operation); | ||
| 1208 | } | ||
| 1209 | |||
| 1210 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | ||
| 1211 | const u32 index = gpr->GetIndex(); | ||
| 1212 | if (index == Register::ZeroIndex) { | ||
| 1213 | return "{0, 0, 0, 0}.x"; | ||
| 1214 | } | ||
| 1215 | return fmt::format("R{}.x", index); | ||
| 1216 | } | ||
| 1217 | |||
| 1218 | if (const auto cv = std::get_if<CustomVarNode>(&*node)) { | ||
| 1219 | return fmt::format("CV{}.x", cv->GetIndex()); | ||
| 1220 | } | ||
| 1221 | |||
| 1222 | if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { | ||
| 1223 | const std::string temporary = AllocTemporary(); | ||
| 1224 | AddLine("MOV.U {}, {};", temporary, immediate->GetValue()); | ||
| 1225 | return temporary; | ||
| 1226 | } | ||
| 1227 | |||
| 1228 | if (const auto predicate = std::get_if<PredicateNode>(&*node)) { | ||
| 1229 | const std::string temporary = AllocTemporary(); | ||
| 1230 | switch (const auto index = predicate->GetIndex(); index) { | ||
| 1231 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1232 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1233 | break; | ||
| 1234 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1235 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1236 | break; | ||
| 1237 | default: | ||
| 1238 | AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index)); | ||
| 1239 | break; | ||
| 1240 | } | ||
| 1241 | if (predicate->IsNegated()) { | ||
| 1242 | AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary); | ||
| 1243 | } | ||
| 1244 | return temporary; | ||
| 1245 | } | ||
| 1246 | |||
| 1247 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { | ||
| 1248 | if (abuf->IsPhysicalBuffer()) { | ||
| 1249 | UNIMPLEMENTED_MSG("Physical buffers are not implemented"); | ||
| 1250 | return "{0, 0, 0, 0}.x"; | ||
| 1251 | } | ||
| 1252 | |||
| 1253 | const auto buffer_index = [this, &abuf]() -> std::string { | ||
| 1254 | if (stage != ShaderType::Geometry) { | ||
| 1255 | return ""; | ||
| 1256 | } | ||
| 1257 | return fmt::format("[{}]", Visit(abuf->GetBuffer())); | ||
| 1258 | }; | ||
| 1259 | |||
| 1260 | const Attribute::Index index = abuf->GetIndex(); | ||
| 1261 | const u32 element = abuf->GetElement(); | ||
| 1262 | const char swizzle = Swizzle(element); | ||
| 1263 | switch (index) { | ||
| 1264 | case Attribute::Index::Position: { | ||
| 1265 | if (stage == ShaderType::Geometry) { | ||
| 1266 | return fmt::format("{}_position[{}].{}", StageInputName(stage), | ||
| 1267 | Visit(abuf->GetBuffer()), swizzle); | ||
| 1268 | } else { | ||
| 1269 | return fmt::format("{}.position.{}", StageInputName(stage), swizzle); | ||
| 1270 | } | ||
| 1271 | } | ||
| 1272 | case Attribute::Index::TessCoordInstanceIDVertexID: | ||
| 1273 | ASSERT(stage == ShaderType::Vertex); | ||
| 1274 | switch (element) { | ||
| 1275 | case 2: | ||
| 1276 | return "vertex.instance"; | ||
| 1277 | case 3: | ||
| 1278 | return "vertex.id"; | ||
| 1279 | } | ||
| 1280 | UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); | ||
| 1281 | break; | ||
| 1282 | case Attribute::Index::PointCoord: | ||
| 1283 | switch (element) { | ||
| 1284 | case 0: | ||
| 1285 | return "fragment.pointcoord.x"; | ||
| 1286 | case 1: | ||
| 1287 | return "fragment.pointcoord.y"; | ||
| 1288 | } | ||
| 1289 | UNIMPLEMENTED(); | ||
| 1290 | break; | ||
| 1291 | case Attribute::Index::FrontFacing: { | ||
| 1292 | ASSERT(stage == ShaderType::Fragment); | ||
| 1293 | ASSERT(element == 3); | ||
| 1294 | const std::string temporary = AllocVectorTemporary(); | ||
| 1295 | AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};"); | ||
| 1296 | AddLine("MOV.U.CC RC.x, -RC;"); | ||
| 1297 | AddLine("MOV.S {}.x, 0;", temporary); | ||
| 1298 | AddLine("MOV.S {}.x (NE.x), -1;", temporary); | ||
| 1299 | return fmt::format("{}.x", temporary); | ||
| 1300 | } | ||
| 1301 | default: | ||
| 1302 | if (IsGenericAttribute(index)) { | ||
| 1303 | if (stage == ShaderType::Geometry) { | ||
| 1304 | return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index), | ||
| 1305 | Visit(abuf->GetBuffer()), swizzle); | ||
| 1306 | } else { | ||
| 1307 | return fmt::format("{}.attrib[{}].{}", StageInputName(stage), | ||
| 1308 | GetGenericAttributeIndex(index), swizzle); | ||
| 1309 | } | ||
| 1310 | } | ||
| 1311 | UNIMPLEMENTED_MSG("Unimplemented input attribute={}", static_cast<int>(index)); | ||
| 1312 | break; | ||
| 1313 | } | ||
| 1314 | return "{0, 0, 0, 0}.x"; | ||
| 1315 | } | ||
| 1316 | |||
| 1317 | if (const auto cbuf = std::get_if<CbufNode>(&*node)) { | ||
| 1318 | std::string offset_string; | ||
| 1319 | const auto& offset = cbuf->GetOffset(); | ||
| 1320 | if (const auto imm = std::get_if<ImmediateNode>(&*offset)) { | ||
| 1321 | offset_string = std::to_string(imm->GetValue()); | ||
| 1322 | } else { | ||
| 1323 | offset_string = Visit(offset); | ||
| 1324 | } | ||
| 1325 | const std::string temporary = AllocTemporary(); | ||
| 1326 | AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string); | ||
| 1327 | return temporary; | ||
| 1328 | } | ||
| 1329 | |||
| 1330 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { | ||
| 1331 | const std::string temporary = AllocTemporary(); | ||
| 1332 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), | ||
| 1333 | Visit(gmem->GetBaseAddress())); | ||
| 1334 | AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()), | ||
| 1335 | temporary); | ||
| 1336 | return temporary; | ||
| 1337 | } | ||
| 1338 | |||
| 1339 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { | ||
| 1340 | const std::string temporary = Visit(lmem->GetAddress()); | ||
| 1341 | AddLine("SHR.U {}, {}, 2;", temporary, temporary); | ||
| 1342 | AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary); | ||
| 1343 | return temporary; | ||
| 1344 | } | ||
| 1345 | |||
| 1346 | if (const auto smem = std::get_if<SmemNode>(&*node)) { | ||
| 1347 | const std::string temporary = Visit(smem->GetAddress()); | ||
| 1348 | AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary); | ||
| 1349 | return temporary; | ||
| 1350 | } | ||
| 1351 | |||
| 1352 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { | ||
| 1353 | const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); | ||
| 1354 | return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); | ||
| 1355 | } | ||
| 1356 | |||
| 1357 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||
| 1358 | if (const auto amend_index = conditional->GetAmendIndex()) { | ||
| 1359 | Visit(ir.GetAmendNode(*amend_index)); | ||
| 1360 | } | ||
| 1361 | AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition())); | ||
| 1362 | AddLine("IF NE.x;"); | ||
| 1363 | VisitBlock(conditional->GetCode()); | ||
| 1364 | AddLine("ENDIF;"); | ||
| 1365 | return {}; | ||
| 1366 | } | ||
| 1367 | |||
| 1368 | if (const auto cmt = std::get_if<CommentNode>(&*node)) { | ||
| 1369 | // Uncommenting this will generate invalid code. GLASM lacks comments. | ||
| 1370 | // AddLine("// {}", cmt->GetText()); | ||
| 1371 | return {}; | ||
| 1372 | } | ||
| 1373 | |||
| 1374 | UNIMPLEMENTED(); | ||
| 1375 | return {}; | ||
| 1376 | } | ||
| 1377 | |||
| 1378 | std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) { | ||
| 1379 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1380 | UNIMPLEMENTED_IF(meta.sampler.is_indexed); | ||
| 1381 | UNIMPLEMENTED_IF(meta.sampler.is_shadow && meta.sampler.is_array && | ||
| 1382 | meta.sampler.type == Tegra::Shader::TextureType::TextureCube); | ||
| 1383 | |||
| 1384 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1385 | std::string temporary = AllocVectorTemporary(); | ||
| 1386 | std::size_t i = 0; | ||
| 1387 | for (; i < count; ++i) { | ||
| 1388 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1389 | } | ||
| 1390 | if (meta.sampler.is_array) { | ||
| 1391 | AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i++), Visit(meta.array)); | ||
| 1392 | } | ||
| 1393 | if (meta.sampler.is_shadow) { | ||
| 1394 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i++), Visit(meta.depth_compare)); | ||
| 1395 | } | ||
| 1396 | return {std::move(temporary), i}; | ||
| 1397 | } | ||
| 1398 | |||
| 1399 | std::string ARBDecompiler::BuildAoffi(Operation operation) { | ||
| 1400 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1401 | if (meta.aoffi.empty()) { | ||
| 1402 | return {}; | ||
| 1403 | } | ||
| 1404 | const std::string temporary = AllocVectorTemporary(); | ||
| 1405 | std::size_t i = 0; | ||
| 1406 | for (auto& node : meta.aoffi) { | ||
| 1407 | AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node)); | ||
| 1408 | } | ||
| 1409 | return fmt::format(", offset({})", temporary); | ||
| 1410 | } | ||
| 1411 | |||
| 1412 | void ARBDecompiler::Exit() { | ||
| 1413 | if (stage != ShaderType::Fragment) { | ||
| 1414 | AddLine("RET;"); | ||
| 1415 | return; | ||
| 1416 | } | ||
| 1417 | |||
| 1418 | const auto safe_get_register = [this](u32 reg) -> std::string { | ||
| 1419 | // TODO(Rodrigo): Replace with contains once C++20 releases | ||
| 1420 | const auto& used_registers = ir.GetRegisters(); | ||
| 1421 | if (used_registers.find(reg) != used_registers.end()) { | ||
| 1422 | return fmt::format("R{}.x", reg); | ||
| 1423 | } | ||
| 1424 | return "{0, 0, 0, 0}.x"; | ||
| 1425 | }; | ||
| 1426 | |||
| 1427 | const auto& header = ir.GetHeader(); | ||
| 1428 | u32 current_reg = 0; | ||
| 1429 | for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) { | ||
| 1430 | for (u32 component = 0; component < 4; ++component) { | ||
| 1431 | if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { | ||
| 1432 | continue; | ||
| 1433 | } | ||
| 1434 | AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component), | ||
| 1435 | safe_get_register(current_reg)); | ||
| 1436 | ++current_reg; | ||
| 1437 | } | ||
| 1438 | } | ||
| 1439 | if (header.ps.omap.depth) { | ||
| 1440 | AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1)); | ||
| 1441 | } | ||
| 1442 | |||
| 1443 | AddLine("RET;"); | ||
| 1444 | } | ||
| 1445 | |||
| 1446 | std::string ARBDecompiler::Assign(Operation operation) { | ||
| 1447 | const Node& dest = operation[0]; | ||
| 1448 | const Node& src = operation[1]; | ||
| 1449 | |||
| 1450 | std::string dest_name; | ||
| 1451 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { | ||
| 1452 | if (gpr->GetIndex() == Register::ZeroIndex) { | ||
| 1453 | // Writing to Register::ZeroIndex is a no op | ||
| 1454 | return {}; | ||
| 1455 | } | ||
| 1456 | dest_name = fmt::format("R{}.x", gpr->GetIndex()); | ||
| 1457 | } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { | ||
| 1458 | const u32 element = abuf->GetElement(); | ||
| 1459 | const char swizzle = Swizzle(element); | ||
| 1460 | switch (const Attribute::Index index = abuf->GetIndex()) { | ||
| 1461 | case Attribute::Index::Position: | ||
| 1462 | dest_name = fmt::format("result.position.{}", swizzle); | ||
| 1463 | break; | ||
| 1464 | case Attribute::Index::LayerViewportPointSize: | ||
| 1465 | switch (element) { | ||
| 1466 | case 0: | ||
| 1467 | UNIMPLEMENTED(); | ||
| 1468 | return {}; | ||
| 1469 | case 1: | ||
| 1470 | case 2: | ||
| 1471 | if (!device.HasNvViewportArray2()) { | ||
| 1472 | LOG_ERROR( | ||
| 1473 | Render_OpenGL, | ||
| 1474 | "NV_viewport_array2 is missing. Maxwell gen 2 or better is required."); | ||
| 1475 | return {}; | ||
| 1476 | } | ||
| 1477 | dest_name = element == 1 ? "result.layer.x" : "result.viewport.x"; | ||
| 1478 | break; | ||
| 1479 | case 3: | ||
| 1480 | dest_name = "result.pointsize.x"; | ||
| 1481 | break; | ||
| 1482 | } | ||
| 1483 | break; | ||
| 1484 | case Attribute::Index::ClipDistances0123: | ||
| 1485 | dest_name = fmt::format("result.clip[{}].x", element); | ||
| 1486 | break; | ||
| 1487 | case Attribute::Index::ClipDistances4567: | ||
| 1488 | dest_name = fmt::format("result.clip[{}].x", element + 4); | ||
| 1489 | break; | ||
| 1490 | default: | ||
| 1491 | if (!IsGenericAttribute(index)) { | ||
| 1492 | UNREACHABLE(); | ||
| 1493 | return {}; | ||
| 1494 | } | ||
| 1495 | dest_name = | ||
| 1496 | fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle); | ||
| 1497 | break; | ||
| 1498 | } | ||
| 1499 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | ||
| 1500 | const std::string address = Visit(lmem->GetAddress()); | ||
| 1501 | AddLine("SHR.U {}, {}, 2;", address, address); | ||
| 1502 | dest_name = fmt::format("lmem[{}].x", address); | ||
| 1503 | } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { | ||
| 1504 | AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress())); | ||
| 1505 | ResetTemporaries(); | ||
| 1506 | return {}; | ||
| 1507 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | ||
| 1508 | const std::string temporary = AllocTemporary(); | ||
| 1509 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), | ||
| 1510 | Visit(gmem->GetBaseAddress())); | ||
| 1511 | AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()), | ||
| 1512 | temporary); | ||
| 1513 | ResetTemporaries(); | ||
| 1514 | return {}; | ||
| 1515 | } else { | ||
| 1516 | UNREACHABLE(); | ||
| 1517 | ResetTemporaries(); | ||
| 1518 | return {}; | ||
| 1519 | } | ||
| 1520 | |||
| 1521 | AddLine("MOV.U {}, {};", dest_name, Visit(src)); | ||
| 1522 | ResetTemporaries(); | ||
| 1523 | return {}; | ||
| 1524 | } | ||
| 1525 | |||
| 1526 | std::string ARBDecompiler::Select(Operation operation) { | ||
| 1527 | const std::string temporary = AllocTemporary(); | ||
| 1528 | AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]), | ||
| 1529 | Visit(operation[2])); | ||
| 1530 | return temporary; | ||
| 1531 | } | ||
| 1532 | |||
| 1533 | std::string ARBDecompiler::FClamp(Operation operation) { | ||
| 1534 | // 1.0f in hex, replace with std::bit_cast on C++20 | ||
| 1535 | static constexpr u32 POSITIVE_ONE = 0x3f800000; | ||
| 1536 | |||
| 1537 | const std::string temporary = AllocTemporary(); | ||
| 1538 | const Node& value = operation[0]; | ||
| 1539 | const Node& low = operation[1]; | ||
| 1540 | const Node& high = operation[2]; | ||
| 1541 | const auto imm_low = std::get_if<ImmediateNode>(&*low); | ||
| 1542 | const auto imm_high = std::get_if<ImmediateNode>(&*high); | ||
| 1543 | if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) { | ||
| 1544 | AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value)); | ||
| 1545 | } else { | ||
| 1546 | AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high)); | ||
| 1547 | AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low)); | ||
| 1548 | } | ||
| 1549 | return temporary; | ||
| 1550 | } | ||
| 1551 | |||
| 1552 | std::string ARBDecompiler::FCastHalf0(Operation operation) { | ||
| 1553 | const std::string temporary = AllocVectorTemporary(); | ||
| 1554 | AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0])); | ||
| 1555 | return fmt::format("{}.x", temporary); | ||
| 1556 | } | ||
| 1557 | |||
| 1558 | std::string ARBDecompiler::FCastHalf1(Operation operation) { | ||
| 1559 | const std::string temporary = AllocVectorTemporary(); | ||
| 1560 | AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0])); | ||
| 1561 | AddLine("MOV {}.x, {}.y;", temporary, temporary); | ||
| 1562 | return fmt::format("{}.x", temporary); | ||
| 1563 | } | ||
| 1564 | |||
| 1565 | std::string ARBDecompiler::FSqrt(Operation operation) { | ||
| 1566 | const std::string temporary = AllocTemporary(); | ||
| 1567 | AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0])); | ||
| 1568 | AddLine("RCP.F32 {}, {};", temporary, temporary); | ||
| 1569 | return temporary; | ||
| 1570 | } | ||
| 1571 | |||
| 1572 | std::string ARBDecompiler::FSwizzleAdd(Operation operation) { | ||
| 1573 | LOG_WARNING(Render_OpenGL, "(STUBBED)"); | ||
| 1574 | const std::string temporary = AllocTemporary(); | ||
| 1575 | AddLine("ADD.F {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1])); | ||
| 1576 | return temporary; | ||
| 1577 | } | ||
| 1578 | |||
| 1579 | std::string ARBDecompiler::HAdd2(Operation operation) { | ||
| 1580 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1581 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1582 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1583 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1584 | AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1585 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1586 | return fmt::format("{}.x", tmp1); | ||
| 1587 | } | ||
| 1588 | |||
| 1589 | std::string ARBDecompiler::HMul2(Operation operation) { | ||
| 1590 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1591 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1592 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1593 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1594 | AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1595 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1596 | return fmt::format("{}.x", tmp1); | ||
| 1597 | } | ||
| 1598 | |||
| 1599 | std::string ARBDecompiler::HFma2(Operation operation) { | ||
| 1600 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1601 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1602 | const std::string tmp3 = AllocVectorTemporary(); | ||
| 1603 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1604 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1605 | AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2])); | ||
| 1606 | AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3); | ||
| 1607 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1608 | return fmt::format("{}.x", tmp1); | ||
| 1609 | } | ||
| 1610 | |||
| 1611 | std::string ARBDecompiler::HAbsolute(Operation operation) { | ||
| 1612 | const std::string temporary = AllocVectorTemporary(); | ||
| 1613 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1614 | AddLine("PK2H.F {}.x, |{}|;", temporary, temporary); | ||
| 1615 | return fmt::format("{}.x", temporary); | ||
| 1616 | } | ||
| 1617 | |||
| 1618 | std::string ARBDecompiler::HNegate(Operation operation) { | ||
| 1619 | const std::string temporary = AllocVectorTemporary(); | ||
| 1620 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1621 | AddLine("MOVC.S RC.x, {};", Visit(operation[1])); | ||
| 1622 | AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary); | ||
| 1623 | AddLine("MOVC.S RC.x, {};", Visit(operation[2])); | ||
| 1624 | AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary); | ||
| 1625 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1626 | return fmt::format("{}.x", temporary); | ||
| 1627 | } | ||
| 1628 | |||
| 1629 | std::string ARBDecompiler::HClamp(Operation operation) { | ||
| 1630 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1631 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1632 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1633 | AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1])); | ||
| 1634 | AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); | ||
| 1635 | AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1636 | AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2])); | ||
| 1637 | AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); | ||
| 1638 | AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1639 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1640 | return fmt::format("{}.x", tmp1); | ||
| 1641 | } | ||
| 1642 | |||
| 1643 | std::string ARBDecompiler::HCastFloat(Operation operation) { | ||
| 1644 | const std::string temporary = AllocVectorTemporary(); | ||
| 1645 | AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary); | ||
| 1646 | AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0])); | ||
| 1647 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1648 | return fmt::format("{}.x", temporary); | ||
| 1649 | } | ||
| 1650 | |||
| 1651 | std::string ARBDecompiler::HUnpack(Operation operation) { | ||
| 1652 | const std::string operand = Visit(operation[0]); | ||
| 1653 | switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { | ||
| 1654 | case Tegra::Shader::HalfType::H0_H1: | ||
| 1655 | return operand; | ||
| 1656 | case Tegra::Shader::HalfType::F32: { | ||
| 1657 | const std::string temporary = AllocVectorTemporary(); | ||
| 1658 | AddLine("MOV.U {}.x, {};", temporary, operand); | ||
| 1659 | AddLine("MOV.U {}.y, {}.x;", temporary, temporary); | ||
| 1660 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1661 | return fmt::format("{}.x", temporary); | ||
| 1662 | } | ||
| 1663 | case Tegra::Shader::HalfType::H0_H0: { | ||
| 1664 | const std::string temporary = AllocVectorTemporary(); | ||
| 1665 | AddLine("UP2H.F {}.xy, {};", temporary, operand); | ||
| 1666 | AddLine("MOV.U {}.y, {}.x;", temporary, temporary); | ||
| 1667 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1668 | return fmt::format("{}.x", temporary); | ||
| 1669 | } | ||
| 1670 | case Tegra::Shader::HalfType::H1_H1: { | ||
| 1671 | const std::string temporary = AllocVectorTemporary(); | ||
| 1672 | AddLine("UP2H.F {}.xy, {};", temporary, operand); | ||
| 1673 | AddLine("MOV.U {}.x, {}.y;", temporary, temporary); | ||
| 1674 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1675 | return fmt::format("{}.x", temporary); | ||
| 1676 | } | ||
| 1677 | } | ||
| 1678 | UNREACHABLE(); | ||
| 1679 | return "{0, 0, 0, 0}.x"; | ||
| 1680 | } | ||
| 1681 | |||
| 1682 | std::string ARBDecompiler::HMergeF32(Operation operation) { | ||
| 1683 | const std::string temporary = AllocVectorTemporary(); | ||
| 1684 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1685 | return fmt::format("{}.x", temporary); | ||
| 1686 | } | ||
| 1687 | |||
| 1688 | std::string ARBDecompiler::HMergeH0(Operation operation) { | ||
| 1689 | const std::string temporary = AllocVectorTemporary(); | ||
| 1690 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1691 | AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); | ||
| 1692 | AddLine("MOV.U {}.x, {}.z;", temporary, temporary); | ||
| 1693 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1694 | return fmt::format("{}.x", temporary); | ||
| 1695 | } | ||
| 1696 | |||
| 1697 | std::string ARBDecompiler::HMergeH1(Operation operation) { | ||
| 1698 | const std::string temporary = AllocVectorTemporary(); | ||
| 1699 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1700 | AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); | ||
| 1701 | AddLine("MOV.U {}.y, {}.w;", temporary, temporary); | ||
| 1702 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1703 | return fmt::format("{}.x", temporary); | ||
| 1704 | } | ||
| 1705 | |||
| 1706 | std::string ARBDecompiler::HPack2(Operation operation) { | ||
| 1707 | const std::string temporary = AllocVectorTemporary(); | ||
| 1708 | AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0])); | ||
| 1709 | AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1])); | ||
| 1710 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1711 | return fmt::format("{}.x", temporary); | ||
| 1712 | } | ||
| 1713 | |||
| 1714 | std::string ARBDecompiler::LogicalAssign(Operation operation) { | ||
| 1715 | const Node& dest = operation[0]; | ||
| 1716 | const Node& src = operation[1]; | ||
| 1717 | |||
| 1718 | std::string target; | ||
| 1719 | |||
| 1720 | if (const auto pred = std::get_if<PredicateNode>(&*dest)) { | ||
| 1721 | ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); | ||
| 1722 | |||
| 1723 | const Tegra::Shader::Pred index = pred->GetIndex(); | ||
| 1724 | switch (index) { | ||
| 1725 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1726 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1727 | // Writing to these predicates is a no-op | ||
| 1728 | return {}; | ||
| 1729 | } | ||
| 1730 | target = fmt::format("P{}.x", static_cast<u64>(index)); | ||
| 1731 | } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) { | ||
| 1732 | const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); | ||
| 1733 | target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); | ||
| 1734 | } else { | ||
| 1735 | UNREACHABLE(); | ||
| 1736 | ResetTemporaries(); | ||
| 1737 | return {}; | ||
| 1738 | } | ||
| 1739 | |||
| 1740 | AddLine("MOV.U {}, {};", target, Visit(src)); | ||
| 1741 | ResetTemporaries(); | ||
| 1742 | return {}; | ||
| 1743 | } | ||
| 1744 | |||
| 1745 | std::string ARBDecompiler::LogicalPick2(Operation operation) { | ||
| 1746 | const std::string temporary = AllocTemporary(); | ||
| 1747 | const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue(); | ||
| 1748 | AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index)); | ||
| 1749 | return temporary; | ||
| 1750 | } | ||
| 1751 | |||
| 1752 | std::string ARBDecompiler::LogicalAnd2(Operation operation) { | ||
| 1753 | const std::string temporary = AllocTemporary(); | ||
| 1754 | const std::string op = Visit(operation[0]); | ||
| 1755 | AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op); | ||
| 1756 | return temporary; | ||
| 1757 | } | ||
| 1758 | |||
| 1759 | std::string ARBDecompiler::FloatOrdered(Operation operation) { | ||
| 1760 | const std::string temporary = AllocTemporary(); | ||
| 1761 | AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); | ||
| 1762 | AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); | ||
| 1763 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1764 | AddLine("MOV.S {} (NAN.x), 0;", temporary); | ||
| 1765 | AddLine("MOV.S {} (NAN.y), 0;", temporary); | ||
| 1766 | return temporary; | ||
| 1767 | } | ||
| 1768 | |||
| 1769 | std::string ARBDecompiler::FloatUnordered(Operation operation) { | ||
| 1770 | const std::string temporary = AllocTemporary(); | ||
| 1771 | AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); | ||
| 1772 | AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); | ||
| 1773 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1774 | AddLine("MOV.S {} (NAN.x), -1;", temporary); | ||
| 1775 | AddLine("MOV.S {} (NAN.y), -1;", temporary); | ||
| 1776 | return temporary; | ||
| 1777 | } | ||
| 1778 | |||
| 1779 | std::string ARBDecompiler::LogicalAddCarry(Operation operation) { | ||
| 1780 | const std::string temporary = AllocTemporary(); | ||
| 1781 | AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1])); | ||
| 1782 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1783 | AddLine("IF CF.x;"); | ||
| 1784 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1785 | AddLine("ENDIF;"); | ||
| 1786 | return temporary; | ||
| 1787 | } | ||
| 1788 | |||
| 1789 | std::string ARBDecompiler::Texture(Operation operation) { | ||
| 1790 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1791 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1792 | const auto [temporary, swizzle] = BuildCoords(operation); | ||
| 1793 | |||
| 1794 | std::string_view opcode = "TEX"; | ||
| 1795 | std::string extra; | ||
| 1796 | if (meta.bias) { | ||
| 1797 | ASSERT(!meta.lod); | ||
| 1798 | opcode = "TXB"; | ||
| 1799 | |||
| 1800 | if (swizzle < 4) { | ||
| 1801 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias)); | ||
| 1802 | } else { | ||
| 1803 | const std::string bias = AllocTemporary(); | ||
| 1804 | AddLine("MOV.F {}, {};", bias, Visit(meta.bias)); | ||
| 1805 | extra = fmt::format(" {},", bias); | ||
| 1806 | } | ||
| 1807 | } | ||
| 1808 | if (meta.lod) { | ||
| 1809 | ASSERT(!meta.bias); | ||
| 1810 | opcode = "TXL"; | ||
| 1811 | |||
| 1812 | if (swizzle < 4) { | ||
| 1813 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); | ||
| 1814 | } else { | ||
| 1815 | const std::string lod = AllocTemporary(); | ||
| 1816 | AddLine("MOV.F {}, {};", lod, Visit(meta.lod)); | ||
| 1817 | extra = fmt::format(" {},", lod); | ||
| 1818 | } | ||
| 1819 | } | ||
| 1820 | |||
| 1821 | AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, temporary, extra, sampler_id, | ||
| 1822 | TextureType(meta), BuildAoffi(operation)); | ||
| 1823 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1824 | return fmt::format("{}.x", temporary); | ||
| 1825 | } | ||
| 1826 | |||
| 1827 | std::string ARBDecompiler::TextureGather(Operation operation) { | ||
| 1828 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1829 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1830 | const auto [temporary, swizzle] = BuildCoords(operation); | ||
| 1831 | |||
| 1832 | std::string comp; | ||
| 1833 | if (!meta.sampler.is_shadow) { | ||
| 1834 | const auto& immediate = std::get<ImmediateNode>(*meta.component); | ||
| 1835 | comp = fmt::format(".{}", Swizzle(immediate.GetValue())); | ||
| 1836 | } | ||
| 1837 | |||
| 1838 | AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp, | ||
| 1839 | TextureType(meta), BuildAoffi(operation)); | ||
| 1840 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1841 | return fmt::format("{}.x", temporary); | ||
| 1842 | } | ||
| 1843 | |||
| 1844 | std::string ARBDecompiler::TextureQueryDimensions(Operation operation) { | ||
| 1845 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1846 | const std::string temporary = AllocVectorTemporary(); | ||
| 1847 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1848 | |||
| 1849 | ASSERT(!meta.sampler.is_array); | ||
| 1850 | |||
| 1851 | const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0"; | ||
| 1852 | AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta)); | ||
| 1853 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1854 | return fmt::format("{}.x", temporary); | ||
| 1855 | } | ||
| 1856 | |||
| 1857 | std::string ARBDecompiler::TextureQueryLod(Operation operation) { | ||
| 1858 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1859 | const std::string temporary = AllocVectorTemporary(); | ||
| 1860 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1861 | |||
| 1862 | ASSERT(!meta.sampler.is_array); | ||
| 1863 | |||
| 1864 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1865 | for (std::size_t i = 0; i < count; ++i) { | ||
| 1866 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1867 | } | ||
| 1868 | AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta)); | ||
| 1869 | AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary); | ||
| 1870 | AddLine("TRUNC.S {}, {};", temporary, temporary); | ||
| 1871 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1872 | return fmt::format("{}.x", temporary); | ||
| 1873 | } | ||
| 1874 | |||
| 1875 | std::string ARBDecompiler::TexelFetch(Operation operation) { | ||
| 1876 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1877 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1878 | const auto [temporary, swizzle] = BuildCoords(operation); | ||
| 1879 | |||
| 1880 | if (!meta.sampler.is_buffer) { | ||
| 1881 | ASSERT(swizzle < 4); | ||
| 1882 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); | ||
| 1883 | } | ||
| 1884 | AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, temporary, sampler_id, TextureType(meta), | ||
| 1885 | BuildAoffi(operation)); | ||
| 1886 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1887 | return fmt::format("{}.x", temporary); | ||
| 1888 | } | ||
| 1889 | |||
| 1890 | std::string ARBDecompiler::TextureGradient(Operation operation) { | ||
| 1891 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1892 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1893 | const std::string ddx = AllocVectorTemporary(); | ||
| 1894 | const std::string ddy = AllocVectorTemporary(); | ||
| 1895 | const std::string coord = BuildCoords(operation).first; | ||
| 1896 | |||
| 1897 | const std::size_t num_components = meta.derivates.size() / 2; | ||
| 1898 | for (std::size_t index = 0; index < num_components; ++index) { | ||
| 1899 | const char swizzle = Swizzle(index); | ||
| 1900 | AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2])); | ||
| 1901 | AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1])); | ||
| 1902 | } | ||
| 1903 | |||
| 1904 | const std::string_view result = coord; | ||
| 1905 | AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id, | ||
| 1906 | TextureType(meta), BuildAoffi(operation)); | ||
| 1907 | AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element)); | ||
| 1908 | return fmt::format("{}.x", result); | ||
| 1909 | } | ||
| 1910 | |||
| 1911 | std::string ARBDecompiler::ImageLoad(Operation operation) { | ||
| 1912 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 1913 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 1914 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1915 | const std::string_view type = ImageType(meta.image.type); | ||
| 1916 | |||
| 1917 | const std::string temporary = AllocVectorTemporary(); | ||
| 1918 | for (std::size_t i = 0; i < count; ++i) { | ||
| 1919 | AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1920 | } | ||
| 1921 | AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type); | ||
| 1922 | AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1923 | return fmt::format("{}.x", temporary); | ||
| 1924 | } | ||
| 1925 | |||
| 1926 | std::string ARBDecompiler::ImageStore(Operation operation) { | ||
| 1927 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 1928 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 1929 | const std::size_t num_coords = operation.GetOperandsCount(); | ||
| 1930 | const std::size_t num_values = meta.values.size(); | ||
| 1931 | const std::string_view type = ImageType(meta.image.type); | ||
| 1932 | |||
| 1933 | const std::string coord = AllocVectorTemporary(); | ||
| 1934 | const std::string value = AllocVectorTemporary(); | ||
| 1935 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 1936 | AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); | ||
| 1937 | } | ||
| 1938 | for (std::size_t i = 0; i < num_values; ++i) { | ||
| 1939 | AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); | ||
| 1940 | } | ||
| 1941 | AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type); | ||
| 1942 | return {}; | ||
| 1943 | } | ||
| 1944 | |||
| 1945 | std::string ARBDecompiler::Branch(Operation operation) { | ||
| 1946 | const auto target = std::get<ImmediateNode>(*operation[0]); | ||
| 1947 | AddLine("MOV.U PC.x, {};", target.GetValue()); | ||
| 1948 | AddLine("CONT;"); | ||
| 1949 | return {}; | ||
| 1950 | } | ||
| 1951 | |||
| 1952 | std::string ARBDecompiler::BranchIndirect(Operation operation) { | ||
| 1953 | AddLine("MOV.U PC.x, {};", Visit(operation[0])); | ||
| 1954 | AddLine("CONT;"); | ||
| 1955 | return {}; | ||
| 1956 | } | ||
| 1957 | |||
| 1958 | std::string ARBDecompiler::PushFlowStack(Operation operation) { | ||
| 1959 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 1960 | const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue(); | ||
| 1961 | const std::string_view stack_name = StackName(stack); | ||
| 1962 | AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target); | ||
| 1963 | AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); | ||
| 1964 | return {}; | ||
| 1965 | } | ||
| 1966 | |||
| 1967 | std::string ARBDecompiler::PopFlowStack(Operation operation) { | ||
| 1968 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 1969 | const std::string_view stack_name = StackName(stack); | ||
| 1970 | AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); | ||
| 1971 | AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name); | ||
| 1972 | AddLine("CONT;"); | ||
| 1973 | return {}; | ||
| 1974 | } | ||
| 1975 | |||
| 1976 | std::string ARBDecompiler::Exit(Operation) { | ||
| 1977 | Exit(); | ||
| 1978 | return {}; | ||
| 1979 | } | ||
| 1980 | |||
| 1981 | std::string ARBDecompiler::Discard(Operation) { | ||
| 1982 | AddLine("KIL TR;"); | ||
| 1983 | return {}; | ||
| 1984 | } | ||
| 1985 | |||
| 1986 | std::string ARBDecompiler::EmitVertex(Operation) { | ||
| 1987 | AddLine("EMIT;"); | ||
| 1988 | return {}; | ||
| 1989 | } | ||
| 1990 | |||
| 1991 | std::string ARBDecompiler::EndPrimitive(Operation) { | ||
| 1992 | AddLine("ENDPRIM;"); | ||
| 1993 | return {}; | ||
| 1994 | } | ||
| 1995 | |||
| 1996 | std::string ARBDecompiler::InvocationId(Operation) { | ||
| 1997 | return "primitive.invocation"; | ||
| 1998 | } | ||
| 1999 | |||
| 2000 | std::string ARBDecompiler::YNegate(Operation) { | ||
| 2001 | LOG_WARNING(Render_OpenGL, "(STUBBED)"); | ||
| 2002 | const std::string temporary = AllocTemporary(); | ||
| 2003 | AddLine("MOV.F {}, 1;", temporary); | ||
| 2004 | return temporary; | ||
| 2005 | } | ||
| 2006 | |||
| 2007 | std::string ARBDecompiler::ThreadId(Operation) { | ||
| 2008 | return fmt::format("{}.threadid", StageInputName(stage)); | ||
| 2009 | } | ||
| 2010 | |||
| 2011 | std::string ARBDecompiler::ShuffleIndexed(Operation operation) { | ||
| 2012 | if (!device.HasWarpIntrinsics()) { | ||
| 2013 | LOG_ERROR(Render_OpenGL, | ||
| 2014 | "NV_shader_thread_shuffle is missing. Kepler or better is required."); | ||
| 2015 | return Visit(operation[0]); | ||
| 2016 | } | ||
| 2017 | const std::string temporary = AllocVectorTemporary(); | ||
| 2018 | AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]), | ||
| 2019 | Visit(operation[1])); | ||
| 2020 | AddLine("MOV.U {}.x, {}.y;", temporary, temporary); | ||
| 2021 | return fmt::format("{}.x", temporary); | ||
| 2022 | } | ||
| 2023 | |||
| 2024 | std::string ARBDecompiler::Barrier(Operation) { | ||
| 2025 | if (!ir.IsDecompiled()) { | ||
| 2026 | LOG_ERROR(Render_OpenGL, "BAR used but shader is not decompiled"); | ||
| 2027 | return {}; | ||
| 2028 | } | ||
| 2029 | AddLine("BAR;"); | ||
| 2030 | return {}; | ||
| 2031 | } | ||
| 2032 | |||
| 2033 | std::string ARBDecompiler::MemoryBarrierGroup(Operation) { | ||
| 2034 | AddLine("MEMBAR.CTA;"); | ||
| 2035 | return {}; | ||
| 2036 | } | ||
| 2037 | |||
| 2038 | std::string ARBDecompiler::MemoryBarrierGlobal(Operation) { | ||
| 2039 | AddLine("MEMBAR;"); | ||
| 2040 | return {}; | ||
| 2041 | } | ||
| 2042 | |||
| 2043 | } // Anonymous namespace | ||
| 2044 | |||
| 2045 | std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 2046 | const VideoCommon::Shader::Registry& registry, | ||
| 2047 | Tegra::Engines::ShaderType stage, std::string_view identifier) { | ||
| 2048 | return ARBDecompiler(device, ir, registry, stage, identifier).Code(); | ||
| 2049 | } | ||
| 2050 | |||
| 2051 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h new file mode 100644 index 000000000..6afc87220 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.h | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | #include <string_view> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | namespace Tegra::Engines { | ||
| 13 | enum class ShaderType : u32; | ||
| 14 | } | ||
| 15 | |||
| 16 | namespace VideoCommon::Shader { | ||
| 17 | class ShaderIR; | ||
| 18 | class Registry; | ||
| 19 | } // namespace VideoCommon::Shader | ||
| 20 | |||
| 21 | namespace OpenGL { | ||
| 22 | |||
| 23 | class Device; | ||
| 24 | |||
| 25 | std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 26 | const VideoCommon::Shader::Registry& registry, | ||
| 27 | Tegra::Engines::ShaderType stage, std::string_view identifier); | ||
| 28 | |||
| 29 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 890fc6c63..e245e27ec 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -213,6 +213,7 @@ Device::Device() | |||
| 213 | has_component_indexing_bug = is_amd; | 213 | has_component_indexing_bug = is_amd; |
| 214 | has_precise_bug = TestPreciseBug(); | 214 | has_precise_bug = TestPreciseBug(); |
| 215 | has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; | 215 | has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; |
| 216 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; | ||
| 216 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && | 217 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && |
| 217 | GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && | 218 | GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && |
| 218 | GLAD_GL_NV_transform_feedback2; | 219 | GLAD_GL_NV_transform_feedback2; |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 98cca0254..145347943 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -88,6 +88,10 @@ public: | |||
| 88 | return has_fast_buffer_sub_data; | 88 | return has_fast_buffer_sub_data; |
| 89 | } | 89 | } |
| 90 | 90 | ||
| 91 | bool HasNvViewportArray2() const { | ||
| 92 | return has_nv_viewport_array2; | ||
| 93 | } | ||
| 94 | |||
| 91 | bool UseAssemblyShaders() const { | 95 | bool UseAssemblyShaders() const { |
| 92 | return use_assembly_shaders; | 96 | return use_assembly_shaders; |
| 93 | } | 97 | } |
| @@ -111,6 +115,7 @@ private: | |||
| 111 | bool has_component_indexing_bug{}; | 115 | bool has_component_indexing_bug{}; |
| 112 | bool has_precise_bug{}; | 116 | bool has_precise_bug{}; |
| 113 | bool has_fast_buffer_sub_data{}; | 117 | bool has_fast_buffer_sub_data{}; |
| 118 | bool has_nv_viewport_array2{}; | ||
| 114 | bool use_assembly_shaders{}; | 119 | bool use_assembly_shaders{}; |
| 115 | }; | 120 | }; |
| 116 | 121 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index a991ca64a..f539a05e1 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "video_core/engines/maxwell_3d.h" | 20 | #include "video_core/engines/maxwell_3d.h" |
| 21 | #include "video_core/engines/shader_type.h" | 21 | #include "video_core/engines/shader_type.h" |
| 22 | #include "video_core/memory_manager.h" | 22 | #include "video_core/memory_manager.h" |
| 23 | #include "video_core/renderer_opengl/gl_arb_decompiler.h" | ||
| 23 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 24 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 24 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 25 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 26 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| @@ -147,7 +148,8 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u | |||
| 147 | auto program = std::make_shared<ProgramHandle>(); | 148 | auto program = std::make_shared<ProgramHandle>(); |
| 148 | 149 | ||
| 149 | if (device.UseAssemblyShaders()) { | 150 | if (device.UseAssemblyShaders()) { |
| 150 | const std::string arb = "Not implemented"; | 151 | const std::string arb = |
| 152 | DecompileAssemblyShader(device, ir, registry, shader_type, shader_id); | ||
| 151 | 153 | ||
| 152 | GLuint& arb_prog = program->assembly_program.handle; | 154 | GLuint& arb_prog = program->assembly_program.handle; |
| 153 | 155 | ||