diff options
Diffstat (limited to '')
| -rw-r--r-- | CMakeModules/GenerateSCMRev.cmake | 2 | ||||
| -rw-r--r-- | src/common/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_arb_decompiler.cpp | 2074 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_arb_decompiler.h | 29 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 4 | ||||
| -rw-r--r-- | src/yuzu/configuration/configure_graphics_advanced.cpp | 3 |
9 files changed, 2118 insertions, 4 deletions
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 83e4e9df2..311ba1c2e 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake | |||
| @@ -51,6 +51,8 @@ endif() | |||
| 51 | # The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR) | 51 | # The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR) |
| 52 | set(VIDEO_CORE "${SRC_DIR}/src/video_core") | 52 | set(VIDEO_CORE "${SRC_DIR}/src/video_core") |
| 53 | set(HASH_FILES | 53 | set(HASH_FILES |
| 54 | "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" | ||
| 55 | "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" | ||
| 54 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" | 56 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" |
| 55 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" | 57 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" |
| 56 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" | 58 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" |
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 24b7a083c..0a3e2f4d1 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -32,6 +32,8 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 32 | DEPENDS | 32 | DEPENDS |
| 33 | # WARNING! It was too much work to try and make a common location for this list, | 33 | # WARNING! It was too much work to try and make a common location for this list, |
| 34 | # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well | 34 | # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well |
| 35 | "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" | ||
| 36 | "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" | ||
| 35 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" | 37 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" |
| 36 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" | 38 | "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" |
| 37 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" | 39 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 39d5d8401..099bb446e 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -52,6 +52,8 @@ add_library(video_core STATIC | |||
| 52 | rasterizer_interface.h | 52 | rasterizer_interface.h |
| 53 | renderer_base.cpp | 53 | renderer_base.cpp |
| 54 | renderer_base.h | 54 | renderer_base.h |
| 55 | renderer_opengl/gl_arb_decompiler.cpp | ||
| 56 | renderer_opengl/gl_arb_decompiler.h | ||
| 55 | renderer_opengl/gl_buffer_cache.cpp | 57 | renderer_opengl/gl_buffer_cache.cpp |
| 56 | renderer_opengl/gl_buffer_cache.h | 58 | renderer_opengl/gl_buffer_cache.h |
| 57 | renderer_opengl/gl_device.cpp | 59 | renderer_opengl/gl_device.cpp |
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp new file mode 100644 index 000000000..1e96b0310 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp | |||
| @@ -0,0 +1,2074 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <string> | ||
| 9 | #include <string_view> | ||
| 10 | #include <utility> | ||
| 11 | #include <variant> | ||
| 12 | |||
| 13 | #include <fmt/format.h> | ||
| 14 | |||
| 15 | #include "common/alignment.h" | ||
| 16 | #include "common/assert.h" | ||
| 17 | #include "common/common_types.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_arb_decompiler.h" | ||
| 19 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 20 | #include "video_core/shader/registry.h" | ||
| 21 | #include "video_core/shader/shader_ir.h" | ||
| 22 | |||
| 23 | // Predicates in the decompiled code follow the convention that -1 means true and 0 means false. | ||
| 24 | // GLASM lacks booleans, so they have to be implemented as integers. | ||
| 25 | // Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to | ||
| 26 | // select between two values, because -1 will be evaluated as true and 0 as false. | ||
| 27 | |||
| 28 | namespace OpenGL { | ||
| 29 | |||
| 30 | namespace { | ||
| 31 | |||
| 32 | using Tegra::Engines::ShaderType; | ||
| 33 | using Tegra::Shader::Attribute; | ||
| 34 | using Tegra::Shader::PixelImap; | ||
| 35 | using Tegra::Shader::Register; | ||
| 36 | using namespace VideoCommon::Shader; | ||
| 37 | using Operation = const OperationNode&; | ||
| 38 | |||
| 39 | constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"}; | ||
| 40 | |||
| 41 | char Swizzle(std::size_t component) { | ||
| 42 | ASSERT(component < 4); | ||
| 43 | return component["xyzw"]; | ||
| 44 | } | ||
| 45 | |||
| 46 | constexpr bool IsGenericAttribute(Attribute::Index index) { | ||
| 47 | return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; | ||
| 48 | } | ||
| 49 | |||
| 50 | u32 GetGenericAttributeIndex(Attribute::Index index) { | ||
| 51 | ASSERT(IsGenericAttribute(index)); | ||
| 52 | return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); | ||
| 53 | } | ||
| 54 | |||
| 55 | std::string_view Modifiers(Operation operation) { | ||
| 56 | const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta()); | ||
| 57 | if (meta && meta->precise) { | ||
| 58 | return ".PREC"; | ||
| 59 | } | ||
| 60 | return ""; | ||
| 61 | } | ||
| 62 | |||
| 63 | std::string_view GetInputFlags(PixelImap attribute) { | ||
| 64 | switch (attribute) { | ||
| 65 | case PixelImap::Perspective: | ||
| 66 | return ""; | ||
| 67 | case PixelImap::Constant: | ||
| 68 | return "FLAT "; | ||
| 69 | case PixelImap::ScreenLinear: | ||
| 70 | return "NOPERSPECTIVE "; | ||
| 71 | case PixelImap::Unused: | ||
| 72 | break; | ||
| 73 | } | ||
| 74 | UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute)); | ||
| 75 | return {}; | ||
| 76 | } | ||
| 77 | |||
| 78 | std::string_view ImageType(Tegra::Shader::ImageType image_type) { | ||
| 79 | switch (image_type) { | ||
| 80 | case Tegra::Shader::ImageType::Texture1D: | ||
| 81 | return "1D"; | ||
| 82 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 83 | return "BUFFER"; | ||
| 84 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 85 | return "ARRAY1D"; | ||
| 86 | case Tegra::Shader::ImageType::Texture2D: | ||
| 87 | return "2D"; | ||
| 88 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 89 | return "ARRAY2D"; | ||
| 90 | case Tegra::Shader::ImageType::Texture3D: | ||
| 91 | return "3D"; | ||
| 92 | } | ||
| 93 | UNREACHABLE(); | ||
| 94 | return {}; | ||
| 95 | } | ||
| 96 | |||
| 97 | std::string_view StackName(MetaStackClass stack) { | ||
| 98 | switch (stack) { | ||
| 99 | case MetaStackClass::Ssy: | ||
| 100 | return "SSY"; | ||
| 101 | case MetaStackClass::Pbk: | ||
| 102 | return "PBK"; | ||
| 103 | } | ||
| 104 | UNREACHABLE(); | ||
| 105 | return ""; | ||
| 106 | }; | ||
| 107 | |||
| 108 | std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { | ||
| 109 | switch (topology) { | ||
| 110 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: | ||
| 111 | return "POINTS"; | ||
| 112 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: | ||
| 113 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: | ||
| 114 | return "LINES"; | ||
| 115 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: | ||
| 116 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: | ||
| 117 | return "LINES_ADJACENCY"; | ||
| 118 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: | ||
| 119 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: | ||
| 120 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: | ||
| 121 | return "TRIANGLES"; | ||
| 122 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: | ||
| 123 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: | ||
| 124 | return "TRIANGLES_ADJACENCY"; | ||
| 125 | default: | ||
| 126 | UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology)); | ||
| 127 | return "POINTS"; | ||
| 128 | } | ||
| 129 | } | ||
| 130 | |||
| 131 | std::string_view TopologyName(Tegra::Shader::OutputTopology topology) { | ||
| 132 | switch (topology) { | ||
| 133 | case Tegra::Shader::OutputTopology::PointList: | ||
| 134 | return "POINTS"; | ||
| 135 | case Tegra::Shader::OutputTopology::LineStrip: | ||
| 136 | return "LINE_STRIP"; | ||
| 137 | case Tegra::Shader::OutputTopology::TriangleStrip: | ||
| 138 | return "TRIANGLE_STRIP"; | ||
| 139 | default: | ||
| 140 | UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology)); | ||
| 141 | return "points"; | ||
| 142 | } | ||
| 143 | } | ||
| 144 | |||
| 145 | std::string_view StageInputName(ShaderType stage) { | ||
| 146 | switch (stage) { | ||
| 147 | case ShaderType::Vertex: | ||
| 148 | case ShaderType::Geometry: | ||
| 149 | return "vertex"; | ||
| 150 | case ShaderType::Fragment: | ||
| 151 | return "fragment"; | ||
| 152 | case ShaderType::Compute: | ||
| 153 | return "invocation"; | ||
| 154 | default: | ||
| 155 | UNREACHABLE(); | ||
| 156 | return ""; | ||
| 157 | } | ||
| 158 | } | ||
| 159 | |||
| 160 | std::string TextureType(const MetaTexture& meta) { | ||
| 161 | if (meta.sampler.is_buffer) { | ||
| 162 | return "BUFFER"; | ||
| 163 | } | ||
| 164 | std::string type; | ||
| 165 | if (meta.sampler.is_shadow) { | ||
| 166 | type += "SHADOW"; | ||
| 167 | } | ||
| 168 | if (meta.sampler.is_array) { | ||
| 169 | type += "ARRAY"; | ||
| 170 | } | ||
| 171 | type += [&meta] { | ||
| 172 | switch (meta.sampler.type) { | ||
| 173 | case Tegra::Shader::TextureType::Texture1D: | ||
| 174 | return "1D"; | ||
| 175 | case Tegra::Shader::TextureType::Texture2D: | ||
| 176 | return "2D"; | ||
| 177 | case Tegra::Shader::TextureType::Texture3D: | ||
| 178 | return "3D"; | ||
| 179 | case Tegra::Shader::TextureType::TextureCube: | ||
| 180 | return "CUBE"; | ||
| 181 | } | ||
| 182 | UNREACHABLE(); | ||
| 183 | return "2D"; | ||
| 184 | }(); | ||
| 185 | return type; | ||
| 186 | } | ||
| 187 | |||
| 188 | std::string GlobalMemoryName(const GlobalMemoryBase& base) { | ||
| 189 | return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset); | ||
| 190 | } | ||
| 191 | |||
| 192 | class ARBDecompiler final { | ||
| 193 | public: | ||
| 194 | explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, | ||
| 195 | ShaderType stage, std::string_view identifier); | ||
| 196 | |||
| 197 | std::string Code() const { | ||
| 198 | return shader_source; | ||
| 199 | } | ||
| 200 | |||
| 201 | private: | ||
| 202 | void DeclareHeader(); | ||
| 203 | void DeclareVertex(); | ||
| 204 | void DeclareGeometry(); | ||
| 205 | void DeclareFragment(); | ||
| 206 | void DeclareCompute(); | ||
| 207 | void DeclareInputAttributes(); | ||
| 208 | void DeclareOutputAttributes(); | ||
| 209 | void DeclareLocalMemory(); | ||
| 210 | void DeclareGlobalMemory(); | ||
| 211 | void DeclareConstantBuffers(); | ||
| 212 | void DeclareRegisters(); | ||
| 213 | void DeclareTemporaries(); | ||
| 214 | void DeclarePredicates(); | ||
| 215 | void DeclareInternalFlags(); | ||
| 216 | |||
| 217 | void InitializeVariables(); | ||
| 218 | |||
| 219 | void DecompileAST(); | ||
| 220 | void DecompileBranchMode(); | ||
| 221 | |||
| 222 | void VisitAST(const ASTNode& node); | ||
| 223 | std::string VisitExpression(const Expr& node); | ||
| 224 | |||
| 225 | void VisitBlock(const NodeBlock& bb); | ||
| 226 | |||
| 227 | std::string Visit(const Node& node); | ||
| 228 | |||
| 229 | std::pair<std::string, std::size_t> BuildCoords(Operation); | ||
| 230 | std::string BuildAoffi(Operation); | ||
| 231 | void Exit(); | ||
| 232 | |||
| 233 | std::string Assign(Operation); | ||
| 234 | std::string Select(Operation); | ||
| 235 | std::string FClamp(Operation); | ||
| 236 | std::string FCastHalf0(Operation); | ||
| 237 | std::string FCastHalf1(Operation); | ||
| 238 | std::string FSqrt(Operation); | ||
| 239 | std::string FSwizzleAdd(Operation); | ||
| 240 | std::string HAdd2(Operation); | ||
| 241 | std::string HMul2(Operation); | ||
| 242 | std::string HFma2(Operation); | ||
| 243 | std::string HAbsolute(Operation); | ||
| 244 | std::string HNegate(Operation); | ||
| 245 | std::string HClamp(Operation); | ||
| 246 | std::string HCastFloat(Operation); | ||
| 247 | std::string HUnpack(Operation); | ||
| 248 | std::string HMergeF32(Operation); | ||
| 249 | std::string HMergeH0(Operation); | ||
| 250 | std::string HMergeH1(Operation); | ||
| 251 | std::string HPack2(Operation); | ||
| 252 | std::string LogicalAssign(Operation); | ||
| 253 | std::string LogicalPick2(Operation); | ||
| 254 | std::string LogicalAnd2(Operation); | ||
| 255 | std::string FloatOrdered(Operation); | ||
| 256 | std::string FloatUnordered(Operation); | ||
| 257 | std::string LogicalAddCarry(Operation); | ||
| 258 | std::string Texture(Operation); | ||
| 259 | std::string TextureGather(Operation); | ||
| 260 | std::string TextureQueryDimensions(Operation); | ||
| 261 | std::string TextureQueryLod(Operation); | ||
| 262 | std::string TexelFetch(Operation); | ||
| 263 | std::string TextureGradient(Operation); | ||
| 264 | std::string ImageLoad(Operation); | ||
| 265 | std::string ImageStore(Operation); | ||
| 266 | std::string Branch(Operation); | ||
| 267 | std::string BranchIndirect(Operation); | ||
| 268 | std::string PushFlowStack(Operation); | ||
| 269 | std::string PopFlowStack(Operation); | ||
| 270 | std::string Exit(Operation); | ||
| 271 | std::string Discard(Operation); | ||
| 272 | std::string EmitVertex(Operation); | ||
| 273 | std::string EndPrimitive(Operation); | ||
| 274 | std::string InvocationId(Operation); | ||
| 275 | std::string YNegate(Operation); | ||
| 276 | std::string ThreadId(Operation); | ||
| 277 | std::string ShuffleIndexed(Operation); | ||
| 278 | std::string Barrier(Operation); | ||
| 279 | std::string MemoryBarrierGroup(Operation); | ||
| 280 | std::string MemoryBarrierGlobal(Operation); | ||
| 281 | |||
| 282 | template <const std::string_view& op> | ||
| 283 | std::string Unary(Operation operation) { | ||
| 284 | const std::string temporary = AllocTemporary(); | ||
| 285 | AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0])); | ||
| 286 | return temporary; | ||
| 287 | } | ||
| 288 | |||
| 289 | template <const std::string_view& op> | ||
| 290 | std::string Binary(Operation operation) { | ||
| 291 | const std::string temporary = AllocTemporary(); | ||
| 292 | AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), | ||
| 293 | Visit(operation[1])); | ||
| 294 | return temporary; | ||
| 295 | } | ||
| 296 | |||
| 297 | template <const std::string_view& op> | ||
| 298 | std::string Trinary(Operation operation) { | ||
| 299 | const std::string temporary = AllocTemporary(); | ||
| 300 | AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), | ||
| 301 | Visit(operation[1]), Visit(operation[2])); | ||
| 302 | return temporary; | ||
| 303 | } | ||
| 304 | |||
| 305 | template <const std::string_view& op, bool unordered> | ||
| 306 | std::string FloatComparison(Operation operation) { | ||
| 307 | const std::string temporary = AllocTemporary(); | ||
| 308 | AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation)); | ||
| 309 | AddLine("MOV.S {}, 0;", temporary); | ||
| 310 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 311 | |||
| 312 | const std::string op_a = Visit(operation[0]); | ||
| 313 | const std::string op_b = Visit(operation[1]); | ||
| 314 | if constexpr (unordered) { | ||
| 315 | AddLine("SNE.F RC.x, {}, {};", op_a, op_a); | ||
| 316 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 317 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 318 | AddLine("SNE.F RC.x, {}, {};", op_b, op_b); | ||
| 319 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 320 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 321 | } else if (op == SNE_F) { | ||
| 322 | AddLine("SNE.F RC.x, {}, {};", op_a, op_a); | ||
| 323 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 324 | AddLine("MOV.S {} (NE.x), 0;", temporary); | ||
| 325 | AddLine("SNE.F RC.x, {}, {};", op_b, op_b); | ||
| 326 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 327 | AddLine("MOV.S {} (NE.x), 0;", temporary); | ||
| 328 | } | ||
| 329 | return temporary; | ||
| 330 | } | ||
| 331 | |||
| 332 | template <const std::string_view& op, bool is_nan> | ||
| 333 | std::string HalfComparison(Operation operation) { | ||
| 334 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 335 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 336 | const std::string op_a = Visit(operation[0]); | ||
| 337 | const std::string op_b = Visit(operation[1]); | ||
| 338 | AddLine("UP2H.F {}, {};", tmp1, op_a); | ||
| 339 | AddLine("UP2H.F {}, {};", tmp2, op_b); | ||
| 340 | AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2); | ||
| 341 | AddLine("TRUNC.U.CC RC.xy, {};", tmp1); | ||
| 342 | AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1); | ||
| 343 | AddLine("MOV.S {}.x (NE.x), -1;", tmp1); | ||
| 344 | AddLine("MOV.S {}.y (NE.y), -1;", tmp1); | ||
| 345 | if constexpr (is_nan) { | ||
| 346 | AddLine("MOVC.F RC.x, {};", op_a); | ||
| 347 | AddLine("MOV.S {}.x (NAN.x), -1;", tmp1); | ||
| 348 | AddLine("MOVC.F RC.x, {};", op_b); | ||
| 349 | AddLine("MOV.S {}.y (NAN.x), -1;", tmp1); | ||
| 350 | } | ||
| 351 | return tmp1; | ||
| 352 | } | ||
| 353 | |||
| 354 | template <const std::string_view& op, const std::string_view& type> | ||
| 355 | std::string AtomicImage(Operation operation) { | ||
| 356 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 357 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 358 | const std::size_t num_coords = operation.GetOperandsCount(); | ||
| 359 | const std::size_t num_values = meta.values.size(); | ||
| 360 | |||
| 361 | const std::string coord = AllocVectorTemporary(); | ||
| 362 | const std::string value = AllocVectorTemporary(); | ||
| 363 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 364 | AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); | ||
| 365 | } | ||
| 366 | for (std::size_t i = 0; i < num_values; ++i) { | ||
| 367 | AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); | ||
| 368 | } | ||
| 369 | |||
| 370 | const std::string result = coord; | ||
| 371 | AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, result, value, coord, | ||
| 372 | image_id, ImageType(meta.image.type)); | ||
| 373 | return fmt::format("{}.x", result); | ||
| 374 | } | ||
| 375 | |||
| 376 | template <const std::string_view& op, const std::string_view& type> | ||
| 377 | std::string Atomic(Operation operation) { | ||
| 378 | const std::string temporary = AllocTemporary(); | ||
| 379 | std::string address; | ||
| 380 | std::string_view opname; | ||
| 381 | if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { | ||
| 382 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), | ||
| 383 | Visit(gmem->GetBaseAddress())); | ||
| 384 | address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary); | ||
| 385 | opname = "ATOMB"; | ||
| 386 | } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { | ||
| 387 | address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); | ||
| 388 | opname = "ATOMS"; | ||
| 389 | } else { | ||
| 390 | UNREACHABLE(); | ||
| 391 | return "{0, 0, 0, 0}"; | ||
| 392 | } | ||
| 393 | AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); | ||
| 394 | return temporary; | ||
| 395 | } | ||
| 396 | |||
| 397 | template <char type> | ||
| 398 | std::string Negate(Operation operation) { | ||
| 399 | const std::string temporary = AllocTemporary(); | ||
| 400 | if constexpr (type == 'F') { | ||
| 401 | AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0])); | ||
| 402 | } else { | ||
| 403 | AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0])); | ||
| 404 | } | ||
| 405 | return temporary; | ||
| 406 | } | ||
| 407 | |||
| 408 | template <char type> | ||
| 409 | std::string Absolute(Operation operation) { | ||
| 410 | const std::string temporary = AllocTemporary(); | ||
| 411 | AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0])); | ||
| 412 | return temporary; | ||
| 413 | } | ||
| 414 | |||
| 415 | template <char type> | ||
| 416 | std::string BitfieldInsert(Operation operation) { | ||
| 417 | const std::string temporary = AllocVectorTemporary(); | ||
| 418 | AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3])); | ||
| 419 | AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2])); | ||
| 420 | AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]), | ||
| 421 | Visit(operation[0])); | ||
| 422 | return fmt::format("{}.x", temporary); | ||
| 423 | } | ||
| 424 | |||
| 425 | template <char type> | ||
| 426 | std::string BitfieldExtract(Operation operation) { | ||
| 427 | const std::string temporary = AllocVectorTemporary(); | ||
| 428 | AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2])); | ||
| 429 | AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1])); | ||
| 430 | AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0])); | ||
| 431 | return fmt::format("{}.x", temporary); | ||
| 432 | } | ||
| 433 | |||
| 434 | template <char swizzle> | ||
| 435 | std::string LocalInvocationId(Operation) { | ||
| 436 | return fmt::format("invocation.localid.{}", swizzle); | ||
| 437 | } | ||
| 438 | |||
| 439 | template <char swizzle> | ||
| 440 | std::string WorkGroupId(Operation) { | ||
| 441 | return fmt::format("invocation.groupid.{}", swizzle); | ||
| 442 | } | ||
| 443 | |||
| 444 | template <char c1, char c2> | ||
| 445 | std::string ThreadMask(Operation) { | ||
| 446 | return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2); | ||
| 447 | } | ||
| 448 | |||
| 449 | template <typename... Args> | ||
| 450 | void AddExpression(std::string_view text, Args&&... args) { | ||
| 451 | shader_source += fmt::format(text, std::forward<Args>(args)...); | ||
| 452 | } | ||
| 453 | |||
| 454 | template <typename... Args> | ||
| 455 | void AddLine(std::string_view text, Args&&... args) { | ||
| 456 | AddExpression(text, std::forward<Args>(args)...); | ||
| 457 | shader_source += '\n'; | ||
| 458 | } | ||
| 459 | |||
| 460 | std::string AllocTemporary() { | ||
| 461 | max_temporaries = std::max(max_temporaries, num_temporaries + 1); | ||
| 462 | return fmt::format("T{}.x", num_temporaries++); | ||
| 463 | } | ||
| 464 | |||
| 465 | std::string AllocVectorTemporary() { | ||
| 466 | max_temporaries = std::max(max_temporaries, num_temporaries + 1); | ||
| 467 | return fmt::format("T{}", num_temporaries++); | ||
| 468 | } | ||
| 469 | |||
| 470 | void ResetTemporaries() noexcept { | ||
| 471 | num_temporaries = 0; | ||
| 472 | } | ||
| 473 | |||
| 474 | const Device& device; | ||
| 475 | const ShaderIR& ir; | ||
| 476 | const Registry& registry; | ||
| 477 | const ShaderType stage; | ||
| 478 | |||
| 479 | std::size_t num_temporaries = 0; | ||
| 480 | std::size_t max_temporaries = 0; | ||
| 481 | |||
| 482 | std::string shader_source; | ||
| 483 | |||
| 484 | static constexpr std::string_view ADD_F32 = "ADD.F32"; | ||
| 485 | static constexpr std::string_view ADD_S = "ADD.S"; | ||
| 486 | static constexpr std::string_view ADD_U = "ADD.U"; | ||
| 487 | static constexpr std::string_view MUL_F32 = "MUL.F32"; | ||
| 488 | static constexpr std::string_view MUL_S = "MUL.S"; | ||
| 489 | static constexpr std::string_view MUL_U = "MUL.U"; | ||
| 490 | static constexpr std::string_view DIV_F32 = "DIV.F32"; | ||
| 491 | static constexpr std::string_view DIV_S = "DIV.S"; | ||
| 492 | static constexpr std::string_view DIV_U = "DIV.U"; | ||
| 493 | static constexpr std::string_view MAD_F32 = "MAD.F32"; | ||
| 494 | static constexpr std::string_view RSQ_F32 = "RSQ.F32"; | ||
| 495 | static constexpr std::string_view COS_F32 = "COS.F32"; | ||
| 496 | static constexpr std::string_view SIN_F32 = "SIN.F32"; | ||
| 497 | static constexpr std::string_view EX2_F32 = "EX2.F32"; | ||
| 498 | static constexpr std::string_view LG2_F32 = "LG2.F32"; | ||
| 499 | static constexpr std::string_view SLT_F = "SLT.F32"; | ||
| 500 | static constexpr std::string_view SLT_S = "SLT.S"; | ||
| 501 | static constexpr std::string_view SLT_U = "SLT.U"; | ||
| 502 | static constexpr std::string_view SEQ_F = "SEQ.F32"; | ||
| 503 | static constexpr std::string_view SEQ_S = "SEQ.S"; | ||
| 504 | static constexpr std::string_view SEQ_U = "SEQ.U"; | ||
| 505 | static constexpr std::string_view SLE_F = "SLE.F32"; | ||
| 506 | static constexpr std::string_view SLE_S = "SLE.S"; | ||
| 507 | static constexpr std::string_view SLE_U = "SLE.U"; | ||
| 508 | static constexpr std::string_view SGT_F = "SGT.F32"; | ||
| 509 | static constexpr std::string_view SGT_S = "SGT.S"; | ||
| 510 | static constexpr std::string_view SGT_U = "SGT.U"; | ||
| 511 | static constexpr std::string_view SNE_F = "SNE.F32"; | ||
| 512 | static constexpr std::string_view SNE_S = "SNE.S"; | ||
| 513 | static constexpr std::string_view SNE_U = "SNE.U"; | ||
| 514 | static constexpr std::string_view SGE_F = "SGE.F32"; | ||
| 515 | static constexpr std::string_view SGE_S = "SGE.S"; | ||
| 516 | static constexpr std::string_view SGE_U = "SGE.U"; | ||
| 517 | static constexpr std::string_view AND_S = "AND.S"; | ||
| 518 | static constexpr std::string_view AND_U = "AND.U"; | ||
| 519 | static constexpr std::string_view TRUNC_F = "TRUNC.F"; | ||
| 520 | static constexpr std::string_view TRUNC_S = "TRUNC.S"; | ||
| 521 | static constexpr std::string_view TRUNC_U = "TRUNC.U"; | ||
| 522 | static constexpr std::string_view SHL_S = "SHL.S"; | ||
| 523 | static constexpr std::string_view SHL_U = "SHL.U"; | ||
| 524 | static constexpr std::string_view SHR_S = "SHR.S"; | ||
| 525 | static constexpr std::string_view SHR_U = "SHR.U"; | ||
| 526 | static constexpr std::string_view OR_S = "OR.S"; | ||
| 527 | static constexpr std::string_view OR_U = "OR.U"; | ||
| 528 | static constexpr std::string_view XOR_S = "XOR.S"; | ||
| 529 | static constexpr std::string_view XOR_U = "XOR.U"; | ||
| 530 | static constexpr std::string_view NOT_S = "NOT.S"; | ||
| 531 | static constexpr std::string_view NOT_U = "NOT.U"; | ||
| 532 | static constexpr std::string_view BTC_S = "BTC.S"; | ||
| 533 | static constexpr std::string_view BTC_U = "BTC.U"; | ||
| 534 | static constexpr std::string_view BTFM_S = "BTFM.S"; | ||
| 535 | static constexpr std::string_view BTFM_U = "BTFM.U"; | ||
| 536 | static constexpr std::string_view ROUND_F = "ROUND.F"; | ||
| 537 | static constexpr std::string_view CEIL_F = "CEIL.F"; | ||
| 538 | static constexpr std::string_view FLR_F = "FLR.F"; | ||
| 539 | static constexpr std::string_view I2F_S = "I2F.S"; | ||
| 540 | static constexpr std::string_view I2F_U = "I2F.U"; | ||
| 541 | static constexpr std::string_view MIN_F = "MIN.F"; | ||
| 542 | static constexpr std::string_view MIN_S = "MIN.S"; | ||
| 543 | static constexpr std::string_view MIN_U = "MIN.U"; | ||
| 544 | static constexpr std::string_view MAX_F = "MAX.F"; | ||
| 545 | static constexpr std::string_view MAX_S = "MAX.S"; | ||
| 546 | static constexpr std::string_view MAX_U = "MAX.U"; | ||
| 547 | static constexpr std::string_view MOV_U = "MOV.U"; | ||
| 548 | static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U"; | ||
| 549 | static constexpr std::string_view TGALL_U = "TGALL.U"; | ||
| 550 | static constexpr std::string_view TGANY_U = "TGANY.U"; | ||
| 551 | static constexpr std::string_view TGEQ_U = "TGEQ.U"; | ||
| 552 | static constexpr std::string_view EXCH = "EXCH"; | ||
| 553 | static constexpr std::string_view ADD = "ADD"; | ||
| 554 | static constexpr std::string_view MIN = "MIN"; | ||
| 555 | static constexpr std::string_view MAX = "MAX"; | ||
| 556 | static constexpr std::string_view AND = "AND"; | ||
| 557 | static constexpr std::string_view OR = "OR"; | ||
| 558 | static constexpr std::string_view XOR = "XOR"; | ||
| 559 | static constexpr std::string_view U32 = "U32"; | ||
| 560 | static constexpr std::string_view S32 = "S32"; | ||
| 561 | |||
| 562 | static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount); | ||
| 563 | using DecompilerType = std::string (ARBDecompiler::*)(Operation); | ||
| 564 | static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = { | ||
| 565 | &ARBDecompiler::Assign, | ||
| 566 | |||
| 567 | &ARBDecompiler::Select, | ||
| 568 | |||
| 569 | &ARBDecompiler::Binary<ADD_F32>, | ||
| 570 | &ARBDecompiler::Binary<MUL_F32>, | ||
| 571 | &ARBDecompiler::Binary<DIV_F32>, | ||
| 572 | &ARBDecompiler::Trinary<MAD_F32>, | ||
| 573 | &ARBDecompiler::Negate<'F'>, | ||
| 574 | &ARBDecompiler::Absolute<'F'>, | ||
| 575 | &ARBDecompiler::FClamp, | ||
| 576 | &ARBDecompiler::FCastHalf0, | ||
| 577 | &ARBDecompiler::FCastHalf1, | ||
| 578 | &ARBDecompiler::Binary<MIN_F>, | ||
| 579 | &ARBDecompiler::Binary<MAX_F>, | ||
| 580 | &ARBDecompiler::Unary<COS_F32>, | ||
| 581 | &ARBDecompiler::Unary<SIN_F32>, | ||
| 582 | &ARBDecompiler::Unary<EX2_F32>, | ||
| 583 | &ARBDecompiler::Unary<LG2_F32>, | ||
| 584 | &ARBDecompiler::Unary<RSQ_F32>, | ||
| 585 | &ARBDecompiler::FSqrt, | ||
| 586 | &ARBDecompiler::Unary<ROUND_F>, | ||
| 587 | &ARBDecompiler::Unary<FLR_F>, | ||
| 588 | &ARBDecompiler::Unary<CEIL_F>, | ||
| 589 | &ARBDecompiler::Unary<TRUNC_F>, | ||
| 590 | &ARBDecompiler::Unary<I2F_S>, | ||
| 591 | &ARBDecompiler::Unary<I2F_U>, | ||
| 592 | &ARBDecompiler::FSwizzleAdd, | ||
| 593 | |||
| 594 | &ARBDecompiler::Binary<ADD_S>, | ||
| 595 | &ARBDecompiler::Binary<MUL_S>, | ||
| 596 | &ARBDecompiler::Binary<DIV_S>, | ||
| 597 | &ARBDecompiler::Negate<'S'>, | ||
| 598 | &ARBDecompiler::Absolute<'S'>, | ||
| 599 | &ARBDecompiler::Binary<MIN_S>, | ||
| 600 | &ARBDecompiler::Binary<MAX_S>, | ||
| 601 | |||
| 602 | &ARBDecompiler::Unary<TRUNC_S>, | ||
| 603 | &ARBDecompiler::Unary<MOV_U>, | ||
| 604 | &ARBDecompiler::Binary<SHL_S>, | ||
| 605 | &ARBDecompiler::Binary<SHR_U>, | ||
| 606 | &ARBDecompiler::Binary<SHR_S>, | ||
| 607 | &ARBDecompiler::Binary<AND_S>, | ||
| 608 | &ARBDecompiler::Binary<OR_S>, | ||
| 609 | &ARBDecompiler::Binary<XOR_S>, | ||
| 610 | &ARBDecompiler::Unary<NOT_S>, | ||
| 611 | &ARBDecompiler::BitfieldInsert<'S'>, | ||
| 612 | &ARBDecompiler::BitfieldExtract<'S'>, | ||
| 613 | &ARBDecompiler::Unary<BTC_S>, | ||
| 614 | &ARBDecompiler::Unary<BTFM_S>, | ||
| 615 | |||
| 616 | &ARBDecompiler::Binary<ADD_U>, | ||
| 617 | &ARBDecompiler::Binary<MUL_U>, | ||
| 618 | &ARBDecompiler::Binary<DIV_U>, | ||
| 619 | &ARBDecompiler::Binary<MIN_U>, | ||
| 620 | &ARBDecompiler::Binary<MAX_U>, | ||
| 621 | &ARBDecompiler::Unary<TRUNC_U>, | ||
| 622 | &ARBDecompiler::Unary<MOV_U>, | ||
| 623 | &ARBDecompiler::Binary<SHL_U>, | ||
| 624 | &ARBDecompiler::Binary<SHR_U>, | ||
| 625 | &ARBDecompiler::Binary<SHR_U>, | ||
| 626 | &ARBDecompiler::Binary<AND_U>, | ||
| 627 | &ARBDecompiler::Binary<OR_U>, | ||
| 628 | &ARBDecompiler::Binary<XOR_U>, | ||
| 629 | &ARBDecompiler::Unary<NOT_U>, | ||
| 630 | &ARBDecompiler::BitfieldInsert<'U'>, | ||
| 631 | &ARBDecompiler::BitfieldExtract<'U'>, | ||
| 632 | &ARBDecompiler::Unary<BTC_U>, | ||
| 633 | &ARBDecompiler::Unary<BTFM_U>, | ||
| 634 | |||
| 635 | &ARBDecompiler::HAdd2, | ||
| 636 | &ARBDecompiler::HMul2, | ||
| 637 | &ARBDecompiler::HFma2, | ||
| 638 | &ARBDecompiler::HAbsolute, | ||
| 639 | &ARBDecompiler::HNegate, | ||
| 640 | &ARBDecompiler::HClamp, | ||
| 641 | &ARBDecompiler::HCastFloat, | ||
| 642 | &ARBDecompiler::HUnpack, | ||
| 643 | &ARBDecompiler::HMergeF32, | ||
| 644 | &ARBDecompiler::HMergeH0, | ||
| 645 | &ARBDecompiler::HMergeH1, | ||
| 646 | &ARBDecompiler::HPack2, | ||
| 647 | |||
| 648 | &ARBDecompiler::LogicalAssign, | ||
| 649 | &ARBDecompiler::Binary<AND_U>, | ||
| 650 | &ARBDecompiler::Binary<OR_U>, | ||
| 651 | &ARBDecompiler::Binary<XOR_U>, | ||
| 652 | &ARBDecompiler::Unary<NOT_U>, | ||
| 653 | &ARBDecompiler::LogicalPick2, | ||
| 654 | &ARBDecompiler::LogicalAnd2, | ||
| 655 | |||
| 656 | &ARBDecompiler::FloatComparison<SLT_F, false>, | ||
| 657 | &ARBDecompiler::FloatComparison<SEQ_F, false>, | ||
| 658 | &ARBDecompiler::FloatComparison<SLE_F, false>, | ||
| 659 | &ARBDecompiler::FloatComparison<SGT_F, false>, | ||
| 660 | &ARBDecompiler::FloatComparison<SNE_F, false>, | ||
| 661 | &ARBDecompiler::FloatComparison<SGE_F, false>, | ||
| 662 | &ARBDecompiler::FloatOrdered, | ||
| 663 | &ARBDecompiler::FloatUnordered, | ||
| 664 | &ARBDecompiler::FloatComparison<SLT_F, true>, | ||
| 665 | &ARBDecompiler::FloatComparison<SEQ_F, true>, | ||
| 666 | &ARBDecompiler::FloatComparison<SLE_F, true>, | ||
| 667 | &ARBDecompiler::FloatComparison<SGT_F, true>, | ||
| 668 | &ARBDecompiler::FloatComparison<SNE_F, true>, | ||
| 669 | &ARBDecompiler::FloatComparison<SGE_F, true>, | ||
| 670 | |||
| 671 | &ARBDecompiler::Binary<SLT_S>, | ||
| 672 | &ARBDecompiler::Binary<SEQ_S>, | ||
| 673 | &ARBDecompiler::Binary<SLE_S>, | ||
| 674 | &ARBDecompiler::Binary<SGT_S>, | ||
| 675 | &ARBDecompiler::Binary<SNE_S>, | ||
| 676 | &ARBDecompiler::Binary<SGE_S>, | ||
| 677 | |||
| 678 | &ARBDecompiler::Binary<SLT_U>, | ||
| 679 | &ARBDecompiler::Binary<SEQ_U>, | ||
| 680 | &ARBDecompiler::Binary<SLE_U>, | ||
| 681 | &ARBDecompiler::Binary<SGT_U>, | ||
| 682 | &ARBDecompiler::Binary<SNE_U>, | ||
| 683 | &ARBDecompiler::Binary<SGE_U>, | ||
| 684 | |||
| 685 | &ARBDecompiler::LogicalAddCarry, | ||
| 686 | |||
| 687 | &ARBDecompiler::HalfComparison<SLT_F, false>, | ||
| 688 | &ARBDecompiler::HalfComparison<SEQ_F, false>, | ||
| 689 | &ARBDecompiler::HalfComparison<SLE_F, false>, | ||
| 690 | &ARBDecompiler::HalfComparison<SGT_F, false>, | ||
| 691 | &ARBDecompiler::HalfComparison<SNE_F, false>, | ||
| 692 | &ARBDecompiler::HalfComparison<SGE_F, false>, | ||
| 693 | &ARBDecompiler::HalfComparison<SLT_F, true>, | ||
| 694 | &ARBDecompiler::HalfComparison<SEQ_F, true>, | ||
| 695 | &ARBDecompiler::HalfComparison<SLE_F, true>, | ||
| 696 | &ARBDecompiler::HalfComparison<SGT_F, true>, | ||
| 697 | &ARBDecompiler::HalfComparison<SNE_F, true>, | ||
| 698 | &ARBDecompiler::HalfComparison<SGE_F, true>, | ||
| 699 | |||
| 700 | &ARBDecompiler::Texture, | ||
| 701 | &ARBDecompiler::Texture, | ||
| 702 | &ARBDecompiler::TextureGather, | ||
| 703 | &ARBDecompiler::TextureQueryDimensions, | ||
| 704 | &ARBDecompiler::TextureQueryLod, | ||
| 705 | &ARBDecompiler::TexelFetch, | ||
| 706 | &ARBDecompiler::TextureGradient, | ||
| 707 | |||
| 708 | &ARBDecompiler::ImageLoad, | ||
| 709 | &ARBDecompiler::ImageStore, | ||
| 710 | |||
| 711 | &ARBDecompiler::AtomicImage<ADD, U32>, | ||
| 712 | &ARBDecompiler::AtomicImage<AND, U32>, | ||
| 713 | &ARBDecompiler::AtomicImage<OR, U32>, | ||
| 714 | &ARBDecompiler::AtomicImage<XOR, U32>, | ||
| 715 | &ARBDecompiler::AtomicImage<EXCH, U32>, | ||
| 716 | |||
| 717 | &ARBDecompiler::Atomic<EXCH, U32>, | ||
| 718 | &ARBDecompiler::Atomic<ADD, U32>, | ||
| 719 | &ARBDecompiler::Atomic<MIN, U32>, | ||
| 720 | &ARBDecompiler::Atomic<MAX, U32>, | ||
| 721 | &ARBDecompiler::Atomic<AND, U32>, | ||
| 722 | &ARBDecompiler::Atomic<OR, U32>, | ||
| 723 | &ARBDecompiler::Atomic<XOR, U32>, | ||
| 724 | |||
| 725 | &ARBDecompiler::Atomic<EXCH, S32>, | ||
| 726 | &ARBDecompiler::Atomic<ADD, S32>, | ||
| 727 | &ARBDecompiler::Atomic<MIN, S32>, | ||
| 728 | &ARBDecompiler::Atomic<MAX, S32>, | ||
| 729 | &ARBDecompiler::Atomic<AND, S32>, | ||
| 730 | &ARBDecompiler::Atomic<OR, S32>, | ||
| 731 | &ARBDecompiler::Atomic<XOR, S32>, | ||
| 732 | |||
| 733 | &ARBDecompiler::Atomic<ADD, U32>, | ||
| 734 | &ARBDecompiler::Atomic<MIN, U32>, | ||
| 735 | &ARBDecompiler::Atomic<MAX, U32>, | ||
| 736 | &ARBDecompiler::Atomic<AND, U32>, | ||
| 737 | &ARBDecompiler::Atomic<OR, U32>, | ||
| 738 | &ARBDecompiler::Atomic<XOR, U32>, | ||
| 739 | |||
| 740 | &ARBDecompiler::Atomic<ADD, S32>, | ||
| 741 | &ARBDecompiler::Atomic<MIN, S32>, | ||
| 742 | &ARBDecompiler::Atomic<MAX, S32>, | ||
| 743 | &ARBDecompiler::Atomic<AND, S32>, | ||
| 744 | &ARBDecompiler::Atomic<OR, S32>, | ||
| 745 | &ARBDecompiler::Atomic<XOR, S32>, | ||
| 746 | |||
| 747 | &ARBDecompiler::Branch, | ||
| 748 | &ARBDecompiler::BranchIndirect, | ||
| 749 | &ARBDecompiler::PushFlowStack, | ||
| 750 | &ARBDecompiler::PopFlowStack, | ||
| 751 | &ARBDecompiler::Exit, | ||
| 752 | &ARBDecompiler::Discard, | ||
| 753 | |||
| 754 | &ARBDecompiler::EmitVertex, | ||
| 755 | &ARBDecompiler::EndPrimitive, | ||
| 756 | |||
| 757 | &ARBDecompiler::InvocationId, | ||
| 758 | &ARBDecompiler::YNegate, | ||
| 759 | &ARBDecompiler::LocalInvocationId<'x'>, | ||
| 760 | &ARBDecompiler::LocalInvocationId<'y'>, | ||
| 761 | &ARBDecompiler::LocalInvocationId<'z'>, | ||
| 762 | &ARBDecompiler::WorkGroupId<'x'>, | ||
| 763 | &ARBDecompiler::WorkGroupId<'y'>, | ||
| 764 | &ARBDecompiler::WorkGroupId<'z'>, | ||
| 765 | |||
| 766 | &ARBDecompiler::Unary<TGBALLOT_U>, | ||
| 767 | &ARBDecompiler::Unary<TGALL_U>, | ||
| 768 | &ARBDecompiler::Unary<TGANY_U>, | ||
| 769 | &ARBDecompiler::Unary<TGEQ_U>, | ||
| 770 | |||
| 771 | &ARBDecompiler::ThreadId, | ||
| 772 | &ARBDecompiler::ThreadMask<'e', 'q'>, | ||
| 773 | &ARBDecompiler::ThreadMask<'g', 'e'>, | ||
| 774 | &ARBDecompiler::ThreadMask<'g', 't'>, | ||
| 775 | &ARBDecompiler::ThreadMask<'l', 'e'>, | ||
| 776 | &ARBDecompiler::ThreadMask<'l', 't'>, | ||
| 777 | &ARBDecompiler::ShuffleIndexed, | ||
| 778 | |||
| 779 | &ARBDecompiler::Barrier, | ||
| 780 | &ARBDecompiler::MemoryBarrierGroup, | ||
| 781 | &ARBDecompiler::MemoryBarrierGlobal, | ||
| 782 | }; | ||
| 783 | }; | ||
| 784 | |||
| 785 | ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, | ||
| 786 | ShaderType stage, std::string_view identifier) | ||
| 787 | : device{device}, ir{ir}, registry{registry}, stage{stage} { | ||
| 788 | AddLine("TEMP RC;"); | ||
| 789 | AddLine("TEMP FSWZA[4];"); | ||
| 790 | AddLine("TEMP FSWZB[4];"); | ||
| 791 | if (ir.IsDecompiled()) { | ||
| 792 | DecompileAST(); | ||
| 793 | } else { | ||
| 794 | DecompileBranchMode(); | ||
| 795 | } | ||
| 796 | AddLine("END"); | ||
| 797 | |||
| 798 | const std::string code = std::move(shader_source); | ||
| 799 | DeclareHeader(); | ||
| 800 | DeclareVertex(); | ||
| 801 | DeclareGeometry(); | ||
| 802 | DeclareFragment(); | ||
| 803 | DeclareCompute(); | ||
| 804 | DeclareInputAttributes(); | ||
| 805 | DeclareOutputAttributes(); | ||
| 806 | DeclareLocalMemory(); | ||
| 807 | DeclareGlobalMemory(); | ||
| 808 | DeclareConstantBuffers(); | ||
| 809 | DeclareRegisters(); | ||
| 810 | DeclareTemporaries(); | ||
| 811 | DeclarePredicates(); | ||
| 812 | DeclareInternalFlags(); | ||
| 813 | |||
| 814 | shader_source += code; | ||
| 815 | } | ||
| 816 | |||
| 817 | std::string_view HeaderStageName(ShaderType stage) { | ||
| 818 | switch (stage) { | ||
| 819 | case ShaderType::Vertex: | ||
| 820 | return "vp"; | ||
| 821 | case ShaderType::Geometry: | ||
| 822 | return "gp"; | ||
| 823 | case ShaderType::Fragment: | ||
| 824 | return "fp"; | ||
| 825 | case ShaderType::Compute: | ||
| 826 | return "cp"; | ||
| 827 | default: | ||
| 828 | UNREACHABLE(); | ||
| 829 | return ""; | ||
| 830 | } | ||
| 831 | } | ||
| 832 | |||
| 833 | void ARBDecompiler::DeclareHeader() { | ||
| 834 | AddLine("!!NV{}5.0", HeaderStageName(stage)); | ||
| 835 | // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D | ||
| 836 | AddLine("OPTION NV_internal;"); | ||
| 837 | AddLine("OPTION NV_gpu_program_fp64;"); | ||
| 838 | AddLine("OPTION NV_shader_storage_buffer;"); | ||
| 839 | AddLine("OPTION NV_shader_thread_group;"); | ||
| 840 | if (ir.UsesWarps() && device.HasWarpIntrinsics()) { | ||
| 841 | AddLine("OPTION NV_shader_thread_shuffle;"); | ||
| 842 | } | ||
| 843 | if (stage == ShaderType::Vertex) { | ||
| 844 | if (device.HasNvViewportArray2()) { | ||
| 845 | AddLine("OPTION NV_viewport_array2;"); | ||
| 846 | } | ||
| 847 | } | ||
| 848 | if (stage == ShaderType::Fragment) { | ||
| 849 | AddLine("OPTION ARB_draw_buffers;"); | ||
| 850 | } | ||
| 851 | if (device.HasImageLoadFormatted()) { | ||
| 852 | AddLine("OPTION EXT_shader_image_load_formatted;"); | ||
| 853 | } | ||
| 854 | } | ||
| 855 | |||
| 856 | void ARBDecompiler::DeclareVertex() { | ||
| 857 | if (stage != ShaderType::Vertex) { | ||
| 858 | return; | ||
| 859 | } | ||
| 860 | AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};"); | ||
| 861 | } | ||
| 862 | |||
| 863 | void ARBDecompiler::DeclareGeometry() { | ||
| 864 | if (stage != ShaderType::Geometry) { | ||
| 865 | return; | ||
| 866 | } | ||
| 867 | const auto& info = registry.GetGraphicsInfo(); | ||
| 868 | const auto& header = ir.GetHeader(); | ||
| 869 | AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology)); | ||
| 870 | AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology)); | ||
| 871 | AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value()); | ||
| 872 | AddLine("ATTRIB vertex_position = vertex.position;"); | ||
| 873 | } | ||
| 874 | |||
| 875 | void ARBDecompiler::DeclareFragment() { | ||
| 876 | if (stage != ShaderType::Fragment) { | ||
| 877 | return; | ||
| 878 | } | ||
| 879 | AddLine("OUTPUT result_color7 = result.color[7];"); | ||
| 880 | AddLine("OUTPUT result_color6 = result.color[6];"); | ||
| 881 | AddLine("OUTPUT result_color5 = result.color[5];"); | ||
| 882 | AddLine("OUTPUT result_color4 = result.color[4];"); | ||
| 883 | AddLine("OUTPUT result_color3 = result.color[3];"); | ||
| 884 | AddLine("OUTPUT result_color2 = result.color[2];"); | ||
| 885 | AddLine("OUTPUT result_color1 = result.color[1];"); | ||
| 886 | AddLine("OUTPUT result_color0 = result.color;"); | ||
| 887 | } | ||
| 888 | |||
| 889 | void ARBDecompiler::DeclareCompute() { | ||
| 890 | if (stage != ShaderType::Compute) { | ||
| 891 | return; | ||
| 892 | } | ||
| 893 | const ComputeInfo& info = registry.GetComputeInfo(); | ||
| 894 | AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], | ||
| 895 | info.workgroup_size[2]); | ||
| 896 | if (info.shared_memory_size_in_words > 0) { | ||
| 897 | const u32 size_in_bytes = info.shared_memory_size_in_words * 4; | ||
| 898 | AddLine("SHARED_MEMORY {};", size_in_bytes); | ||
| 899 | AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); | ||
| 900 | } | ||
| 901 | } | ||
| 902 | |||
| 903 | void ARBDecompiler::DeclareInputAttributes() { | ||
| 904 | if (stage == ShaderType::Compute) { | ||
| 905 | return; | ||
| 906 | } | ||
| 907 | const std::string_view stage_name = StageInputName(stage); | ||
| 908 | for (const auto attribute : ir.GetInputAttributes()) { | ||
| 909 | if (!IsGenericAttribute(attribute)) { | ||
| 910 | continue; | ||
| 911 | } | ||
| 912 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 913 | |||
| 914 | std::string_view suffix; | ||
| 915 | if (stage == ShaderType::Fragment) { | ||
| 916 | const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)}; | ||
| 917 | if (input_mode == PixelImap::Unused) { | ||
| 918 | return; | ||
| 919 | } | ||
| 920 | suffix = GetInputFlags(input_mode); | ||
| 921 | } | ||
| 922 | AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index, | ||
| 923 | index); | ||
| 924 | } | ||
| 925 | } | ||
| 926 | |||
| 927 | void ARBDecompiler::DeclareOutputAttributes() { | ||
| 928 | if (stage == ShaderType::Compute) { | ||
| 929 | return; | ||
| 930 | } | ||
| 931 | for (const auto attribute : ir.GetOutputAttributes()) { | ||
| 932 | if (!IsGenericAttribute(attribute)) { | ||
| 933 | continue; | ||
| 934 | } | ||
| 935 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 936 | AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index); | ||
| 937 | } | ||
| 938 | } | ||
| 939 | |||
| 940 | void ARBDecompiler::DeclareLocalMemory() { | ||
| 941 | u64 size = 0; | ||
| 942 | if (stage == ShaderType::Compute) { | ||
| 943 | size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; | ||
| 944 | } else { | ||
| 945 | size = ir.GetHeader().GetLocalMemorySize(); | ||
| 946 | } | ||
| 947 | if (size == 0) { | ||
| 948 | return; | ||
| 949 | } | ||
| 950 | const u64 element_count = Common::AlignUp(size, 4) / 4; | ||
| 951 | AddLine("TEMP lmem[{}];", element_count); | ||
| 952 | } | ||
| 953 | |||
| 954 | void ARBDecompiler::DeclareGlobalMemory() { | ||
| 955 | u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer; | ||
| 956 | for (const auto& pair : ir.GetGlobalMemory()) { | ||
| 957 | const auto& base = pair.first; | ||
| 958 | AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding); | ||
| 959 | ++binding; | ||
| 960 | } | ||
| 961 | } | ||
| 962 | |||
| 963 | void ARBDecompiler::DeclareConstantBuffers() { | ||
| 964 | u32 binding = 0; | ||
| 965 | for (const auto& cbuf : ir.GetConstantBuffers()) { | ||
| 966 | AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding); | ||
| 967 | ++binding; | ||
| 968 | } | ||
| 969 | } | ||
| 970 | |||
| 971 | void ARBDecompiler::DeclareRegisters() { | ||
| 972 | for (const u32 gpr : ir.GetRegisters()) { | ||
| 973 | AddLine("TEMP R{};", gpr); | ||
| 974 | } | ||
| 975 | } | ||
| 976 | |||
| 977 | void ARBDecompiler::DeclareTemporaries() { | ||
| 978 | for (std::size_t i = 0; i < max_temporaries; ++i) { | ||
| 979 | AddLine("TEMP T{};", i); | ||
| 980 | } | ||
| 981 | } | ||
| 982 | |||
| 983 | void ARBDecompiler::DeclarePredicates() { | ||
| 984 | for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { | ||
| 985 | AddLine("TEMP P{};", static_cast<u64>(pred)); | ||
| 986 | } | ||
| 987 | } | ||
| 988 | |||
| 989 | void ARBDecompiler::DeclareInternalFlags() { | ||
| 990 | for (const char* name : INTERNAL_FLAG_NAMES) { | ||
| 991 | AddLine("TEMP {};", name); | ||
| 992 | } | ||
| 993 | } | ||
| 994 | |||
| 995 | void ARBDecompiler::InitializeVariables() { | ||
| 996 | AddLine("MOV.F32 FSWZA[0], -1;"); | ||
| 997 | AddLine("MOV.F32 FSWZA[1], 1;"); | ||
| 998 | AddLine("MOV.F32 FSWZA[2], -1;"); | ||
| 999 | AddLine("MOV.F32 FSWZA[3], 0;"); | ||
| 1000 | AddLine("MOV.F32 FSWZB[0], -1;"); | ||
| 1001 | AddLine("MOV.F32 FSWZB[1], -1;"); | ||
| 1002 | AddLine("MOV.F32 FSWZB[2], 1;"); | ||
| 1003 | AddLine("MOV.F32 FSWZB[3], -1;"); | ||
| 1004 | |||
| 1005 | if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) { | ||
| 1006 | AddLine("MOV.F result.position, {{0, 0, 0, 1}};"); | ||
| 1007 | } | ||
| 1008 | for (const auto attribute : ir.GetOutputAttributes()) { | ||
| 1009 | if (!IsGenericAttribute(attribute)) { | ||
| 1010 | continue; | ||
| 1011 | } | ||
| 1012 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 1013 | AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index); | ||
| 1014 | } | ||
| 1015 | for (const u32 gpr : ir.GetRegisters()) { | ||
| 1016 | AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr); | ||
| 1017 | } | ||
| 1018 | for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { | ||
| 1019 | AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred)); | ||
| 1020 | } | ||
| 1021 | } | ||
| 1022 | |||
| 1023 | void ARBDecompiler::DecompileAST() { | ||
| 1024 | const u32 num_flow_variables = ir.GetASTNumVariables(); | ||
| 1025 | for (u32 i = 0; i < num_flow_variables; ++i) { | ||
| 1026 | AddLine("TEMP F{};", i); | ||
| 1027 | } | ||
| 1028 | for (u32 i = 0; i < num_flow_variables; ++i) { | ||
| 1029 | AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i); | ||
| 1030 | } | ||
| 1031 | |||
| 1032 | InitializeVariables(); | ||
| 1033 | |||
| 1034 | VisitAST(ir.GetASTProgram()); | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | void ARBDecompiler::DecompileBranchMode() { | ||
| 1038 | static constexpr u32 FLOW_STACK_SIZE = 20; | ||
| 1039 | if (!ir.IsFlowStackDisabled()) { | ||
| 1040 | AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE); | ||
| 1041 | AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE); | ||
| 1042 | AddLine("TEMP SSY_TOP;"); | ||
| 1043 | AddLine("TEMP PBK_TOP;"); | ||
| 1044 | } | ||
| 1045 | |||
| 1046 | AddLine("TEMP PC;"); | ||
| 1047 | |||
| 1048 | if (!ir.IsFlowStackDisabled()) { | ||
| 1049 | AddLine("MOV.U SSY_TOP.x, 0;"); | ||
| 1050 | AddLine("MOV.U PBK_TOP.x, 0;"); | ||
| 1051 | } | ||
| 1052 | |||
| 1053 | InitializeVariables(); | ||
| 1054 | |||
| 1055 | const auto basic_block_end = ir.GetBasicBlocks().end(); | ||
| 1056 | auto basic_block_it = ir.GetBasicBlocks().begin(); | ||
| 1057 | const u32 first_address = basic_block_it->first; | ||
| 1058 | AddLine("MOV.U PC.x, {};", first_address); | ||
| 1059 | |||
| 1060 | AddLine("REP;"); | ||
| 1061 | |||
| 1062 | std::size_t num_blocks = 0; | ||
| 1063 | while (basic_block_it != basic_block_end) { | ||
| 1064 | const auto& [address, bb] = *basic_block_it; | ||
| 1065 | ++num_blocks; | ||
| 1066 | |||
| 1067 | AddLine("SEQ.S.CC RC.x, PC.x, {};", address); | ||
| 1068 | AddLine("IF NE.x;"); | ||
| 1069 | |||
| 1070 | VisitBlock(bb); | ||
| 1071 | |||
| 1072 | ++basic_block_it; | ||
| 1073 | |||
| 1074 | if (basic_block_it != basic_block_end) { | ||
| 1075 | const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]); | ||
| 1076 | if (!op || op->GetCode() != OperationCode::Branch) { | ||
| 1077 | const u32 next_address = basic_block_it->first; | ||
| 1078 | AddLine("MOV.U PC.x, {};", next_address); | ||
| 1079 | AddLine("CONT;"); | ||
| 1080 | } | ||
| 1081 | } | ||
| 1082 | |||
| 1083 | AddLine("ELSE;"); | ||
| 1084 | } | ||
| 1085 | AddLine("RET;"); | ||
| 1086 | while (num_blocks--) { | ||
| 1087 | AddLine("ENDIF;"); | ||
| 1088 | } | ||
| 1089 | |||
| 1090 | AddLine("ENDREP;"); | ||
| 1091 | } | ||
| 1092 | |||
| 1093 | void ARBDecompiler::VisitAST(const ASTNode& node) { | ||
| 1094 | if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) { | ||
| 1095 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1096 | VisitAST(current); | ||
| 1097 | } | ||
| 1098 | } else if (const auto ast = std::get_if<ASTIfThen>(&*node->GetInnerData())) { | ||
| 1099 | const std::string condition = VisitExpression(ast->condition); | ||
| 1100 | ResetTemporaries(); | ||
| 1101 | |||
| 1102 | AddLine("MOVC.U RC.x, {};", condition); | ||
| 1103 | AddLine("IF NE.x;"); | ||
| 1104 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1105 | VisitAST(current); | ||
| 1106 | } | ||
| 1107 | AddLine("ENDIF;"); | ||
| 1108 | } else if (const auto ast = std::get_if<ASTIfElse>(&*node->GetInnerData())) { | ||
| 1109 | AddLine("ELSE;"); | ||
| 1110 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1111 | VisitAST(current); | ||
| 1112 | } | ||
| 1113 | } else if (const auto ast = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) { | ||
| 1114 | VisitBlock(ast->nodes); | ||
| 1115 | } else if (const auto ast = std::get_if<ASTVarSet>(&*node->GetInnerData())) { | ||
| 1116 | AddLine("MOV.U F{}, {};", ast->index, VisitExpression(ast->condition)); | ||
| 1117 | ResetTemporaries(); | ||
| 1118 | } else if (const auto ast = std::get_if<ASTDoWhile>(&*node->GetInnerData())) { | ||
| 1119 | const std::string condition = VisitExpression(ast->condition); | ||
| 1120 | ResetTemporaries(); | ||
| 1121 | AddLine("REP;"); | ||
| 1122 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1123 | VisitAST(current); | ||
| 1124 | } | ||
| 1125 | AddLine("MOVC.U RC.x, {};", condition); | ||
| 1126 | AddLine("BRK (NE.x);"); | ||
| 1127 | AddLine("ENDREP;"); | ||
| 1128 | } else if (const auto ast = std::get_if<ASTReturn>(&*node->GetInnerData())) { | ||
| 1129 | const bool is_true = ExprIsTrue(ast->condition); | ||
| 1130 | if (!is_true) { | ||
| 1131 | AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition)); | ||
| 1132 | AddLine("IF NE.x;"); | ||
| 1133 | ResetTemporaries(); | ||
| 1134 | } | ||
| 1135 | if (ast->kills) { | ||
| 1136 | AddLine("KIL TR;"); | ||
| 1137 | } else { | ||
| 1138 | Exit(); | ||
| 1139 | } | ||
| 1140 | if (!is_true) { | ||
| 1141 | AddLine("ENDIF;"); | ||
| 1142 | } | ||
| 1143 | } else if (const auto ast = std::get_if<ASTBreak>(&*node->GetInnerData())) { | ||
| 1144 | if (ExprIsTrue(ast->condition)) { | ||
| 1145 | AddLine("BRK;"); | ||
| 1146 | } else { | ||
| 1147 | AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition)); | ||
| 1148 | AddLine("BRK (NE.x);"); | ||
| 1149 | ResetTemporaries(); | ||
| 1150 | } | ||
| 1151 | } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) { | ||
| 1152 | // Nothing to do | ||
| 1153 | } else { | ||
| 1154 | UNREACHABLE(); | ||
| 1155 | } | ||
| 1156 | } | ||
| 1157 | |||
| 1158 | std::string ARBDecompiler::VisitExpression(const Expr& node) { | ||
| 1159 | const std::string result = AllocTemporary(); | ||
| 1160 | if (const auto expr = std::get_if<ExprAnd>(&*node)) { | ||
| 1161 | AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1), | ||
| 1162 | VisitExpression(expr->operand2)); | ||
| 1163 | return result; | ||
| 1164 | } | ||
| 1165 | if (const auto expr = std::get_if<ExprOr>(&*node)) { | ||
| 1166 | const std::string result = AllocTemporary(); | ||
| 1167 | AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1), | ||
| 1168 | VisitExpression(expr->operand2)); | ||
| 1169 | return result; | ||
| 1170 | } | ||
| 1171 | if (const auto expr = std::get_if<ExprNot>(&*node)) { | ||
| 1172 | const std::string result = AllocTemporary(); | ||
| 1173 | AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1)); | ||
| 1174 | return result; | ||
| 1175 | } | ||
| 1176 | if (const auto expr = std::get_if<ExprPredicate>(&*node)) { | ||
| 1177 | return fmt::format("P{}.x", static_cast<u64>(expr->predicate)); | ||
| 1178 | } | ||
| 1179 | if (const auto expr = std::get_if<ExprCondCode>(&*node)) { | ||
| 1180 | return Visit(ir.GetConditionCode(expr->cc)); | ||
| 1181 | } | ||
| 1182 | if (const auto expr = std::get_if<ExprVar>(&*node)) { | ||
| 1183 | return fmt::format("F{}.x", expr->var_index); | ||
| 1184 | } | ||
| 1185 | if (const auto expr = std::get_if<ExprBoolean>(&*node)) { | ||
| 1186 | return expr->value ? "0xffffffff" : "0"; | ||
| 1187 | } | ||
| 1188 | if (const auto expr = std::get_if<ExprGprEqual>(&*node)) { | ||
| 1189 | const std::string result = AllocTemporary(); | ||
| 1190 | AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value); | ||
| 1191 | return result; | ||
| 1192 | } | ||
| 1193 | UNREACHABLE(); | ||
| 1194 | return "0"; | ||
| 1195 | } | ||
| 1196 | |||
| 1197 | void ARBDecompiler::VisitBlock(const NodeBlock& bb) { | ||
| 1198 | for (const auto& node : bb) { | ||
| 1199 | Visit(node); | ||
| 1200 | } | ||
| 1201 | } | ||
| 1202 | |||
| 1203 | std::string ARBDecompiler::Visit(const Node& node) { | ||
| 1204 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 1205 | if (const auto amend_index = operation->GetAmendIndex()) { | ||
| 1206 | Visit(ir.GetAmendNode(*amend_index)); | ||
| 1207 | } | ||
| 1208 | const std::size_t index = static_cast<std::size_t>(operation->GetCode()); | ||
| 1209 | if (index >= OPERATION_DECOMPILERS.size()) { | ||
| 1210 | UNREACHABLE_MSG("Out of bounds operation: {}", index); | ||
| 1211 | return {}; | ||
| 1212 | } | ||
| 1213 | const auto decompiler = OPERATION_DECOMPILERS[index]; | ||
| 1214 | if (decompiler == nullptr) { | ||
| 1215 | UNREACHABLE_MSG("Undefined operation: {}", index); | ||
| 1216 | return {}; | ||
| 1217 | } | ||
| 1218 | return (this->*decompiler)(*operation); | ||
| 1219 | } | ||
| 1220 | |||
| 1221 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | ||
| 1222 | const u32 index = gpr->GetIndex(); | ||
| 1223 | if (index == Register::ZeroIndex) { | ||
| 1224 | return "{0, 0, 0, 0}.x"; | ||
| 1225 | } | ||
| 1226 | return fmt::format("R{}.x", index); | ||
| 1227 | } | ||
| 1228 | |||
| 1229 | if (const auto cv = std::get_if<CustomVarNode>(&*node)) { | ||
| 1230 | return fmt::format("CV{}.x", cv->GetIndex()); | ||
| 1231 | } | ||
| 1232 | |||
| 1233 | if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { | ||
| 1234 | const std::string temporary = AllocTemporary(); | ||
| 1235 | AddLine("MOV.U {}, {};", temporary, immediate->GetValue()); | ||
| 1236 | return temporary; | ||
| 1237 | } | ||
| 1238 | |||
| 1239 | if (const auto predicate = std::get_if<PredicateNode>(&*node)) { | ||
| 1240 | const std::string temporary = AllocTemporary(); | ||
| 1241 | switch (const auto index = predicate->GetIndex(); index) { | ||
| 1242 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1243 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1244 | break; | ||
| 1245 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1246 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1247 | break; | ||
| 1248 | default: | ||
| 1249 | AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index)); | ||
| 1250 | break; | ||
| 1251 | } | ||
| 1252 | if (predicate->IsNegated()) { | ||
| 1253 | AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary); | ||
| 1254 | } | ||
| 1255 | return temporary; | ||
| 1256 | } | ||
| 1257 | |||
| 1258 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { | ||
| 1259 | if (abuf->IsPhysicalBuffer()) { | ||
| 1260 | UNIMPLEMENTED_MSG("Physical buffers are not implemented"); | ||
| 1261 | return "{0, 0, 0, 0}.x"; | ||
| 1262 | } | ||
| 1263 | |||
| 1264 | const auto buffer_index = [this, &abuf]() -> std::string { | ||
| 1265 | if (stage != ShaderType::Geometry) { | ||
| 1266 | return ""; | ||
| 1267 | } | ||
| 1268 | return fmt::format("[{}]", Visit(abuf->GetBuffer())); | ||
| 1269 | }; | ||
| 1270 | |||
| 1271 | const Attribute::Index index = abuf->GetIndex(); | ||
| 1272 | const u32 element = abuf->GetElement(); | ||
| 1273 | const char swizzle = Swizzle(element); | ||
| 1274 | switch (index) { | ||
| 1275 | case Attribute::Index::Position: { | ||
| 1276 | if (stage == ShaderType::Geometry) { | ||
| 1277 | return fmt::format("{}_position[{}].{}", StageInputName(stage), | ||
| 1278 | Visit(abuf->GetBuffer()), swizzle); | ||
| 1279 | } else { | ||
| 1280 | return fmt::format("{}.position.{}", StageInputName(stage), swizzle); | ||
| 1281 | } | ||
| 1282 | } | ||
| 1283 | case Attribute::Index::TessCoordInstanceIDVertexID: | ||
| 1284 | ASSERT(stage == ShaderType::Vertex); | ||
| 1285 | switch (element) { | ||
| 1286 | case 2: | ||
| 1287 | return "vertex.instance"; | ||
| 1288 | case 3: | ||
| 1289 | return "vertex.id"; | ||
| 1290 | } | ||
| 1291 | UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); | ||
| 1292 | break; | ||
| 1293 | case Attribute::Index::PointCoord: | ||
| 1294 | switch (element) { | ||
| 1295 | case 0: | ||
| 1296 | return "fragment.pointcoord.x"; | ||
| 1297 | case 1: | ||
| 1298 | return "fragment.pointcoord.y"; | ||
| 1299 | } | ||
| 1300 | UNIMPLEMENTED(); | ||
| 1301 | break; | ||
| 1302 | case Attribute::Index::FrontFacing: { | ||
| 1303 | ASSERT(stage == ShaderType::Fragment); | ||
| 1304 | ASSERT(element == 3); | ||
| 1305 | const std::string temporary = AllocVectorTemporary(); | ||
| 1306 | AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};"); | ||
| 1307 | AddLine("MOV.U.CC RC.x, -RC;"); | ||
| 1308 | AddLine("MOV.S {}.x, 0;", temporary); | ||
| 1309 | AddLine("MOV.S {}.x (NE.x), -1;", temporary); | ||
| 1310 | return fmt::format("{}.x", temporary); | ||
| 1311 | } | ||
| 1312 | default: | ||
| 1313 | if (IsGenericAttribute(index)) { | ||
| 1314 | if (stage == ShaderType::Geometry) { | ||
| 1315 | return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index), | ||
| 1316 | Visit(abuf->GetBuffer()), swizzle); | ||
| 1317 | } else { | ||
| 1318 | return fmt::format("{}.attrib[{}].{}", StageInputName(stage), | ||
| 1319 | GetGenericAttributeIndex(index), swizzle); | ||
| 1320 | } | ||
| 1321 | } | ||
| 1322 | UNIMPLEMENTED_MSG("Unimplemented input attribute={}", static_cast<int>(index)); | ||
| 1323 | break; | ||
| 1324 | } | ||
| 1325 | return "{0, 0, 0, 0}.x"; | ||
| 1326 | } | ||
| 1327 | |||
| 1328 | if (const auto cbuf = std::get_if<CbufNode>(&*node)) { | ||
| 1329 | std::string offset_string; | ||
| 1330 | const auto& offset = cbuf->GetOffset(); | ||
| 1331 | if (const auto imm = std::get_if<ImmediateNode>(&*offset)) { | ||
| 1332 | offset_string = std::to_string(imm->GetValue()); | ||
| 1333 | } else { | ||
| 1334 | offset_string = Visit(offset); | ||
| 1335 | } | ||
| 1336 | const std::string temporary = AllocTemporary(); | ||
| 1337 | AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string); | ||
| 1338 | return temporary; | ||
| 1339 | } | ||
| 1340 | |||
| 1341 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { | ||
| 1342 | const std::string temporary = AllocTemporary(); | ||
| 1343 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), | ||
| 1344 | Visit(gmem->GetBaseAddress())); | ||
| 1345 | AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()), | ||
| 1346 | temporary); | ||
| 1347 | return temporary; | ||
| 1348 | } | ||
| 1349 | |||
| 1350 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { | ||
| 1351 | const std::string temporary = Visit(lmem->GetAddress()); | ||
| 1352 | AddLine("SHR.U {}, {}, 2;", temporary, temporary); | ||
| 1353 | AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary); | ||
| 1354 | return temporary; | ||
| 1355 | } | ||
| 1356 | |||
| 1357 | if (const auto smem = std::get_if<SmemNode>(&*node)) { | ||
| 1358 | const std::string temporary = Visit(smem->GetAddress()); | ||
| 1359 | AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary); | ||
| 1360 | return temporary; | ||
| 1361 | } | ||
| 1362 | |||
| 1363 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { | ||
| 1364 | const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); | ||
| 1365 | return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); | ||
| 1366 | } | ||
| 1367 | |||
| 1368 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||
| 1369 | if (const auto amend_index = conditional->GetAmendIndex()) { | ||
| 1370 | Visit(ir.GetAmendNode(*amend_index)); | ||
| 1371 | } | ||
| 1372 | AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition())); | ||
| 1373 | AddLine("IF NE.x;"); | ||
| 1374 | VisitBlock(conditional->GetCode()); | ||
| 1375 | AddLine("ENDIF;"); | ||
| 1376 | return {}; | ||
| 1377 | } | ||
| 1378 | |||
| 1379 | if (const auto cmt = std::get_if<CommentNode>(&*node)) { | ||
| 1380 | // Uncommenting this will generate invalid code. GLASM lacks comments. | ||
| 1381 | // AddLine("// {}", cmt->GetText()); | ||
| 1382 | return {}; | ||
| 1383 | } | ||
| 1384 | |||
| 1385 | UNIMPLEMENTED(); | ||
| 1386 | return {}; | ||
| 1387 | } | ||
| 1388 | |||
| 1389 | std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) { | ||
| 1390 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1391 | UNIMPLEMENTED_IF(meta.sampler.is_indexed); | ||
| 1392 | UNIMPLEMENTED_IF(meta.sampler.is_shadow && meta.sampler.is_array && | ||
| 1393 | meta.sampler.type == Tegra::Shader::TextureType::TextureCube); | ||
| 1394 | |||
| 1395 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1396 | std::string temporary = AllocVectorTemporary(); | ||
| 1397 | std::size_t i = 0; | ||
| 1398 | for (; i < count; ++i) { | ||
| 1399 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1400 | } | ||
| 1401 | if (meta.sampler.is_array) { | ||
| 1402 | AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i++), Visit(meta.array)); | ||
| 1403 | } | ||
| 1404 | if (meta.sampler.is_shadow) { | ||
| 1405 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i++), Visit(meta.depth_compare)); | ||
| 1406 | } | ||
| 1407 | return {std::move(temporary), i}; | ||
| 1408 | } | ||
| 1409 | |||
| 1410 | std::string ARBDecompiler::BuildAoffi(Operation operation) { | ||
| 1411 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1412 | if (meta.aoffi.empty()) { | ||
| 1413 | return {}; | ||
| 1414 | } | ||
| 1415 | const std::string temporary = AllocVectorTemporary(); | ||
| 1416 | std::size_t i = 0; | ||
| 1417 | for (auto& node : meta.aoffi) { | ||
| 1418 | AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node)); | ||
| 1419 | } | ||
| 1420 | return fmt::format(", offset({})", temporary); | ||
| 1421 | } | ||
| 1422 | |||
| 1423 | void ARBDecompiler::Exit() { | ||
| 1424 | if (stage != ShaderType::Fragment) { | ||
| 1425 | AddLine("RET;"); | ||
| 1426 | return; | ||
| 1427 | } | ||
| 1428 | |||
| 1429 | const auto safe_get_register = [this](u32 reg) -> std::string { | ||
| 1430 | // TODO(Rodrigo): Replace with contains once C++20 releases | ||
| 1431 | const auto& used_registers = ir.GetRegisters(); | ||
| 1432 | if (used_registers.find(reg) != used_registers.end()) { | ||
| 1433 | return fmt::format("R{}.x", reg); | ||
| 1434 | } | ||
| 1435 | return "{0, 0, 0, 0}.x"; | ||
| 1436 | }; | ||
| 1437 | |||
| 1438 | const auto& header = ir.GetHeader(); | ||
| 1439 | u32 current_reg = 0; | ||
| 1440 | for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) { | ||
| 1441 | for (u32 component = 0; component < 4; ++component) { | ||
| 1442 | if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { | ||
| 1443 | continue; | ||
| 1444 | } | ||
| 1445 | AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component), | ||
| 1446 | safe_get_register(current_reg)); | ||
| 1447 | ++current_reg; | ||
| 1448 | } | ||
| 1449 | } | ||
| 1450 | if (header.ps.omap.depth) { | ||
| 1451 | AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1)); | ||
| 1452 | } | ||
| 1453 | |||
| 1454 | AddLine("RET;"); | ||
| 1455 | } | ||
| 1456 | |||
| 1457 | std::string ARBDecompiler::Assign(Operation operation) { | ||
| 1458 | const Node& dest = operation[0]; | ||
| 1459 | const Node& src = operation[1]; | ||
| 1460 | |||
| 1461 | std::string dest_name; | ||
| 1462 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { | ||
| 1463 | if (gpr->GetIndex() == Register::ZeroIndex) { | ||
| 1464 | // Writing to Register::ZeroIndex is a no op | ||
| 1465 | return {}; | ||
| 1466 | } | ||
| 1467 | dest_name = fmt::format("R{}.x", gpr->GetIndex()); | ||
| 1468 | } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { | ||
| 1469 | const u32 element = abuf->GetElement(); | ||
| 1470 | const char swizzle = Swizzle(element); | ||
| 1471 | switch (const Attribute::Index index = abuf->GetIndex()) { | ||
| 1472 | case Attribute::Index::Position: | ||
| 1473 | dest_name = fmt::format("result.position.{}", swizzle); | ||
| 1474 | break; | ||
| 1475 | case Attribute::Index::LayerViewportPointSize: | ||
| 1476 | switch (element) { | ||
| 1477 | case 0: | ||
| 1478 | UNIMPLEMENTED(); | ||
| 1479 | return {}; | ||
| 1480 | case 1: | ||
| 1481 | case 2: | ||
| 1482 | if (!device.HasNvViewportArray2()) { | ||
| 1483 | LOG_ERROR( | ||
| 1484 | Render_OpenGL, | ||
| 1485 | "NV_viewport_array2 is missing. Maxwell gen 2 or better is required."); | ||
| 1486 | return {}; | ||
| 1487 | } | ||
| 1488 | dest_name = element == 1 ? "result.layer.x" : "result.viewport.x"; | ||
| 1489 | break; | ||
| 1490 | case 3: | ||
| 1491 | dest_name = "result.pointsize.x"; | ||
| 1492 | break; | ||
| 1493 | } | ||
| 1494 | break; | ||
| 1495 | case Attribute::Index::ClipDistances0123: | ||
| 1496 | dest_name = fmt::format("result.clip[{}].x", element); | ||
| 1497 | break; | ||
| 1498 | case Attribute::Index::ClipDistances4567: | ||
| 1499 | dest_name = fmt::format("result.clip[{}].x", element + 4); | ||
| 1500 | break; | ||
| 1501 | default: | ||
| 1502 | if (!IsGenericAttribute(index)) { | ||
| 1503 | UNREACHABLE(); | ||
| 1504 | return {}; | ||
| 1505 | } | ||
| 1506 | dest_name = | ||
| 1507 | fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle); | ||
| 1508 | break; | ||
| 1509 | } | ||
| 1510 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | ||
| 1511 | const std::string address = Visit(lmem->GetAddress()); | ||
| 1512 | AddLine("SHR.U {}, {}, 2;", address, address); | ||
| 1513 | dest_name = fmt::format("lmem[{}].x", address); | ||
| 1514 | } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { | ||
| 1515 | AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress())); | ||
| 1516 | ResetTemporaries(); | ||
| 1517 | return {}; | ||
| 1518 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | ||
| 1519 | const std::string temporary = AllocTemporary(); | ||
| 1520 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), | ||
| 1521 | Visit(gmem->GetBaseAddress())); | ||
| 1522 | AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()), | ||
| 1523 | temporary); | ||
| 1524 | ResetTemporaries(); | ||
| 1525 | return {}; | ||
| 1526 | } else { | ||
| 1527 | UNREACHABLE(); | ||
| 1528 | ResetTemporaries(); | ||
| 1529 | return {}; | ||
| 1530 | } | ||
| 1531 | |||
| 1532 | AddLine("MOV.U {}, {};", dest_name, Visit(src)); | ||
| 1533 | ResetTemporaries(); | ||
| 1534 | return {}; | ||
| 1535 | } | ||
| 1536 | |||
| 1537 | std::string ARBDecompiler::Select(Operation operation) { | ||
| 1538 | const std::string temporary = AllocTemporary(); | ||
| 1539 | AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]), | ||
| 1540 | Visit(operation[2])); | ||
| 1541 | return temporary; | ||
| 1542 | } | ||
| 1543 | |||
| 1544 | std::string ARBDecompiler::FClamp(Operation operation) { | ||
| 1545 | // 1.0f in hex, replace with std::bit_cast on C++20 | ||
| 1546 | static constexpr u32 POSITIVE_ONE = 0x3f800000; | ||
| 1547 | |||
| 1548 | const std::string temporary = AllocTemporary(); | ||
| 1549 | const Node& value = operation[0]; | ||
| 1550 | const Node& low = operation[1]; | ||
| 1551 | const Node& high = operation[2]; | ||
| 1552 | const auto imm_low = std::get_if<ImmediateNode>(&*low); | ||
| 1553 | const auto imm_high = std::get_if<ImmediateNode>(&*high); | ||
| 1554 | if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) { | ||
| 1555 | AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value)); | ||
| 1556 | } else { | ||
| 1557 | AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high)); | ||
| 1558 | AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low)); | ||
| 1559 | } | ||
| 1560 | return temporary; | ||
| 1561 | } | ||
| 1562 | |||
| 1563 | std::string ARBDecompiler::FCastHalf0(Operation operation) { | ||
| 1564 | const std::string temporary = AllocVectorTemporary(); | ||
| 1565 | AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0])); | ||
| 1566 | return fmt::format("{}.x", temporary); | ||
| 1567 | } | ||
| 1568 | |||
| 1569 | std::string ARBDecompiler::FCastHalf1(Operation operation) { | ||
| 1570 | const std::string temporary = AllocVectorTemporary(); | ||
| 1571 | AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0])); | ||
| 1572 | AddLine("MOV {}.x, {}.y;", temporary, temporary); | ||
| 1573 | return fmt::format("{}.x", temporary); | ||
| 1574 | } | ||
| 1575 | |||
| 1576 | std::string ARBDecompiler::FSqrt(Operation operation) { | ||
| 1577 | const std::string temporary = AllocTemporary(); | ||
| 1578 | AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0])); | ||
| 1579 | AddLine("RCP.F32 {}, {};", temporary, temporary); | ||
| 1580 | return temporary; | ||
| 1581 | } | ||
| 1582 | |||
| 1583 | std::string ARBDecompiler::FSwizzleAdd(Operation operation) { | ||
| 1584 | const std::string temporary = AllocVectorTemporary(); | ||
| 1585 | if (!device.HasWarpIntrinsics()) { | ||
| 1586 | LOG_ERROR(Render_OpenGL, | ||
| 1587 | "NV_shader_thread_shuffle is missing. Kepler or better is required."); | ||
| 1588 | AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1])); | ||
| 1589 | return fmt::format("{}.x", temporary); | ||
| 1590 | } | ||
| 1591 | const std::string lut = AllocVectorTemporary(); | ||
| 1592 | AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage)); | ||
| 1593 | AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary); | ||
| 1594 | AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary); | ||
| 1595 | AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary); | ||
| 1596 | AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary); | ||
| 1597 | AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary); | ||
| 1598 | AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary); | ||
| 1599 | return fmt::format("{}.x", temporary); | ||
| 1600 | } | ||
| 1601 | |||
| 1602 | std::string ARBDecompiler::HAdd2(Operation operation) { | ||
| 1603 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1604 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1605 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1606 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1607 | AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1608 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1609 | return fmt::format("{}.x", tmp1); | ||
| 1610 | } | ||
| 1611 | |||
| 1612 | std::string ARBDecompiler::HMul2(Operation operation) { | ||
| 1613 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1614 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1615 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1616 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1617 | AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1618 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1619 | return fmt::format("{}.x", tmp1); | ||
| 1620 | } | ||
| 1621 | |||
| 1622 | std::string ARBDecompiler::HFma2(Operation operation) { | ||
| 1623 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1624 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1625 | const std::string tmp3 = AllocVectorTemporary(); | ||
| 1626 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1627 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1628 | AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2])); | ||
| 1629 | AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3); | ||
| 1630 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1631 | return fmt::format("{}.x", tmp1); | ||
| 1632 | } | ||
| 1633 | |||
| 1634 | std::string ARBDecompiler::HAbsolute(Operation operation) { | ||
| 1635 | const std::string temporary = AllocVectorTemporary(); | ||
| 1636 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1637 | AddLine("PK2H.F {}.x, |{}|;", temporary, temporary); | ||
| 1638 | return fmt::format("{}.x", temporary); | ||
| 1639 | } | ||
| 1640 | |||
| 1641 | std::string ARBDecompiler::HNegate(Operation operation) { | ||
| 1642 | const std::string temporary = AllocVectorTemporary(); | ||
| 1643 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1644 | AddLine("MOVC.S RC.x, {};", Visit(operation[1])); | ||
| 1645 | AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary); | ||
| 1646 | AddLine("MOVC.S RC.x, {};", Visit(operation[2])); | ||
| 1647 | AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary); | ||
| 1648 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1649 | return fmt::format("{}.x", temporary); | ||
| 1650 | } | ||
| 1651 | |||
| 1652 | std::string ARBDecompiler::HClamp(Operation operation) { | ||
| 1653 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1654 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1655 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1656 | AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1])); | ||
| 1657 | AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); | ||
| 1658 | AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1659 | AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2])); | ||
| 1660 | AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); | ||
| 1661 | AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1662 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1663 | return fmt::format("{}.x", tmp1); | ||
| 1664 | } | ||
| 1665 | |||
| 1666 | std::string ARBDecompiler::HCastFloat(Operation operation) { | ||
| 1667 | const std::string temporary = AllocVectorTemporary(); | ||
| 1668 | AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary); | ||
| 1669 | AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0])); | ||
| 1670 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1671 | return fmt::format("{}.x", temporary); | ||
| 1672 | } | ||
| 1673 | |||
| 1674 | std::string ARBDecompiler::HUnpack(Operation operation) { | ||
| 1675 | const std::string operand = Visit(operation[0]); | ||
| 1676 | switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { | ||
| 1677 | case Tegra::Shader::HalfType::H0_H1: | ||
| 1678 | return operand; | ||
| 1679 | case Tegra::Shader::HalfType::F32: { | ||
| 1680 | const std::string temporary = AllocVectorTemporary(); | ||
| 1681 | AddLine("MOV.U {}.x, {};", temporary, operand); | ||
| 1682 | AddLine("MOV.U {}.y, {}.x;", temporary, temporary); | ||
| 1683 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1684 | return fmt::format("{}.x", temporary); | ||
| 1685 | } | ||
| 1686 | case Tegra::Shader::HalfType::H0_H0: { | ||
| 1687 | const std::string temporary = AllocVectorTemporary(); | ||
| 1688 | AddLine("UP2H.F {}.xy, {};", temporary, operand); | ||
| 1689 | AddLine("MOV.U {}.y, {}.x;", temporary, temporary); | ||
| 1690 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1691 | return fmt::format("{}.x", temporary); | ||
| 1692 | } | ||
| 1693 | case Tegra::Shader::HalfType::H1_H1: { | ||
| 1694 | const std::string temporary = AllocVectorTemporary(); | ||
| 1695 | AddLine("UP2H.F {}.xy, {};", temporary, operand); | ||
| 1696 | AddLine("MOV.U {}.x, {}.y;", temporary, temporary); | ||
| 1697 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1698 | return fmt::format("{}.x", temporary); | ||
| 1699 | } | ||
| 1700 | } | ||
| 1701 | UNREACHABLE(); | ||
| 1702 | return "{0, 0, 0, 0}.x"; | ||
| 1703 | } | ||
| 1704 | |||
| 1705 | std::string ARBDecompiler::HMergeF32(Operation operation) { | ||
| 1706 | const std::string temporary = AllocVectorTemporary(); | ||
| 1707 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1708 | return fmt::format("{}.x", temporary); | ||
| 1709 | } | ||
| 1710 | |||
| 1711 | std::string ARBDecompiler::HMergeH0(Operation operation) { | ||
| 1712 | const std::string temporary = AllocVectorTemporary(); | ||
| 1713 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1714 | AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); | ||
| 1715 | AddLine("MOV.U {}.x, {}.z;", temporary, temporary); | ||
| 1716 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1717 | return fmt::format("{}.x", temporary); | ||
| 1718 | } | ||
| 1719 | |||
| 1720 | std::string ARBDecompiler::HMergeH1(Operation operation) { | ||
| 1721 | const std::string temporary = AllocVectorTemporary(); | ||
| 1722 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1723 | AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); | ||
| 1724 | AddLine("MOV.U {}.y, {}.w;", temporary, temporary); | ||
| 1725 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1726 | return fmt::format("{}.x", temporary); | ||
| 1727 | } | ||
| 1728 | |||
| 1729 | std::string ARBDecompiler::HPack2(Operation operation) { | ||
| 1730 | const std::string temporary = AllocVectorTemporary(); | ||
| 1731 | AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0])); | ||
| 1732 | AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1])); | ||
| 1733 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1734 | return fmt::format("{}.x", temporary); | ||
| 1735 | } | ||
| 1736 | |||
| 1737 | std::string ARBDecompiler::LogicalAssign(Operation operation) { | ||
| 1738 | const Node& dest = operation[0]; | ||
| 1739 | const Node& src = operation[1]; | ||
| 1740 | |||
| 1741 | std::string target; | ||
| 1742 | |||
| 1743 | if (const auto pred = std::get_if<PredicateNode>(&*dest)) { | ||
| 1744 | ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); | ||
| 1745 | |||
| 1746 | const Tegra::Shader::Pred index = pred->GetIndex(); | ||
| 1747 | switch (index) { | ||
| 1748 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1749 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1750 | // Writing to these predicates is a no-op | ||
| 1751 | return {}; | ||
| 1752 | } | ||
| 1753 | target = fmt::format("P{}.x", static_cast<u64>(index)); | ||
| 1754 | } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) { | ||
| 1755 | const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); | ||
| 1756 | target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); | ||
| 1757 | } else { | ||
| 1758 | UNREACHABLE(); | ||
| 1759 | ResetTemporaries(); | ||
| 1760 | return {}; | ||
| 1761 | } | ||
| 1762 | |||
| 1763 | AddLine("MOV.U {}, {};", target, Visit(src)); | ||
| 1764 | ResetTemporaries(); | ||
| 1765 | return {}; | ||
| 1766 | } | ||
| 1767 | |||
| 1768 | std::string ARBDecompiler::LogicalPick2(Operation operation) { | ||
| 1769 | const std::string temporary = AllocTemporary(); | ||
| 1770 | const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue(); | ||
| 1771 | AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index)); | ||
| 1772 | return temporary; | ||
| 1773 | } | ||
| 1774 | |||
| 1775 | std::string ARBDecompiler::LogicalAnd2(Operation operation) { | ||
| 1776 | const std::string temporary = AllocTemporary(); | ||
| 1777 | const std::string op = Visit(operation[0]); | ||
| 1778 | AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op); | ||
| 1779 | return temporary; | ||
| 1780 | } | ||
| 1781 | |||
| 1782 | std::string ARBDecompiler::FloatOrdered(Operation operation) { | ||
| 1783 | const std::string temporary = AllocTemporary(); | ||
| 1784 | AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); | ||
| 1785 | AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); | ||
| 1786 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1787 | AddLine("MOV.S {} (NAN.x), 0;", temporary); | ||
| 1788 | AddLine("MOV.S {} (NAN.y), 0;", temporary); | ||
| 1789 | return temporary; | ||
| 1790 | } | ||
| 1791 | |||
| 1792 | std::string ARBDecompiler::FloatUnordered(Operation operation) { | ||
| 1793 | const std::string temporary = AllocTemporary(); | ||
| 1794 | AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); | ||
| 1795 | AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); | ||
| 1796 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1797 | AddLine("MOV.S {} (NAN.x), -1;", temporary); | ||
| 1798 | AddLine("MOV.S {} (NAN.y), -1;", temporary); | ||
| 1799 | return temporary; | ||
| 1800 | } | ||
| 1801 | |||
| 1802 | std::string ARBDecompiler::LogicalAddCarry(Operation operation) { | ||
| 1803 | const std::string temporary = AllocTemporary(); | ||
| 1804 | AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1])); | ||
| 1805 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1806 | AddLine("IF CF.x;"); | ||
| 1807 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1808 | AddLine("ENDIF;"); | ||
| 1809 | return temporary; | ||
| 1810 | } | ||
| 1811 | |||
| 1812 | std::string ARBDecompiler::Texture(Operation operation) { | ||
| 1813 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1814 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1815 | const auto [temporary, swizzle] = BuildCoords(operation); | ||
| 1816 | |||
| 1817 | std::string_view opcode = "TEX"; | ||
| 1818 | std::string extra; | ||
| 1819 | if (meta.bias) { | ||
| 1820 | ASSERT(!meta.lod); | ||
| 1821 | opcode = "TXB"; | ||
| 1822 | |||
| 1823 | if (swizzle < 4) { | ||
| 1824 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias)); | ||
| 1825 | } else { | ||
| 1826 | const std::string bias = AllocTemporary(); | ||
| 1827 | AddLine("MOV.F {}, {};", bias, Visit(meta.bias)); | ||
| 1828 | extra = fmt::format(" {},", bias); | ||
| 1829 | } | ||
| 1830 | } | ||
| 1831 | if (meta.lod) { | ||
| 1832 | ASSERT(!meta.bias); | ||
| 1833 | opcode = "TXL"; | ||
| 1834 | |||
| 1835 | if (swizzle < 4) { | ||
| 1836 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); | ||
| 1837 | } else { | ||
| 1838 | const std::string lod = AllocTemporary(); | ||
| 1839 | AddLine("MOV.F {}, {};", lod, Visit(meta.lod)); | ||
| 1840 | extra = fmt::format(" {},", lod); | ||
| 1841 | } | ||
| 1842 | } | ||
| 1843 | |||
| 1844 | AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, temporary, extra, sampler_id, | ||
| 1845 | TextureType(meta), BuildAoffi(operation)); | ||
| 1846 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1847 | return fmt::format("{}.x", temporary); | ||
| 1848 | } | ||
| 1849 | |||
| 1850 | std::string ARBDecompiler::TextureGather(Operation operation) { | ||
| 1851 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1852 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1853 | const auto [temporary, swizzle] = BuildCoords(operation); | ||
| 1854 | |||
| 1855 | std::string comp; | ||
| 1856 | if (!meta.sampler.is_shadow) { | ||
| 1857 | const auto& immediate = std::get<ImmediateNode>(*meta.component); | ||
| 1858 | comp = fmt::format(".{}", Swizzle(immediate.GetValue())); | ||
| 1859 | } | ||
| 1860 | |||
| 1861 | AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp, | ||
| 1862 | TextureType(meta), BuildAoffi(operation)); | ||
| 1863 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1864 | return fmt::format("{}.x", temporary); | ||
| 1865 | } | ||
| 1866 | |||
| 1867 | std::string ARBDecompiler::TextureQueryDimensions(Operation operation) { | ||
| 1868 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1869 | const std::string temporary = AllocVectorTemporary(); | ||
| 1870 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1871 | |||
| 1872 | ASSERT(!meta.sampler.is_array); | ||
| 1873 | |||
| 1874 | const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0"; | ||
| 1875 | AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta)); | ||
| 1876 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1877 | return fmt::format("{}.x", temporary); | ||
| 1878 | } | ||
| 1879 | |||
| 1880 | std::string ARBDecompiler::TextureQueryLod(Operation operation) { | ||
| 1881 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1882 | const std::string temporary = AllocVectorTemporary(); | ||
| 1883 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1884 | |||
| 1885 | ASSERT(!meta.sampler.is_array); | ||
| 1886 | |||
| 1887 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1888 | for (std::size_t i = 0; i < count; ++i) { | ||
| 1889 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1890 | } | ||
| 1891 | AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta)); | ||
| 1892 | AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary); | ||
| 1893 | AddLine("TRUNC.S {}, {};", temporary, temporary); | ||
| 1894 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1895 | return fmt::format("{}.x", temporary); | ||
| 1896 | } | ||
| 1897 | |||
| 1898 | std::string ARBDecompiler::TexelFetch(Operation operation) { | ||
| 1899 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1900 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1901 | const auto [temporary, swizzle] = BuildCoords(operation); | ||
| 1902 | |||
| 1903 | if (!meta.sampler.is_buffer) { | ||
| 1904 | ASSERT(swizzle < 4); | ||
| 1905 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); | ||
| 1906 | } | ||
| 1907 | AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, temporary, sampler_id, TextureType(meta), | ||
| 1908 | BuildAoffi(operation)); | ||
| 1909 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1910 | return fmt::format("{}.x", temporary); | ||
| 1911 | } | ||
| 1912 | |||
| 1913 | std::string ARBDecompiler::TextureGradient(Operation operation) { | ||
| 1914 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1915 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1916 | const std::string ddx = AllocVectorTemporary(); | ||
| 1917 | const std::string ddy = AllocVectorTemporary(); | ||
| 1918 | const std::string coord = BuildCoords(operation).first; | ||
| 1919 | |||
| 1920 | const std::size_t num_components = meta.derivates.size() / 2; | ||
| 1921 | for (std::size_t index = 0; index < num_components; ++index) { | ||
| 1922 | const char swizzle = Swizzle(index); | ||
| 1923 | AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2])); | ||
| 1924 | AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1])); | ||
| 1925 | } | ||
| 1926 | |||
| 1927 | const std::string_view result = coord; | ||
| 1928 | AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id, | ||
| 1929 | TextureType(meta), BuildAoffi(operation)); | ||
| 1930 | AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element)); | ||
| 1931 | return fmt::format("{}.x", result); | ||
| 1932 | } | ||
| 1933 | |||
| 1934 | std::string ARBDecompiler::ImageLoad(Operation operation) { | ||
| 1935 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 1936 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 1937 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1938 | const std::string_view type = ImageType(meta.image.type); | ||
| 1939 | |||
| 1940 | const std::string temporary = AllocVectorTemporary(); | ||
| 1941 | for (std::size_t i = 0; i < count; ++i) { | ||
| 1942 | AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1943 | } | ||
| 1944 | AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type); | ||
| 1945 | AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1946 | return fmt::format("{}.x", temporary); | ||
| 1947 | } | ||
| 1948 | |||
| 1949 | std::string ARBDecompiler::ImageStore(Operation operation) { | ||
| 1950 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 1951 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 1952 | const std::size_t num_coords = operation.GetOperandsCount(); | ||
| 1953 | const std::size_t num_values = meta.values.size(); | ||
| 1954 | const std::string_view type = ImageType(meta.image.type); | ||
| 1955 | |||
| 1956 | const std::string coord = AllocVectorTemporary(); | ||
| 1957 | const std::string value = AllocVectorTemporary(); | ||
| 1958 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 1959 | AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); | ||
| 1960 | } | ||
| 1961 | for (std::size_t i = 0; i < num_values; ++i) { | ||
| 1962 | AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); | ||
| 1963 | } | ||
| 1964 | AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type); | ||
| 1965 | return {}; | ||
| 1966 | } | ||
| 1967 | |||
| 1968 | std::string ARBDecompiler::Branch(Operation operation) { | ||
| 1969 | const auto target = std::get<ImmediateNode>(*operation[0]); | ||
| 1970 | AddLine("MOV.U PC.x, {};", target.GetValue()); | ||
| 1971 | AddLine("CONT;"); | ||
| 1972 | return {}; | ||
| 1973 | } | ||
| 1974 | |||
| 1975 | std::string ARBDecompiler::BranchIndirect(Operation operation) { | ||
| 1976 | AddLine("MOV.U PC.x, {};", Visit(operation[0])); | ||
| 1977 | AddLine("CONT;"); | ||
| 1978 | return {}; | ||
| 1979 | } | ||
| 1980 | |||
| 1981 | std::string ARBDecompiler::PushFlowStack(Operation operation) { | ||
| 1982 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 1983 | const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue(); | ||
| 1984 | const std::string_view stack_name = StackName(stack); | ||
| 1985 | AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target); | ||
| 1986 | AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); | ||
| 1987 | return {}; | ||
| 1988 | } | ||
| 1989 | |||
| 1990 | std::string ARBDecompiler::PopFlowStack(Operation operation) { | ||
| 1991 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 1992 | const std::string_view stack_name = StackName(stack); | ||
| 1993 | AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); | ||
| 1994 | AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name); | ||
| 1995 | AddLine("CONT;"); | ||
| 1996 | return {}; | ||
| 1997 | } | ||
| 1998 | |||
| 1999 | std::string ARBDecompiler::Exit(Operation) { | ||
| 2000 | Exit(); | ||
| 2001 | return {}; | ||
| 2002 | } | ||
| 2003 | |||
| 2004 | std::string ARBDecompiler::Discard(Operation) { | ||
| 2005 | AddLine("KIL TR;"); | ||
| 2006 | return {}; | ||
| 2007 | } | ||
| 2008 | |||
| 2009 | std::string ARBDecompiler::EmitVertex(Operation) { | ||
| 2010 | AddLine("EMIT;"); | ||
| 2011 | return {}; | ||
| 2012 | } | ||
| 2013 | |||
| 2014 | std::string ARBDecompiler::EndPrimitive(Operation) { | ||
| 2015 | AddLine("ENDPRIM;"); | ||
| 2016 | return {}; | ||
| 2017 | } | ||
| 2018 | |||
| 2019 | std::string ARBDecompiler::InvocationId(Operation) { | ||
| 2020 | return "primitive.invocation"; | ||
| 2021 | } | ||
| 2022 | |||
| 2023 | std::string ARBDecompiler::YNegate(Operation) { | ||
| 2024 | LOG_WARNING(Render_OpenGL, "(STUBBED)"); | ||
| 2025 | const std::string temporary = AllocTemporary(); | ||
| 2026 | AddLine("MOV.F {}, 1;", temporary); | ||
| 2027 | return temporary; | ||
| 2028 | } | ||
| 2029 | |||
| 2030 | std::string ARBDecompiler::ThreadId(Operation) { | ||
| 2031 | return fmt::format("{}.threadid", StageInputName(stage)); | ||
| 2032 | } | ||
| 2033 | |||
| 2034 | std::string ARBDecompiler::ShuffleIndexed(Operation operation) { | ||
| 2035 | if (!device.HasWarpIntrinsics()) { | ||
| 2036 | LOG_ERROR(Render_OpenGL, | ||
| 2037 | "NV_shader_thread_shuffle is missing. Kepler or better is required."); | ||
| 2038 | return Visit(operation[0]); | ||
| 2039 | } | ||
| 2040 | const std::string temporary = AllocVectorTemporary(); | ||
| 2041 | AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]), | ||
| 2042 | Visit(operation[1])); | ||
| 2043 | AddLine("MOV.U {}.x, {}.y;", temporary, temporary); | ||
| 2044 | return fmt::format("{}.x", temporary); | ||
| 2045 | } | ||
| 2046 | |||
| 2047 | std::string ARBDecompiler::Barrier(Operation) { | ||
| 2048 | if (!ir.IsDecompiled()) { | ||
| 2049 | LOG_ERROR(Render_OpenGL, "BAR used but shader is not decompiled"); | ||
| 2050 | return {}; | ||
| 2051 | } | ||
| 2052 | AddLine("BAR;"); | ||
| 2053 | return {}; | ||
| 2054 | } | ||
| 2055 | |||
| 2056 | std::string ARBDecompiler::MemoryBarrierGroup(Operation) { | ||
| 2057 | AddLine("MEMBAR.CTA;"); | ||
| 2058 | return {}; | ||
| 2059 | } | ||
| 2060 | |||
| 2061 | std::string ARBDecompiler::MemoryBarrierGlobal(Operation) { | ||
| 2062 | AddLine("MEMBAR;"); | ||
| 2063 | return {}; | ||
| 2064 | } | ||
| 2065 | |||
| 2066 | } // Anonymous namespace | ||
| 2067 | |||
| 2068 | std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 2069 | const VideoCommon::Shader::Registry& registry, | ||
| 2070 | Tegra::Engines::ShaderType stage, std::string_view identifier) { | ||
| 2071 | return ARBDecompiler(device, ir, registry, stage, identifier).Code(); | ||
| 2072 | } | ||
| 2073 | |||
| 2074 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h new file mode 100644 index 000000000..6afc87220 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.h | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | #include <string_view> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | namespace Tegra::Engines { | ||
| 13 | enum class ShaderType : u32; | ||
| 14 | } | ||
| 15 | |||
| 16 | namespace VideoCommon::Shader { | ||
| 17 | class ShaderIR; | ||
| 18 | class Registry; | ||
| 19 | } // namespace VideoCommon::Shader | ||
| 20 | |||
| 21 | namespace OpenGL { | ||
| 22 | |||
| 23 | class Device; | ||
| 24 | |||
| 25 | std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 26 | const VideoCommon::Shader::Registry& registry, | ||
| 27 | Tegra::Engines::ShaderType stage, std::string_view identifier); | ||
| 28 | |||
| 29 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 890fc6c63..e245e27ec 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -213,6 +213,7 @@ Device::Device() | |||
| 213 | has_component_indexing_bug = is_amd; | 213 | has_component_indexing_bug = is_amd; |
| 214 | has_precise_bug = TestPreciseBug(); | 214 | has_precise_bug = TestPreciseBug(); |
| 215 | has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; | 215 | has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; |
| 216 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; | ||
| 216 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && | 217 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && |
| 217 | GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && | 218 | GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && |
| 218 | GLAD_GL_NV_transform_feedback2; | 219 | GLAD_GL_NV_transform_feedback2; |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 98cca0254..145347943 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -88,6 +88,10 @@ public: | |||
| 88 | return has_fast_buffer_sub_data; | 88 | return has_fast_buffer_sub_data; |
| 89 | } | 89 | } |
| 90 | 90 | ||
| 91 | bool HasNvViewportArray2() const { | ||
| 92 | return has_nv_viewport_array2; | ||
| 93 | } | ||
| 94 | |||
| 91 | bool UseAssemblyShaders() const { | 95 | bool UseAssemblyShaders() const { |
| 92 | return use_assembly_shaders; | 96 | return use_assembly_shaders; |
| 93 | } | 97 | } |
| @@ -111,6 +115,7 @@ private: | |||
| 111 | bool has_component_indexing_bug{}; | 115 | bool has_component_indexing_bug{}; |
| 112 | bool has_precise_bug{}; | 116 | bool has_precise_bug{}; |
| 113 | bool has_fast_buffer_sub_data{}; | 117 | bool has_fast_buffer_sub_data{}; |
| 118 | bool has_nv_viewport_array2{}; | ||
| 114 | bool use_assembly_shaders{}; | 119 | bool use_assembly_shaders{}; |
| 115 | }; | 120 | }; |
| 116 | 121 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c28486b1d..46e780a06 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "video_core/engines/maxwell_3d.h" | 20 | #include "video_core/engines/maxwell_3d.h" |
| 21 | #include "video_core/engines/shader_type.h" | 21 | #include "video_core/engines/shader_type.h" |
| 22 | #include "video_core/memory_manager.h" | 22 | #include "video_core/memory_manager.h" |
| 23 | #include "video_core/renderer_opengl/gl_arb_decompiler.h" | ||
| 23 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 24 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 24 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 25 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 26 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| @@ -148,7 +149,8 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u | |||
| 148 | auto program = std::make_shared<ProgramHandle>(); | 149 | auto program = std::make_shared<ProgramHandle>(); |
| 149 | 150 | ||
| 150 | if (device.UseAssemblyShaders()) { | 151 | if (device.UseAssemblyShaders()) { |
| 151 | const std::string arb = "Not implemented"; | 152 | const std::string arb = |
| 153 | DecompileAssemblyShader(device, ir, registry, shader_type, shader_id); | ||
| 152 | 154 | ||
| 153 | GLuint& arb_prog = program->assembly_program.handle; | 155 | GLuint& arb_prog = program->assembly_program.handle; |
| 154 | 156 | ||
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 37aadf7f8..be5006ad3 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp | |||
| @@ -12,9 +12,6 @@ ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent) | |||
| 12 | 12 | ||
| 13 | ui->setupUi(this); | 13 | ui->setupUi(this); |
| 14 | 14 | ||
| 15 | // TODO: Remove this after assembly shaders are fully integrated | ||
| 16 | ui->use_assembly_shaders->setVisible(false); | ||
| 17 | |||
| 18 | SetConfiguration(); | 15 | SetConfiguration(); |
| 19 | } | 16 | } |
| 20 | 17 | ||