diff options
| author | 2021-02-16 20:52:12 -0300 | |
|---|---|---|
| committer | 2021-07-22 21:51:22 -0400 | |
| commit | c67d64365a712830fe140dd36e24e2efd9b8a812 (patch) | |
| tree | 9287589f2b72d1cbd0cb113c2024b2bc531408c3 /src/video_core/renderer_opengl | |
| parent | shader: Add XMAD multiplication folding optimization (diff) | |
| download | yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.tar.gz yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.tar.xz yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.zip | |
shader: Remove old shader management
Diffstat (limited to 'src/video_core/renderer_opengl')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_arb_decompiler.cpp | 2124 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_arb_decompiler.h | 29 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 314 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 33 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 564 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 102 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 2986 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.h | 69 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 482 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.h | 176 |
10 files changed, 8 insertions, 6871 deletions
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp deleted file mode 100644 index e8d8d2aa5..000000000 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ /dev/null | |||
| @@ -1,2124 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <string> | ||
| 9 | #include <string_view> | ||
| 10 | #include <utility> | ||
| 11 | #include <variant> | ||
| 12 | |||
| 13 | #include <fmt/format.h> | ||
| 14 | |||
| 15 | #include "common/alignment.h" | ||
| 16 | #include "common/assert.h" | ||
| 17 | #include "common/common_types.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_arb_decompiler.h" | ||
| 19 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 20 | #include "video_core/shader/registry.h" | ||
| 21 | #include "video_core/shader/shader_ir.h" | ||
| 22 | |||
| 23 | // Predicates in the decompiled code follow the convention that -1 means true and 0 means false. | ||
| 24 | // GLASM lacks booleans, so they have to be implemented as integers. | ||
| 25 | // Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to | ||
| 26 | // select between two values, because -1 will be evaluated as true and 0 as false. | ||
| 27 | |||
| 28 | namespace OpenGL { | ||
| 29 | |||
| 30 | namespace { | ||
| 31 | |||
| 32 | using Tegra::Engines::ShaderType; | ||
| 33 | using Tegra::Shader::Attribute; | ||
| 34 | using Tegra::Shader::PixelImap; | ||
| 35 | using Tegra::Shader::Register; | ||
| 36 | using namespace VideoCommon::Shader; | ||
| 37 | using Operation = const OperationNode&; | ||
| 38 | |||
| 39 | constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"}; | ||
| 40 | |||
// Maps a vector component index (0-3) to its GLASM swizzle letter.
// Out-of-range indices throw std::out_of_range via string_view::at.
char Swizzle(std::size_t component) {
    static constexpr std::string_view COMPONENT_LETTERS{"xyzw"};
    return COMPONENT_LETTERS.at(component);
}
| 45 | |||
| 46 | constexpr bool IsGenericAttribute(Attribute::Index index) { | ||
| 47 | return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; | ||
| 48 | } | ||
| 49 | |||
| 50 | u32 GetGenericAttributeIndex(Attribute::Index index) { | ||
| 51 | ASSERT(IsGenericAttribute(index)); | ||
| 52 | return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); | ||
| 53 | } | ||
| 54 | |||
| 55 | std::string_view Modifiers(Operation operation) { | ||
| 56 | const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta()); | ||
| 57 | if (meta && meta->precise) { | ||
| 58 | return ".PREC"; | ||
| 59 | } | ||
| 60 | return ""; | ||
| 61 | } | ||
| 62 | |||
| 63 | std::string_view GetInputFlags(PixelImap attribute) { | ||
| 64 | switch (attribute) { | ||
| 65 | case PixelImap::Perspective: | ||
| 66 | return ""; | ||
| 67 | case PixelImap::Constant: | ||
| 68 | return "FLAT "; | ||
| 69 | case PixelImap::ScreenLinear: | ||
| 70 | return "NOPERSPECTIVE "; | ||
| 71 | case PixelImap::Unused: | ||
| 72 | break; | ||
| 73 | } | ||
| 74 | UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); | ||
| 75 | return {}; | ||
| 76 | } | ||
| 77 | |||
| 78 | std::string_view ImageType(Tegra::Shader::ImageType image_type) { | ||
| 79 | switch (image_type) { | ||
| 80 | case Tegra::Shader::ImageType::Texture1D: | ||
| 81 | return "1D"; | ||
| 82 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 83 | return "BUFFER"; | ||
| 84 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 85 | return "ARRAY1D"; | ||
| 86 | case Tegra::Shader::ImageType::Texture2D: | ||
| 87 | return "2D"; | ||
| 88 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 89 | return "ARRAY2D"; | ||
| 90 | case Tegra::Shader::ImageType::Texture3D: | ||
| 91 | return "3D"; | ||
| 92 | } | ||
| 93 | UNREACHABLE(); | ||
| 94 | return {}; | ||
| 95 | } | ||
| 96 | |||
| 97 | std::string_view StackName(MetaStackClass stack) { | ||
| 98 | switch (stack) { | ||
| 99 | case MetaStackClass::Ssy: | ||
| 100 | return "SSY"; | ||
| 101 | case MetaStackClass::Pbk: | ||
| 102 | return "PBK"; | ||
| 103 | } | ||
| 104 | UNREACHABLE(); | ||
| 105 | return ""; | ||
| 106 | }; | ||
| 107 | |||
| 108 | std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { | ||
| 109 | switch (topology) { | ||
| 110 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: | ||
| 111 | return "POINTS"; | ||
| 112 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: | ||
| 113 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: | ||
| 114 | return "LINES"; | ||
| 115 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: | ||
| 116 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: | ||
| 117 | return "LINES_ADJACENCY"; | ||
| 118 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: | ||
| 119 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: | ||
| 120 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: | ||
| 121 | return "TRIANGLES"; | ||
| 122 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: | ||
| 123 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: | ||
| 124 | return "TRIANGLES_ADJACENCY"; | ||
| 125 | default: | ||
| 126 | UNIMPLEMENTED_MSG("topology={}", topology); | ||
| 127 | return "POINTS"; | ||
| 128 | } | ||
| 129 | } | ||
| 130 | |||
| 131 | std::string_view TopologyName(Tegra::Shader::OutputTopology topology) { | ||
| 132 | switch (topology) { | ||
| 133 | case Tegra::Shader::OutputTopology::PointList: | ||
| 134 | return "POINTS"; | ||
| 135 | case Tegra::Shader::OutputTopology::LineStrip: | ||
| 136 | return "LINE_STRIP"; | ||
| 137 | case Tegra::Shader::OutputTopology::TriangleStrip: | ||
| 138 | return "TRIANGLE_STRIP"; | ||
| 139 | default: | ||
| 140 | UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); | ||
| 141 | return "points"; | ||
| 142 | } | ||
| 143 | } | ||
| 144 | |||
| 145 | std::string_view StageInputName(ShaderType stage) { | ||
| 146 | switch (stage) { | ||
| 147 | case ShaderType::Vertex: | ||
| 148 | case ShaderType::Geometry: | ||
| 149 | return "vertex"; | ||
| 150 | case ShaderType::Fragment: | ||
| 151 | return "fragment"; | ||
| 152 | case ShaderType::Compute: | ||
| 153 | return "invocation"; | ||
| 154 | default: | ||
| 155 | UNREACHABLE(); | ||
| 156 | return ""; | ||
| 157 | } | ||
| 158 | } | ||
| 159 | |||
| 160 | std::string TextureType(const MetaTexture& meta) { | ||
| 161 | if (meta.sampler.is_buffer) { | ||
| 162 | return "BUFFER"; | ||
| 163 | } | ||
| 164 | std::string type; | ||
| 165 | if (meta.sampler.is_shadow) { | ||
| 166 | type += "SHADOW"; | ||
| 167 | } | ||
| 168 | if (meta.sampler.is_array) { | ||
| 169 | type += "ARRAY"; | ||
| 170 | } | ||
| 171 | type += [&meta] { | ||
| 172 | switch (meta.sampler.type) { | ||
| 173 | case Tegra::Shader::TextureType::Texture1D: | ||
| 174 | return "1D"; | ||
| 175 | case Tegra::Shader::TextureType::Texture2D: | ||
| 176 | return "2D"; | ||
| 177 | case Tegra::Shader::TextureType::Texture3D: | ||
| 178 | return "3D"; | ||
| 179 | case Tegra::Shader::TextureType::TextureCube: | ||
| 180 | return "CUBE"; | ||
| 181 | } | ||
| 182 | UNREACHABLE(); | ||
| 183 | return "2D"; | ||
| 184 | }(); | ||
| 185 | return type; | ||
| 186 | } | ||
| 187 | |||
| 188 | class ARBDecompiler final { | ||
| 189 | public: | ||
| 190 | explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, | ||
| 191 | ShaderType stage_, std::string_view identifier); | ||
| 192 | |||
| 193 | std::string Code() const { | ||
| 194 | return shader_source; | ||
| 195 | } | ||
| 196 | |||
| 197 | private: | ||
| 198 | void DefineGlobalMemory(); | ||
| 199 | |||
| 200 | void DeclareHeader(); | ||
| 201 | void DeclareVertex(); | ||
| 202 | void DeclareGeometry(); | ||
| 203 | void DeclareFragment(); | ||
| 204 | void DeclareCompute(); | ||
| 205 | void DeclareInputAttributes(); | ||
| 206 | void DeclareOutputAttributes(); | ||
| 207 | void DeclareLocalMemory(); | ||
| 208 | void DeclareGlobalMemory(); | ||
| 209 | void DeclareConstantBuffers(); | ||
| 210 | void DeclareRegisters(); | ||
| 211 | void DeclareTemporaries(); | ||
| 212 | void DeclarePredicates(); | ||
| 213 | void DeclareInternalFlags(); | ||
| 214 | |||
| 215 | void InitializeVariables(); | ||
| 216 | |||
| 217 | void DecompileAST(); | ||
| 218 | void DecompileBranchMode(); | ||
| 219 | |||
| 220 | void VisitAST(const ASTNode& node); | ||
| 221 | std::string VisitExpression(const Expr& node); | ||
| 222 | |||
| 223 | void VisitBlock(const NodeBlock& bb); | ||
| 224 | |||
| 225 | std::string Visit(const Node& node); | ||
| 226 | |||
| 227 | std::tuple<std::string, std::string, std::size_t> BuildCoords(Operation); | ||
| 228 | std::string BuildAoffi(Operation); | ||
| 229 | std::string GlobalMemoryPointer(const GmemNode& gmem); | ||
| 230 | void Exit(); | ||
| 231 | |||
| 232 | std::string Assign(Operation); | ||
| 233 | std::string Select(Operation); | ||
| 234 | std::string FClamp(Operation); | ||
| 235 | std::string FCastHalf0(Operation); | ||
| 236 | std::string FCastHalf1(Operation); | ||
| 237 | std::string FSqrt(Operation); | ||
| 238 | std::string FSwizzleAdd(Operation); | ||
| 239 | std::string HAdd2(Operation); | ||
| 240 | std::string HMul2(Operation); | ||
| 241 | std::string HFma2(Operation); | ||
| 242 | std::string HAbsolute(Operation); | ||
| 243 | std::string HNegate(Operation); | ||
| 244 | std::string HClamp(Operation); | ||
| 245 | std::string HCastFloat(Operation); | ||
| 246 | std::string HUnpack(Operation); | ||
| 247 | std::string HMergeF32(Operation); | ||
| 248 | std::string HMergeH0(Operation); | ||
| 249 | std::string HMergeH1(Operation); | ||
| 250 | std::string HPack2(Operation); | ||
| 251 | std::string LogicalAssign(Operation); | ||
| 252 | std::string LogicalPick2(Operation); | ||
| 253 | std::string LogicalAnd2(Operation); | ||
| 254 | std::string FloatOrdered(Operation); | ||
| 255 | std::string FloatUnordered(Operation); | ||
| 256 | std::string LogicalAddCarry(Operation); | ||
| 257 | std::string Texture(Operation); | ||
| 258 | std::string TextureGather(Operation); | ||
| 259 | std::string TextureQueryDimensions(Operation); | ||
| 260 | std::string TextureQueryLod(Operation); | ||
| 261 | std::string TexelFetch(Operation); | ||
| 262 | std::string TextureGradient(Operation); | ||
| 263 | std::string ImageLoad(Operation); | ||
| 264 | std::string ImageStore(Operation); | ||
| 265 | std::string Branch(Operation); | ||
| 266 | std::string BranchIndirect(Operation); | ||
| 267 | std::string PushFlowStack(Operation); | ||
| 268 | std::string PopFlowStack(Operation); | ||
| 269 | std::string Exit(Operation); | ||
| 270 | std::string Discard(Operation); | ||
| 271 | std::string EmitVertex(Operation); | ||
| 272 | std::string EndPrimitive(Operation); | ||
| 273 | std::string InvocationId(Operation); | ||
| 274 | std::string YNegate(Operation); | ||
| 275 | std::string ThreadId(Operation); | ||
| 276 | std::string ShuffleIndexed(Operation); | ||
| 277 | std::string Barrier(Operation); | ||
| 278 | std::string MemoryBarrierGroup(Operation); | ||
| 279 | std::string MemoryBarrierGlobal(Operation); | ||
| 280 | |||
| 281 | template <const std::string_view& op> | ||
| 282 | std::string Unary(Operation operation) { | ||
| 283 | std::string temporary = AllocTemporary(); | ||
| 284 | AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0])); | ||
| 285 | return temporary; | ||
| 286 | } | ||
| 287 | |||
| 288 | template <const std::string_view& op> | ||
| 289 | std::string Binary(Operation operation) { | ||
| 290 | std::string temporary = AllocTemporary(); | ||
| 291 | AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), | ||
| 292 | Visit(operation[1])); | ||
| 293 | return temporary; | ||
| 294 | } | ||
| 295 | |||
| 296 | template <const std::string_view& op> | ||
| 297 | std::string Trinary(Operation operation) { | ||
| 298 | std::string temporary = AllocTemporary(); | ||
| 299 | AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), | ||
| 300 | Visit(operation[1]), Visit(operation[2])); | ||
| 301 | return temporary; | ||
| 302 | } | ||
| 303 | |||
| 304 | template <const std::string_view& op, bool unordered> | ||
| 305 | std::string FloatComparison(Operation operation) { | ||
| 306 | std::string temporary = AllocTemporary(); | ||
| 307 | AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation)); | ||
| 308 | AddLine("MOV.S {}, 0;", temporary); | ||
| 309 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 310 | |||
| 311 | const std::string op_a = Visit(operation[0]); | ||
| 312 | const std::string op_b = Visit(operation[1]); | ||
| 313 | if constexpr (unordered) { | ||
| 314 | AddLine("SNE.F RC.x, {}, {};", op_a, op_a); | ||
| 315 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 316 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 317 | AddLine("SNE.F RC.x, {}, {};", op_b, op_b); | ||
| 318 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 319 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 320 | } else if (op == SNE_F) { | ||
| 321 | AddLine("SNE.F RC.x, {}, {};", op_a, op_a); | ||
| 322 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 323 | AddLine("MOV.S {} (NE.x), 0;", temporary); | ||
| 324 | AddLine("SNE.F RC.x, {}, {};", op_b, op_b); | ||
| 325 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 326 | AddLine("MOV.S {} (NE.x), 0;", temporary); | ||
| 327 | } | ||
| 328 | return temporary; | ||
| 329 | } | ||
| 330 | |||
| 331 | template <const std::string_view& op, bool is_nan> | ||
| 332 | std::string HalfComparison(Operation operation) { | ||
| 333 | std::string tmp1 = AllocVectorTemporary(); | ||
| 334 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 335 | const std::string op_a = Visit(operation[0]); | ||
| 336 | const std::string op_b = Visit(operation[1]); | ||
| 337 | AddLine("UP2H.F {}, {};", tmp1, op_a); | ||
| 338 | AddLine("UP2H.F {}, {};", tmp2, op_b); | ||
| 339 | AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2); | ||
| 340 | AddLine("TRUNC.U.CC RC.xy, {};", tmp1); | ||
| 341 | AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1); | ||
| 342 | AddLine("MOV.S {}.x (NE.x), -1;", tmp1); | ||
| 343 | AddLine("MOV.S {}.y (NE.y), -1;", tmp1); | ||
| 344 | if constexpr (is_nan) { | ||
| 345 | AddLine("MOVC.F RC.x, {};", op_a); | ||
| 346 | AddLine("MOV.S {}.x (NAN.x), -1;", tmp1); | ||
| 347 | AddLine("MOVC.F RC.x, {};", op_b); | ||
| 348 | AddLine("MOV.S {}.y (NAN.x), -1;", tmp1); | ||
| 349 | } | ||
| 350 | return tmp1; | ||
| 351 | } | ||
| 352 | |||
| 353 | template <const std::string_view& op, const std::string_view& type> | ||
| 354 | std::string AtomicImage(Operation operation) { | ||
| 355 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 356 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 357 | const std::size_t num_coords = operation.GetOperandsCount(); | ||
| 358 | const std::size_t num_values = meta.values.size(); | ||
| 359 | |||
| 360 | const std::string coord = AllocVectorTemporary(); | ||
| 361 | const std::string value = AllocVectorTemporary(); | ||
| 362 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 363 | AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); | ||
| 364 | } | ||
| 365 | for (std::size_t i = 0; i < num_values; ++i) { | ||
| 366 | AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); | ||
| 367 | } | ||
| 368 | |||
| 369 | AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord, | ||
| 370 | image_id, ImageType(meta.image.type)); | ||
| 371 | return fmt::format("{}.x", coord); | ||
| 372 | } | ||
| 373 | |||
| 374 | template <const std::string_view& op, const std::string_view& type> | ||
| 375 | std::string Atomic(Operation operation) { | ||
| 376 | std::string temporary = AllocTemporary(); | ||
| 377 | std::string address; | ||
| 378 | std::string_view opname; | ||
| 379 | bool robust = false; | ||
| 380 | if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { | ||
| 381 | address = GlobalMemoryPointer(*gmem); | ||
| 382 | opname = "ATOM"; | ||
| 383 | robust = true; | ||
| 384 | } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { | ||
| 385 | address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); | ||
| 386 | opname = "ATOMS"; | ||
| 387 | } else { | ||
| 388 | UNREACHABLE(); | ||
| 389 | return "{0, 0, 0, 0}"; | ||
| 390 | } | ||
| 391 | if (robust) { | ||
| 392 | AddLine("IF NE.x;"); | ||
| 393 | } | ||
| 394 | AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); | ||
| 395 | if (robust) { | ||
| 396 | AddLine("ELSE;"); | ||
| 397 | AddLine("MOV.S {}, 0;", temporary); | ||
| 398 | AddLine("ENDIF;"); | ||
| 399 | } | ||
| 400 | return temporary; | ||
| 401 | } | ||
| 402 | |||
| 403 | template <char type> | ||
| 404 | std::string Negate(Operation operation) { | ||
| 405 | std::string temporary = AllocTemporary(); | ||
| 406 | if constexpr (type == 'F') { | ||
| 407 | AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0])); | ||
| 408 | } else { | ||
| 409 | AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0])); | ||
| 410 | } | ||
| 411 | return temporary; | ||
| 412 | } | ||
| 413 | |||
| 414 | template <char type> | ||
| 415 | std::string Absolute(Operation operation) { | ||
| 416 | std::string temporary = AllocTemporary(); | ||
| 417 | AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0])); | ||
| 418 | return temporary; | ||
| 419 | } | ||
| 420 | |||
| 421 | template <char type> | ||
| 422 | std::string BitfieldInsert(Operation operation) { | ||
| 423 | const std::string temporary = AllocVectorTemporary(); | ||
| 424 | AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3])); | ||
| 425 | AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2])); | ||
| 426 | AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]), | ||
| 427 | Visit(operation[0])); | ||
| 428 | return fmt::format("{}.x", temporary); | ||
| 429 | } | ||
| 430 | |||
| 431 | template <char type> | ||
| 432 | std::string BitfieldExtract(Operation operation) { | ||
| 433 | const std::string temporary = AllocVectorTemporary(); | ||
| 434 | AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2])); | ||
| 435 | AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1])); | ||
| 436 | AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0])); | ||
| 437 | return fmt::format("{}.x", temporary); | ||
| 438 | } | ||
| 439 | |||
| 440 | template <char swizzle> | ||
| 441 | std::string LocalInvocationId(Operation) { | ||
| 442 | return fmt::format("invocation.localid.{}", swizzle); | ||
| 443 | } | ||
| 444 | |||
| 445 | template <char swizzle> | ||
| 446 | std::string WorkGroupId(Operation) { | ||
| 447 | return fmt::format("invocation.groupid.{}", swizzle); | ||
| 448 | } | ||
| 449 | |||
| 450 | template <char c1, char c2> | ||
| 451 | std::string ThreadMask(Operation) { | ||
| 452 | return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2); | ||
| 453 | } | ||
| 454 | |||
| 455 | template <typename... Args> | ||
| 456 | void AddExpression(std::string_view text, Args&&... args) { | ||
| 457 | shader_source += fmt::format(fmt::runtime(text), std::forward<Args>(args)...); | ||
| 458 | } | ||
| 459 | |||
| 460 | template <typename... Args> | ||
| 461 | void AddLine(std::string_view text, Args&&... args) { | ||
| 462 | AddExpression(text, std::forward<Args>(args)...); | ||
| 463 | shader_source += '\n'; | ||
| 464 | } | ||
| 465 | |||
| 466 | std::string AllocLongVectorTemporary() { | ||
| 467 | max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1); | ||
| 468 | return fmt::format("L{}", num_long_temporaries++); | ||
| 469 | } | ||
| 470 | |||
| 471 | std::string AllocLongTemporary() { | ||
| 472 | return fmt::format("{}.x", AllocLongVectorTemporary()); | ||
| 473 | } | ||
| 474 | |||
| 475 | std::string AllocVectorTemporary() { | ||
| 476 | max_temporaries = std::max(max_temporaries, num_temporaries + 1); | ||
| 477 | return fmt::format("T{}", num_temporaries++); | ||
| 478 | } | ||
| 479 | |||
| 480 | std::string AllocTemporary() { | ||
| 481 | return fmt::format("{}.x", AllocVectorTemporary()); | ||
| 482 | } | ||
| 483 | |||
| 484 | void ResetTemporaries() noexcept { | ||
| 485 | num_temporaries = 0; | ||
| 486 | num_long_temporaries = 0; | ||
| 487 | } | ||
| 488 | |||
| 489 | const Device& device; | ||
| 490 | const ShaderIR& ir; | ||
| 491 | const Registry& registry; | ||
| 492 | const ShaderType stage; | ||
| 493 | |||
| 494 | std::size_t num_temporaries = 0; | ||
| 495 | std::size_t max_temporaries = 0; | ||
| 496 | |||
| 497 | std::size_t num_long_temporaries = 0; | ||
| 498 | std::size_t max_long_temporaries = 0; | ||
| 499 | |||
| 500 | std::map<GlobalMemoryBase, u32> global_memory_names; | ||
| 501 | |||
| 502 | std::string shader_source; | ||
| 503 | |||
| 504 | static constexpr std::string_view ADD_F32 = "ADD.F32"; | ||
| 505 | static constexpr std::string_view ADD_S = "ADD.S"; | ||
| 506 | static constexpr std::string_view ADD_U = "ADD.U"; | ||
| 507 | static constexpr std::string_view MUL_F32 = "MUL.F32"; | ||
| 508 | static constexpr std::string_view MUL_S = "MUL.S"; | ||
| 509 | static constexpr std::string_view MUL_U = "MUL.U"; | ||
| 510 | static constexpr std::string_view DIV_F32 = "DIV.F32"; | ||
| 511 | static constexpr std::string_view DIV_S = "DIV.S"; | ||
| 512 | static constexpr std::string_view DIV_U = "DIV.U"; | ||
| 513 | static constexpr std::string_view MAD_F32 = "MAD.F32"; | ||
| 514 | static constexpr std::string_view RSQ_F32 = "RSQ.F32"; | ||
| 515 | static constexpr std::string_view COS_F32 = "COS.F32"; | ||
| 516 | static constexpr std::string_view SIN_F32 = "SIN.F32"; | ||
| 517 | static constexpr std::string_view EX2_F32 = "EX2.F32"; | ||
| 518 | static constexpr std::string_view LG2_F32 = "LG2.F32"; | ||
| 519 | static constexpr std::string_view SLT_F = "SLT.F32"; | ||
| 520 | static constexpr std::string_view SLT_S = "SLT.S"; | ||
| 521 | static constexpr std::string_view SLT_U = "SLT.U"; | ||
| 522 | static constexpr std::string_view SEQ_F = "SEQ.F32"; | ||
| 523 | static constexpr std::string_view SEQ_S = "SEQ.S"; | ||
| 524 | static constexpr std::string_view SEQ_U = "SEQ.U"; | ||
| 525 | static constexpr std::string_view SLE_F = "SLE.F32"; | ||
| 526 | static constexpr std::string_view SLE_S = "SLE.S"; | ||
| 527 | static constexpr std::string_view SLE_U = "SLE.U"; | ||
| 528 | static constexpr std::string_view SGT_F = "SGT.F32"; | ||
| 529 | static constexpr std::string_view SGT_S = "SGT.S"; | ||
| 530 | static constexpr std::string_view SGT_U = "SGT.U"; | ||
| 531 | static constexpr std::string_view SNE_F = "SNE.F32"; | ||
| 532 | static constexpr std::string_view SNE_S = "SNE.S"; | ||
| 533 | static constexpr std::string_view SNE_U = "SNE.U"; | ||
| 534 | static constexpr std::string_view SGE_F = "SGE.F32"; | ||
| 535 | static constexpr std::string_view SGE_S = "SGE.S"; | ||
| 536 | static constexpr std::string_view SGE_U = "SGE.U"; | ||
| 537 | static constexpr std::string_view AND_S = "AND.S"; | ||
| 538 | static constexpr std::string_view AND_U = "AND.U"; | ||
| 539 | static constexpr std::string_view TRUNC_F = "TRUNC.F"; | ||
| 540 | static constexpr std::string_view TRUNC_S = "TRUNC.S"; | ||
| 541 | static constexpr std::string_view TRUNC_U = "TRUNC.U"; | ||
| 542 | static constexpr std::string_view SHL_S = "SHL.S"; | ||
| 543 | static constexpr std::string_view SHL_U = "SHL.U"; | ||
| 544 | static constexpr std::string_view SHR_S = "SHR.S"; | ||
| 545 | static constexpr std::string_view SHR_U = "SHR.U"; | ||
| 546 | static constexpr std::string_view OR_S = "OR.S"; | ||
| 547 | static constexpr std::string_view OR_U = "OR.U"; | ||
| 548 | static constexpr std::string_view XOR_S = "XOR.S"; | ||
| 549 | static constexpr std::string_view XOR_U = "XOR.U"; | ||
| 550 | static constexpr std::string_view NOT_S = "NOT.S"; | ||
| 551 | static constexpr std::string_view NOT_U = "NOT.U"; | ||
| 552 | static constexpr std::string_view BTC_S = "BTC.S"; | ||
| 553 | static constexpr std::string_view BTC_U = "BTC.U"; | ||
| 554 | static constexpr std::string_view BTFM_S = "BTFM.S"; | ||
| 555 | static constexpr std::string_view BTFM_U = "BTFM.U"; | ||
| 556 | static constexpr std::string_view ROUND_F = "ROUND.F"; | ||
| 557 | static constexpr std::string_view CEIL_F = "CEIL.F"; | ||
| 558 | static constexpr std::string_view FLR_F = "FLR.F"; | ||
| 559 | static constexpr std::string_view I2F_S = "I2F.S"; | ||
| 560 | static constexpr std::string_view I2F_U = "I2F.U"; | ||
| 561 | static constexpr std::string_view MIN_F = "MIN.F"; | ||
| 562 | static constexpr std::string_view MIN_S = "MIN.S"; | ||
| 563 | static constexpr std::string_view MIN_U = "MIN.U"; | ||
| 564 | static constexpr std::string_view MAX_F = "MAX.F"; | ||
| 565 | static constexpr std::string_view MAX_S = "MAX.S"; | ||
| 566 | static constexpr std::string_view MAX_U = "MAX.U"; | ||
| 567 | static constexpr std::string_view MOV_U = "MOV.U"; | ||
| 568 | static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U"; | ||
| 569 | static constexpr std::string_view TGALL_U = "TGALL.U"; | ||
| 570 | static constexpr std::string_view TGANY_U = "TGANY.U"; | ||
| 571 | static constexpr std::string_view TGEQ_U = "TGEQ.U"; | ||
| 572 | static constexpr std::string_view EXCH = "EXCH"; | ||
| 573 | static constexpr std::string_view ADD = "ADD"; | ||
| 574 | static constexpr std::string_view MIN = "MIN"; | ||
| 575 | static constexpr std::string_view MAX = "MAX"; | ||
| 576 | static constexpr std::string_view AND = "AND"; | ||
| 577 | static constexpr std::string_view OR = "OR"; | ||
| 578 | static constexpr std::string_view XOR = "XOR"; | ||
| 579 | static constexpr std::string_view U32 = "U32"; | ||
| 580 | static constexpr std::string_view S32 = "S32"; | ||
| 581 | |||
| 582 | static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount); | ||
| 583 | using DecompilerType = std::string (ARBDecompiler::*)(Operation); | ||
| 584 | static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = { | ||
| 585 | &ARBDecompiler::Assign, | ||
| 586 | |||
| 587 | &ARBDecompiler::Select, | ||
| 588 | |||
| 589 | &ARBDecompiler::Binary<ADD_F32>, | ||
| 590 | &ARBDecompiler::Binary<MUL_F32>, | ||
| 591 | &ARBDecompiler::Binary<DIV_F32>, | ||
| 592 | &ARBDecompiler::Trinary<MAD_F32>, | ||
| 593 | &ARBDecompiler::Negate<'F'>, | ||
| 594 | &ARBDecompiler::Absolute<'F'>, | ||
| 595 | &ARBDecompiler::FClamp, | ||
| 596 | &ARBDecompiler::FCastHalf0, | ||
| 597 | &ARBDecompiler::FCastHalf1, | ||
| 598 | &ARBDecompiler::Binary<MIN_F>, | ||
| 599 | &ARBDecompiler::Binary<MAX_F>, | ||
| 600 | &ARBDecompiler::Unary<COS_F32>, | ||
| 601 | &ARBDecompiler::Unary<SIN_F32>, | ||
| 602 | &ARBDecompiler::Unary<EX2_F32>, | ||
| 603 | &ARBDecompiler::Unary<LG2_F32>, | ||
| 604 | &ARBDecompiler::Unary<RSQ_F32>, | ||
| 605 | &ARBDecompiler::FSqrt, | ||
| 606 | &ARBDecompiler::Unary<ROUND_F>, | ||
| 607 | &ARBDecompiler::Unary<FLR_F>, | ||
| 608 | &ARBDecompiler::Unary<CEIL_F>, | ||
| 609 | &ARBDecompiler::Unary<TRUNC_F>, | ||
| 610 | &ARBDecompiler::Unary<I2F_S>, | ||
| 611 | &ARBDecompiler::Unary<I2F_U>, | ||
| 612 | &ARBDecompiler::FSwizzleAdd, | ||
| 613 | |||
| 614 | &ARBDecompiler::Binary<ADD_S>, | ||
| 615 | &ARBDecompiler::Binary<MUL_S>, | ||
| 616 | &ARBDecompiler::Binary<DIV_S>, | ||
| 617 | &ARBDecompiler::Negate<'S'>, | ||
| 618 | &ARBDecompiler::Absolute<'S'>, | ||
| 619 | &ARBDecompiler::Binary<MIN_S>, | ||
| 620 | &ARBDecompiler::Binary<MAX_S>, | ||
| 621 | |||
| 622 | &ARBDecompiler::Unary<TRUNC_S>, | ||
| 623 | &ARBDecompiler::Unary<MOV_U>, | ||
| 624 | &ARBDecompiler::Binary<SHL_S>, | ||
| 625 | &ARBDecompiler::Binary<SHR_U>, | ||
| 626 | &ARBDecompiler::Binary<SHR_S>, | ||
| 627 | &ARBDecompiler::Binary<AND_S>, | ||
| 628 | &ARBDecompiler::Binary<OR_S>, | ||
| 629 | &ARBDecompiler::Binary<XOR_S>, | ||
| 630 | &ARBDecompiler::Unary<NOT_S>, | ||
| 631 | &ARBDecompiler::BitfieldInsert<'S'>, | ||
| 632 | &ARBDecompiler::BitfieldExtract<'S'>, | ||
| 633 | &ARBDecompiler::Unary<BTC_S>, | ||
| 634 | &ARBDecompiler::Unary<BTFM_S>, | ||
| 635 | |||
| 636 | &ARBDecompiler::Binary<ADD_U>, | ||
| 637 | &ARBDecompiler::Binary<MUL_U>, | ||
| 638 | &ARBDecompiler::Binary<DIV_U>, | ||
| 639 | &ARBDecompiler::Binary<MIN_U>, | ||
| 640 | &ARBDecompiler::Binary<MAX_U>, | ||
| 641 | &ARBDecompiler::Unary<TRUNC_U>, | ||
| 642 | &ARBDecompiler::Unary<MOV_U>, | ||
| 643 | &ARBDecompiler::Binary<SHL_U>, | ||
| 644 | &ARBDecompiler::Binary<SHR_U>, | ||
| 645 | &ARBDecompiler::Binary<SHR_U>, | ||
| 646 | &ARBDecompiler::Binary<AND_U>, | ||
| 647 | &ARBDecompiler::Binary<OR_U>, | ||
| 648 | &ARBDecompiler::Binary<XOR_U>, | ||
| 649 | &ARBDecompiler::Unary<NOT_U>, | ||
| 650 | &ARBDecompiler::BitfieldInsert<'U'>, | ||
| 651 | &ARBDecompiler::BitfieldExtract<'U'>, | ||
| 652 | &ARBDecompiler::Unary<BTC_U>, | ||
| 653 | &ARBDecompiler::Unary<BTFM_U>, | ||
| 654 | |||
| 655 | &ARBDecompiler::HAdd2, | ||
| 656 | &ARBDecompiler::HMul2, | ||
| 657 | &ARBDecompiler::HFma2, | ||
| 658 | &ARBDecompiler::HAbsolute, | ||
| 659 | &ARBDecompiler::HNegate, | ||
| 660 | &ARBDecompiler::HClamp, | ||
| 661 | &ARBDecompiler::HCastFloat, | ||
| 662 | &ARBDecompiler::HUnpack, | ||
| 663 | &ARBDecompiler::HMergeF32, | ||
| 664 | &ARBDecompiler::HMergeH0, | ||
| 665 | &ARBDecompiler::HMergeH1, | ||
| 666 | &ARBDecompiler::HPack2, | ||
| 667 | |||
| 668 | &ARBDecompiler::LogicalAssign, | ||
| 669 | &ARBDecompiler::Binary<AND_U>, | ||
| 670 | &ARBDecompiler::Binary<OR_U>, | ||
| 671 | &ARBDecompiler::Binary<XOR_U>, | ||
| 672 | &ARBDecompiler::Unary<NOT_U>, | ||
| 673 | &ARBDecompiler::LogicalPick2, | ||
| 674 | &ARBDecompiler::LogicalAnd2, | ||
| 675 | |||
| 676 | &ARBDecompiler::FloatComparison<SLT_F, false>, | ||
| 677 | &ARBDecompiler::FloatComparison<SEQ_F, false>, | ||
| 678 | &ARBDecompiler::FloatComparison<SLE_F, false>, | ||
| 679 | &ARBDecompiler::FloatComparison<SGT_F, false>, | ||
| 680 | &ARBDecompiler::FloatComparison<SNE_F, false>, | ||
| 681 | &ARBDecompiler::FloatComparison<SGE_F, false>, | ||
| 682 | &ARBDecompiler::FloatOrdered, | ||
| 683 | &ARBDecompiler::FloatUnordered, | ||
| 684 | &ARBDecompiler::FloatComparison<SLT_F, true>, | ||
| 685 | &ARBDecompiler::FloatComparison<SEQ_F, true>, | ||
| 686 | &ARBDecompiler::FloatComparison<SLE_F, true>, | ||
| 687 | &ARBDecompiler::FloatComparison<SGT_F, true>, | ||
| 688 | &ARBDecompiler::FloatComparison<SNE_F, true>, | ||
| 689 | &ARBDecompiler::FloatComparison<SGE_F, true>, | ||
| 690 | |||
| 691 | &ARBDecompiler::Binary<SLT_S>, | ||
| 692 | &ARBDecompiler::Binary<SEQ_S>, | ||
| 693 | &ARBDecompiler::Binary<SLE_S>, | ||
| 694 | &ARBDecompiler::Binary<SGT_S>, | ||
| 695 | &ARBDecompiler::Binary<SNE_S>, | ||
| 696 | &ARBDecompiler::Binary<SGE_S>, | ||
| 697 | |||
| 698 | &ARBDecompiler::Binary<SLT_U>, | ||
| 699 | &ARBDecompiler::Binary<SEQ_U>, | ||
| 700 | &ARBDecompiler::Binary<SLE_U>, | ||
| 701 | &ARBDecompiler::Binary<SGT_U>, | ||
| 702 | &ARBDecompiler::Binary<SNE_U>, | ||
| 703 | &ARBDecompiler::Binary<SGE_U>, | ||
| 704 | |||
| 705 | &ARBDecompiler::LogicalAddCarry, | ||
| 706 | |||
| 707 | &ARBDecompiler::HalfComparison<SLT_F, false>, | ||
| 708 | &ARBDecompiler::HalfComparison<SEQ_F, false>, | ||
| 709 | &ARBDecompiler::HalfComparison<SLE_F, false>, | ||
| 710 | &ARBDecompiler::HalfComparison<SGT_F, false>, | ||
| 711 | &ARBDecompiler::HalfComparison<SNE_F, false>, | ||
| 712 | &ARBDecompiler::HalfComparison<SGE_F, false>, | ||
| 713 | &ARBDecompiler::HalfComparison<SLT_F, true>, | ||
| 714 | &ARBDecompiler::HalfComparison<SEQ_F, true>, | ||
| 715 | &ARBDecompiler::HalfComparison<SLE_F, true>, | ||
| 716 | &ARBDecompiler::HalfComparison<SGT_F, true>, | ||
| 717 | &ARBDecompiler::HalfComparison<SNE_F, true>, | ||
| 718 | &ARBDecompiler::HalfComparison<SGE_F, true>, | ||
| 719 | |||
| 720 | &ARBDecompiler::Texture, | ||
| 721 | &ARBDecompiler::Texture, | ||
| 722 | &ARBDecompiler::TextureGather, | ||
| 723 | &ARBDecompiler::TextureQueryDimensions, | ||
| 724 | &ARBDecompiler::TextureQueryLod, | ||
| 725 | &ARBDecompiler::TexelFetch, | ||
| 726 | &ARBDecompiler::TextureGradient, | ||
| 727 | |||
| 728 | &ARBDecompiler::ImageLoad, | ||
| 729 | &ARBDecompiler::ImageStore, | ||
| 730 | |||
| 731 | &ARBDecompiler::AtomicImage<ADD, U32>, | ||
| 732 | &ARBDecompiler::AtomicImage<AND, U32>, | ||
| 733 | &ARBDecompiler::AtomicImage<OR, U32>, | ||
| 734 | &ARBDecompiler::AtomicImage<XOR, U32>, | ||
| 735 | &ARBDecompiler::AtomicImage<EXCH, U32>, | ||
| 736 | |||
| 737 | &ARBDecompiler::Atomic<EXCH, U32>, | ||
| 738 | &ARBDecompiler::Atomic<ADD, U32>, | ||
| 739 | &ARBDecompiler::Atomic<MIN, U32>, | ||
| 740 | &ARBDecompiler::Atomic<MAX, U32>, | ||
| 741 | &ARBDecompiler::Atomic<AND, U32>, | ||
| 742 | &ARBDecompiler::Atomic<OR, U32>, | ||
| 743 | &ARBDecompiler::Atomic<XOR, U32>, | ||
| 744 | |||
| 745 | &ARBDecompiler::Atomic<EXCH, S32>, | ||
| 746 | &ARBDecompiler::Atomic<ADD, S32>, | ||
| 747 | &ARBDecompiler::Atomic<MIN, S32>, | ||
| 748 | &ARBDecompiler::Atomic<MAX, S32>, | ||
| 749 | &ARBDecompiler::Atomic<AND, S32>, | ||
| 750 | &ARBDecompiler::Atomic<OR, S32>, | ||
| 751 | &ARBDecompiler::Atomic<XOR, S32>, | ||
| 752 | |||
| 753 | &ARBDecompiler::Atomic<ADD, U32>, | ||
| 754 | &ARBDecompiler::Atomic<MIN, U32>, | ||
| 755 | &ARBDecompiler::Atomic<MAX, U32>, | ||
| 756 | &ARBDecompiler::Atomic<AND, U32>, | ||
| 757 | &ARBDecompiler::Atomic<OR, U32>, | ||
| 758 | &ARBDecompiler::Atomic<XOR, U32>, | ||
| 759 | |||
| 760 | &ARBDecompiler::Atomic<ADD, S32>, | ||
| 761 | &ARBDecompiler::Atomic<MIN, S32>, | ||
| 762 | &ARBDecompiler::Atomic<MAX, S32>, | ||
| 763 | &ARBDecompiler::Atomic<AND, S32>, | ||
| 764 | &ARBDecompiler::Atomic<OR, S32>, | ||
| 765 | &ARBDecompiler::Atomic<XOR, S32>, | ||
| 766 | |||
| 767 | &ARBDecompiler::Branch, | ||
| 768 | &ARBDecompiler::BranchIndirect, | ||
| 769 | &ARBDecompiler::PushFlowStack, | ||
| 770 | &ARBDecompiler::PopFlowStack, | ||
| 771 | &ARBDecompiler::Exit, | ||
| 772 | &ARBDecompiler::Discard, | ||
| 773 | |||
| 774 | &ARBDecompiler::EmitVertex, | ||
| 775 | &ARBDecompiler::EndPrimitive, | ||
| 776 | |||
| 777 | &ARBDecompiler::InvocationId, | ||
| 778 | &ARBDecompiler::YNegate, | ||
| 779 | &ARBDecompiler::LocalInvocationId<'x'>, | ||
| 780 | &ARBDecompiler::LocalInvocationId<'y'>, | ||
| 781 | &ARBDecompiler::LocalInvocationId<'z'>, | ||
| 782 | &ARBDecompiler::WorkGroupId<'x'>, | ||
| 783 | &ARBDecompiler::WorkGroupId<'y'>, | ||
| 784 | &ARBDecompiler::WorkGroupId<'z'>, | ||
| 785 | |||
| 786 | &ARBDecompiler::Unary<TGBALLOT_U>, | ||
| 787 | &ARBDecompiler::Unary<TGALL_U>, | ||
| 788 | &ARBDecompiler::Unary<TGANY_U>, | ||
| 789 | &ARBDecompiler::Unary<TGEQ_U>, | ||
| 790 | |||
| 791 | &ARBDecompiler::ThreadId, | ||
| 792 | &ARBDecompiler::ThreadMask<'e', 'q'>, | ||
| 793 | &ARBDecompiler::ThreadMask<'g', 'e'>, | ||
| 794 | &ARBDecompiler::ThreadMask<'g', 't'>, | ||
| 795 | &ARBDecompiler::ThreadMask<'l', 'e'>, | ||
| 796 | &ARBDecompiler::ThreadMask<'l', 't'>, | ||
| 797 | &ARBDecompiler::ShuffleIndexed, | ||
| 798 | |||
| 799 | &ARBDecompiler::Barrier, | ||
| 800 | &ARBDecompiler::MemoryBarrierGroup, | ||
| 801 | &ARBDecompiler::MemoryBarrierGlobal, | ||
| 802 | }; | ||
| 803 | }; | ||
| 804 | |||
ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
                             ShaderType stage_, std::string_view identifier)
    : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} {
    // Two-pass source generation: the program body is emitted first, then the
    // declarations are generated and the body is re-appended after them.
    // Presumably the body pass updates counters (e.g. max_temporaries) that
    // the declaration pass reads -- TODO confirm against AllocTemporary.
    DefineGlobalMemory();

    // Scratch condition register and the two FSWZ lookup arrays used by the
    // generated code (see InitializeVariables for their contents).
    AddLine("TEMP RC;");
    AddLine("TEMP FSWZA[4];");
    AddLine("TEMP FSWZB[4];");
    if (ir.IsDecompiled()) {
        DecompileAST();
    } else {
        DecompileBranchMode();
    }
    AddLine("END");

    // Stash the emitted body, rebuild shader_source starting with the header
    // and declarations, then append the body at the end.
    const std::string code = std::move(shader_source);
    DeclareHeader();
    DeclareVertex();
    DeclareGeometry();
    DeclareFragment();
    DeclareCompute();
    DeclareInputAttributes();
    DeclareOutputAttributes();
    DeclareLocalMemory();
    DeclareGlobalMemory();
    DeclareConstantBuffers();
    DeclareRegisters();
    DeclareTemporaries();
    DeclarePredicates();
    DeclareInternalFlags();

    // NOTE(review): 'identifier' is not used in this constructor body.
    shader_source += code;
}
| 838 | |||
| 839 | std::string_view HeaderStageName(ShaderType stage) { | ||
| 840 | switch (stage) { | ||
| 841 | case ShaderType::Vertex: | ||
| 842 | return "vp"; | ||
| 843 | case ShaderType::Geometry: | ||
| 844 | return "gp"; | ||
| 845 | case ShaderType::Fragment: | ||
| 846 | return "fp"; | ||
| 847 | case ShaderType::Compute: | ||
| 848 | return "cp"; | ||
| 849 | default: | ||
| 850 | UNREACHABLE(); | ||
| 851 | return ""; | ||
| 852 | } | ||
| 853 | } | ||
| 854 | |||
| 855 | void ARBDecompiler::DefineGlobalMemory() { | ||
| 856 | u32 binding = 0; | ||
| 857 | for (const auto& pair : ir.GetGlobalMemory()) { | ||
| 858 | const GlobalMemoryBase base = pair.first; | ||
| 859 | global_memory_names.emplace(base, binding); | ||
| 860 | ++binding; | ||
| 861 | } | ||
| 862 | } | ||
| 863 | |||
| 864 | void ARBDecompiler::DeclareHeader() { | ||
| 865 | AddLine("!!NV{}5.0", HeaderStageName(stage)); | ||
| 866 | // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D | ||
| 867 | AddLine("OPTION NV_internal;"); | ||
| 868 | AddLine("OPTION NV_gpu_program_fp64;"); | ||
| 869 | AddLine("OPTION NV_shader_thread_group;"); | ||
| 870 | if (ir.UsesWarps() && device.HasWarpIntrinsics()) { | ||
| 871 | AddLine("OPTION NV_shader_thread_shuffle;"); | ||
| 872 | } | ||
| 873 | if (stage == ShaderType::Vertex) { | ||
| 874 | if (device.HasNvViewportArray2()) { | ||
| 875 | AddLine("OPTION NV_viewport_array2;"); | ||
| 876 | } | ||
| 877 | } | ||
| 878 | if (stage == ShaderType::Fragment) { | ||
| 879 | AddLine("OPTION ARB_draw_buffers;"); | ||
| 880 | } | ||
| 881 | if (device.HasImageLoadFormatted()) { | ||
| 882 | AddLine("OPTION EXT_shader_image_load_formatted;"); | ||
| 883 | } | ||
| 884 | } | ||
| 885 | |||
| 886 | void ARBDecompiler::DeclareVertex() { | ||
| 887 | if (stage != ShaderType::Vertex) { | ||
| 888 | return; | ||
| 889 | } | ||
| 890 | AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};"); | ||
| 891 | } | ||
| 892 | |||
void ARBDecompiler::DeclareGeometry() {
    // Emits geometry-program directives: input/output primitive topology, the
    // maximum number of emitted vertices, and an alias for the per-vertex
    // input position array.
    if (stage != ShaderType::Geometry) {
        return;
    }
    const auto& info = registry.GetGraphicsInfo();
    const auto& header = ir.GetHeader();
    AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology));
    AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology));
    AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value());
    AddLine("ATTRIB vertex_position = vertex.position;");
}
| 904 | |||
| 905 | void ARBDecompiler::DeclareFragment() { | ||
| 906 | if (stage != ShaderType::Fragment) { | ||
| 907 | return; | ||
| 908 | } | ||
| 909 | AddLine("OUTPUT result_color7 = result.color[7];"); | ||
| 910 | AddLine("OUTPUT result_color6 = result.color[6];"); | ||
| 911 | AddLine("OUTPUT result_color5 = result.color[5];"); | ||
| 912 | AddLine("OUTPUT result_color4 = result.color[4];"); | ||
| 913 | AddLine("OUTPUT result_color3 = result.color[3];"); | ||
| 914 | AddLine("OUTPUT result_color2 = result.color[2];"); | ||
| 915 | AddLine("OUTPUT result_color1 = result.color[1];"); | ||
| 916 | AddLine("OUTPUT result_color0 = result.color;"); | ||
| 917 | } | ||
| 918 | |||
| 919 | void ARBDecompiler::DeclareCompute() { | ||
| 920 | if (stage != ShaderType::Compute) { | ||
| 921 | return; | ||
| 922 | } | ||
| 923 | const ComputeInfo& info = registry.GetComputeInfo(); | ||
| 924 | AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], | ||
| 925 | info.workgroup_size[2]); | ||
| 926 | if (info.shared_memory_size_in_words == 0) { | ||
| 927 | return; | ||
| 928 | } | ||
| 929 | const u32 limit = device.GetMaxComputeSharedMemorySize(); | ||
| 930 | u32 size_in_bytes = info.shared_memory_size_in_words * 4; | ||
| 931 | if (size_in_bytes > limit) { | ||
| 932 | LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", | ||
| 933 | size_in_bytes, limit); | ||
| 934 | size_in_bytes = limit; | ||
| 935 | } | ||
| 936 | |||
| 937 | AddLine("SHARED_MEMORY {};", size_in_bytes); | ||
| 938 | AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); | ||
| 939 | } | ||
| 940 | |||
| 941 | void ARBDecompiler::DeclareInputAttributes() { | ||
| 942 | if (stage == ShaderType::Compute) { | ||
| 943 | return; | ||
| 944 | } | ||
| 945 | const std::string_view stage_name = StageInputName(stage); | ||
| 946 | for (const auto attribute : ir.GetInputAttributes()) { | ||
| 947 | if (!IsGenericAttribute(attribute)) { | ||
| 948 | continue; | ||
| 949 | } | ||
| 950 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 951 | |||
| 952 | std::string_view suffix; | ||
| 953 | if (stage == ShaderType::Fragment) { | ||
| 954 | const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)}; | ||
| 955 | if (input_mode == PixelImap::Unused) { | ||
| 956 | return; | ||
| 957 | } | ||
| 958 | suffix = GetInputFlags(input_mode); | ||
| 959 | } | ||
| 960 | AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index, | ||
| 961 | index); | ||
| 962 | } | ||
| 963 | } | ||
| 964 | |||
| 965 | void ARBDecompiler::DeclareOutputAttributes() { | ||
| 966 | if (stage == ShaderType::Compute) { | ||
| 967 | return; | ||
| 968 | } | ||
| 969 | for (const auto attribute : ir.GetOutputAttributes()) { | ||
| 970 | if (!IsGenericAttribute(attribute)) { | ||
| 971 | continue; | ||
| 972 | } | ||
| 973 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 974 | AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index); | ||
| 975 | } | ||
| 976 | } | ||
| 977 | |||
| 978 | void ARBDecompiler::DeclareLocalMemory() { | ||
| 979 | u64 size = 0; | ||
| 980 | if (stage == ShaderType::Compute) { | ||
| 981 | size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; | ||
| 982 | } else { | ||
| 983 | size = ir.GetHeader().GetLocalMemorySize(); | ||
| 984 | } | ||
| 985 | if (size == 0) { | ||
| 986 | return; | ||
| 987 | } | ||
| 988 | const u64 element_count = Common::AlignUp(size, 4) / 4; | ||
| 989 | AddLine("TEMP lmem[{}];", element_count); | ||
| 990 | } | ||
| 991 | |||
| 992 | void ARBDecompiler::DeclareGlobalMemory() { | ||
| 993 | const size_t num_entries = ir.GetGlobalMemory().size(); | ||
| 994 | if (num_entries > 0) { | ||
| 995 | AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1); | ||
| 996 | } | ||
| 997 | } | ||
| 998 | |||
| 999 | void ARBDecompiler::DeclareConstantBuffers() { | ||
| 1000 | u32 binding = 0; | ||
| 1001 | for (const auto& cbuf : ir.GetConstantBuffers()) { | ||
| 1002 | AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding); | ||
| 1003 | ++binding; | ||
| 1004 | } | ||
| 1005 | } | ||
| 1006 | |||
| 1007 | void ARBDecompiler::DeclareRegisters() { | ||
| 1008 | for (const u32 gpr : ir.GetRegisters()) { | ||
| 1009 | AddLine("TEMP R{};", gpr); | ||
| 1010 | } | ||
| 1011 | } | ||
| 1012 | |||
| 1013 | void ARBDecompiler::DeclareTemporaries() { | ||
| 1014 | for (std::size_t i = 0; i < max_temporaries; ++i) { | ||
| 1015 | AddLine("TEMP T{};", i); | ||
| 1016 | } | ||
| 1017 | for (std::size_t i = 0; i < max_long_temporaries; ++i) { | ||
| 1018 | AddLine("LONG TEMP L{};", i); | ||
| 1019 | } | ||
| 1020 | } | ||
| 1021 | |||
| 1022 | void ARBDecompiler::DeclarePredicates() { | ||
| 1023 | for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { | ||
| 1024 | AddLine("TEMP P{};", static_cast<u64>(pred)); | ||
| 1025 | } | ||
| 1026 | } | ||
| 1027 | |||
| 1028 | void ARBDecompiler::DeclareInternalFlags() { | ||
| 1029 | for (const char* name : INTERNAL_FLAG_NAMES) { | ||
| 1030 | AddLine("TEMP {};", name); | ||
| 1031 | } | ||
| 1032 | } | ||
| 1033 | |||
| 1034 | void ARBDecompiler::InitializeVariables() { | ||
| 1035 | AddLine("MOV.F32 FSWZA[0], -1;"); | ||
| 1036 | AddLine("MOV.F32 FSWZA[1], 1;"); | ||
| 1037 | AddLine("MOV.F32 FSWZA[2], -1;"); | ||
| 1038 | AddLine("MOV.F32 FSWZA[3], 0;"); | ||
| 1039 | AddLine("MOV.F32 FSWZB[0], -1;"); | ||
| 1040 | AddLine("MOV.F32 FSWZB[1], -1;"); | ||
| 1041 | AddLine("MOV.F32 FSWZB[2], 1;"); | ||
| 1042 | AddLine("MOV.F32 FSWZB[3], -1;"); | ||
| 1043 | |||
| 1044 | if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) { | ||
| 1045 | AddLine("MOV.F result.position, {{0, 0, 0, 1}};"); | ||
| 1046 | } | ||
| 1047 | for (const auto attribute : ir.GetOutputAttributes()) { | ||
| 1048 | if (!IsGenericAttribute(attribute)) { | ||
| 1049 | continue; | ||
| 1050 | } | ||
| 1051 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 1052 | AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index); | ||
| 1053 | } | ||
| 1054 | for (const u32 gpr : ir.GetRegisters()) { | ||
| 1055 | AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr); | ||
| 1056 | } | ||
| 1057 | for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { | ||
| 1058 | AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred)); | ||
| 1059 | } | ||
| 1060 | } | ||
| 1061 | |||
| 1062 | void ARBDecompiler::DecompileAST() { | ||
| 1063 | const u32 num_flow_variables = ir.GetASTNumVariables(); | ||
| 1064 | for (u32 i = 0; i < num_flow_variables; ++i) { | ||
| 1065 | AddLine("TEMP F{};", i); | ||
| 1066 | } | ||
| 1067 | for (u32 i = 0; i < num_flow_variables; ++i) { | ||
| 1068 | AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i); | ||
| 1069 | } | ||
| 1070 | |||
| 1071 | InitializeVariables(); | ||
| 1072 | |||
| 1073 | VisitAST(ir.GetASTProgram()); | ||
| 1074 | } | ||
| 1075 | |||
void ARBDecompiler::DecompileBranchMode() {
    // Emulates unstructured guest control flow with a dispatch loop: a REP
    // loop containing a chain of IF/ELSE tests on a virtual program counter
    // (PC), one test per basic block. Branch-style operations update PC and
    // CONT back to the top of the loop.
    static constexpr u32 FLOW_STACK_SIZE = 20;
    if (!ir.IsFlowStackDisabled()) {
        // SSY/PBK arrays emulate the guest's sync and break flow stacks,
        // with *_TOP acting as the stack pointers.
        AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
        AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
        AddLine("TEMP SSY_TOP;");
        AddLine("TEMP PBK_TOP;");
    }

    AddLine("TEMP PC;");

    if (!ir.IsFlowStackDisabled()) {
        AddLine("MOV.U SSY_TOP.x, 0;");
        AddLine("MOV.U PBK_TOP.x, 0;");
    }

    InitializeVariables();

    // Start execution at the first basic block's address.
    const auto basic_block_end = ir.GetBasicBlocks().end();
    auto basic_block_it = ir.GetBasicBlocks().begin();
    const u32 first_address = basic_block_it->first;
    AddLine("MOV.U PC.x, {};", first_address);

    AddLine("REP;");

    std::size_t num_blocks = 0;
    while (basic_block_it != basic_block_end) {
        const auto& [address, bb] = *basic_block_it;
        ++num_blocks;

        // Enter this block only when PC matches its address.
        AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
        AddLine("IF NE.x;");

        VisitBlock(bb);

        ++basic_block_it;

        if (basic_block_it != basic_block_end) {
            // Unless the block ends in an explicit branch, fall through to the
            // next block by setting PC and restarting the dispatch loop.
            const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
            if (!op || op->GetCode() != OperationCode::Branch) {
                const u32 next_address = basic_block_it->first;
                AddLine("MOV.U PC.x, {};", next_address);
                AddLine("CONT;");
            }
        }

        // Each block opens an IF that is chained with ELSE; the matching
        // ENDIFs are emitted after the loop below.
        AddLine("ELSE;");
    }
    // No block matched PC: leave the program.
    AddLine("RET;");
    while (num_blocks--) {
        AddLine("ENDIF;");
    }

    AddLine("ENDREP;");
}
| 1131 | |||
// Emits code for one structured AST node, dispatching on the node's variant
// alternative. Truthiness convention: a condition is "true" when its value is
// nonzero (MOVC + the NE condition code).
void ARBDecompiler::VisitAST(const ASTNode& node) {
    if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) {
        // Top-level program: emit every child in order.
        for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
    } else if (const auto if_then = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
        const std::string condition = VisitExpression(if_then->condition);
        ResetTemporaries();

        AddLine("MOVC.U RC.x, {};", condition);
        AddLine("IF NE.x;");
        for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
        AddLine("ENDIF;");
    } else if (const auto if_else = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
        // Pairs with the IF emitted by the preceding ASTIfThen node; note the
        // ELSE is emitted before the then-branch's ENDIF closes it.
        AddLine("ELSE;");
        for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
    } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
        VisitBlock(decoded->nodes);
    } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
        // Assign a flow variable (declared in DecompileAST).
        AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition));
        ResetTemporaries();
    } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
        // do/while loop: REP ... BRK-when-condition ... ENDREP. The condition
        // expression is evaluated once, before the loop body is emitted --
        // NOTE(review): this relies on the condition code reading stable
        // storage (flow vars/predicates) rather than loop-local temporaries.
        const std::string condition = VisitExpression(do_while->condition);
        ResetTemporaries();
        AddLine("REP;");
        for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
        AddLine("MOVC.U RC.x, {};", condition);
        AddLine("BRK (NE.x);");
        AddLine("ENDREP;");
    } else if (const auto ast_return = std::get_if<ASTReturn>(&*node->GetInnerData())) {
        // Conditional (or unconditional) program exit; 'kills' discards the
        // fragment instead of returning.
        const bool is_true = ExprIsTrue(ast_return->condition);
        if (!is_true) {
            AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition));
            AddLine("IF NE.x;");
            ResetTemporaries();
        }
        if (ast_return->kills) {
            AddLine("KIL TR;");
        } else {
            Exit();
        }
        if (!is_true) {
            AddLine("ENDIF;");
        }
    } else if (const auto ast_break = std::get_if<ASTBreak>(&*node->GetInnerData())) {
        // Break out of the innermost REP, unconditionally or guarded.
        if (ExprIsTrue(ast_break->condition)) {
            AddLine("BRK;");
        } else {
            AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition));
            AddLine("BRK (NE.x);");
            ResetTemporaries();
        }
    } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) {
        // Nothing to do
    } else {
        UNREACHABLE();
    }
}
| 1196 | |||
// Lowers a control-flow expression to assembly, returning the operand string
// holding its value. Boolean convention: true is 0xffffffff (-1 as signed),
// false is 0.
std::string ARBDecompiler::VisitExpression(const Expr& node) {
    if (const auto expr = std::get_if<ExprAnd>(&*node)) {
        std::string result = AllocTemporary();
        AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1),
                VisitExpression(expr->operand2));
        return result;
    }
    if (const auto expr = std::get_if<ExprOr>(&*node)) {
        std::string result = AllocTemporary();
        AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1),
                VisitExpression(expr->operand2));
        return result;
    }
    if (const auto expr = std::get_if<ExprNot>(&*node)) {
        // Logical NOT via CMP: dst = (src < 0) ? 0 : -1. True (0xffffffff) is
        // negative as a signed value, so it maps to 0 and vice versa.
        std::string result = AllocTemporary();
        AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1));
        return result;
    }
    if (const auto expr = std::get_if<ExprPredicate>(&*node)) {
        return fmt::format("P{}.x", static_cast<u64>(expr->predicate));
    }
    if (const auto expr = std::get_if<ExprCondCode>(&*node)) {
        return Visit(ir.GetConditionCode(expr->cc));
    }
    if (const auto expr = std::get_if<ExprVar>(&*node)) {
        // AST flow variable declared in DecompileAST.
        return fmt::format("F{}.x", expr->var_index);
    }
    if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
        return expr->value ? "0xffffffff" : "0";
    }
    if (const auto expr = std::get_if<ExprGprEqual>(&*node)) {
        std::string result = AllocTemporary();
        AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value);
        return result;
    }
    UNREACHABLE();
    return "0";
}
| 1235 | |||
| 1236 | void ARBDecompiler::VisitBlock(const NodeBlock& bb) { | ||
| 1237 | for (const auto& node : bb) { | ||
| 1238 | Visit(node); | ||
| 1239 | } | ||
| 1240 | } | ||
| 1241 | |||
// Lowers one IR node to assembly and returns the operand string holding its
// value (empty for statement-like nodes such as conditionals and comments).
std::string ARBDecompiler::Visit(const Node& node) {
    if (const auto operation = std::get_if<OperationNode>(&*node)) {
        // Amend nodes are side-effect code attached to the operation; emit
        // them before dispatching.
        if (const auto amend_index = operation->GetAmendIndex()) {
            Visit(ir.GetAmendNode(*amend_index));
        }
        // Dispatch through the OPERATION_DECOMPILERS table indexed by opcode.
        const std::size_t index = static_cast<std::size_t>(operation->GetCode());
        if (index >= OPERATION_DECOMPILERS.size()) {
            UNREACHABLE_MSG("Out of bounds operation: {}", index);
            return {};
        }
        const auto decompiler = OPERATION_DECOMPILERS[index];
        if (decompiler == nullptr) {
            UNREACHABLE_MSG("Undefined operation: {}", index);
            return {};
        }
        return (this->*decompiler)(*operation);
    }

    if (const auto gpr = std::get_if<GprNode>(&*node)) {
        // R0..Rn temporaries; the zero register reads as a literal zero.
        const u32 index = gpr->GetIndex();
        if (index == Register::ZeroIndex) {
            return "{0, 0, 0, 0}.x";
        }
        return fmt::format("R{}.x", index);
    }

    if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
        return fmt::format("CV{}.x", cv->GetIndex());
    }

    if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
        // Immediates are materialized into a temporary.
        std::string temporary = AllocTemporary();
        AddLine("MOV.U {}, {};", temporary, immediate->GetValue());
        return temporary;
    }

    if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
        // Predicates use -1 for true and 0 for false; the two pseudo-indices
        // are constant true/false.
        std::string temporary = AllocTemporary();
        switch (const auto index = predicate->GetIndex(); index) {
        case Tegra::Shader::Pred::UnusedIndex:
            AddLine("MOV.S {}, -1;", temporary);
            break;
        case Tegra::Shader::Pred::NeverExecute:
            AddLine("MOV.S {}, 0;", temporary);
            break;
        default:
            AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index));
            break;
        }
        if (predicate->IsNegated()) {
            // Logical NOT via CMP: dst = (src < 0) ? 0 : -1.
            AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary);
        }
        return temporary;
    }

    if (const auto abuf = std::get_if<AbufNode>(&*node)) {
        // Input attribute read; geometry shaders index per-vertex arrays with
        // the buffer operand.
        if (abuf->IsPhysicalBuffer()) {
            UNIMPLEMENTED_MSG("Physical buffers are not implemented");
            return "{0, 0, 0, 0}.x";
        }

        const Attribute::Index index = abuf->GetIndex();
        const u32 element = abuf->GetElement();
        const char swizzle = Swizzle(element);
        switch (index) {
        case Attribute::Index::Position: {
            if (stage == ShaderType::Geometry) {
                return fmt::format("{}_position[{}].{}", StageInputName(stage),
                                   Visit(abuf->GetBuffer()), swizzle);
            } else {
                return fmt::format("{}.position.{}", StageInputName(stage), swizzle);
            }
        }
        case Attribute::Index::TessCoordInstanceIDVertexID:
            ASSERT(stage == ShaderType::Vertex);
            switch (element) {
            case 2:
                return "vertex.instance";
            case 3:
                return "vertex.id";
            }
            UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
            break;
        case Attribute::Index::PointCoord:
            switch (element) {
            case 0:
                return "fragment.pointcoord.x";
            case 1:
                return "fragment.pointcoord.y";
            }
            UNIMPLEMENTED();
            break;
        case Attribute::Index::FrontFacing: {
            // Convert fragment.facing into the IR's -1/0 boolean convention.
            ASSERT(stage == ShaderType::Fragment);
            ASSERT(element == 3);
            const std::string temporary = AllocVectorTemporary();
            AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};");
            AddLine("MOV.U.CC RC.x, -RC;");
            AddLine("MOV.S {}.x, 0;", temporary);
            AddLine("MOV.S {}.x (NE.x), -1;", temporary);
            return fmt::format("{}.x", temporary);
        }
        default:
            if (IsGenericAttribute(index)) {
                if (stage == ShaderType::Geometry) {
                    return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index),
                                       Visit(abuf->GetBuffer()), swizzle);
                } else {
                    return fmt::format("{}.attrib[{}].{}", StageInputName(stage),
                                       GetGenericAttributeIndex(index), swizzle);
                }
            }
            UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index);
            break;
        }
        return "{0, 0, 0, 0}.x";
    }

    if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
        // Constant buffer read through LDC; immediate offsets are inlined.
        std::string offset_string;
        const auto& offset = cbuf->GetOffset();
        if (const auto imm = std::get_if<ImmediateNode>(&*offset)) {
            offset_string = std::to_string(imm->GetValue());
        } else {
            offset_string = Visit(offset);
        }
        std::string temporary = AllocTemporary();
        AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string);
        return temporary;
    }

    if (const auto gmem = std::get_if<GmemNode>(&*node)) {
        // Global memory read; GlobalMemoryPointer sets CC from its bounds
        // check, so the LOAD is predicated on NE.x and the default is 0.
        std::string temporary = AllocTemporary();
        AddLine("MOV {}, 0;", temporary);
        AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem));
        return temporary;
    }

    if (const auto lmem = std::get_if<LmemNode>(&*node)) {
        // Local memory read: byte address converted to a word index.
        std::string temporary = Visit(lmem->GetAddress());
        AddLine("SHR.U {}, {}, 2;", temporary, temporary);
        AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary);
        return temporary;
    }

    if (const auto smem = std::get_if<SmemNode>(&*node)) {
        std::string temporary = Visit(smem->GetAddress());
        AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary);
        return temporary;
    }

    if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
        const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
        return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
    }

    if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
        // Conditionally-executed code block; produces no value.
        if (const auto amend_index = conditional->GetAmendIndex()) {
            Visit(ir.GetAmendNode(*amend_index));
        }
        AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition()));
        AddLine("IF NE.x;");
        VisitBlock(conditional->GetCode());
        AddLine("ENDIF;");
        return {};
    }

    if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) {
        // Uncommenting this will generate invalid code. GLASM lacks comments.
        // AddLine("// {}", cmt->GetText());
        return {};
    }

    UNIMPLEMENTED();
    return {};
}
| 1418 | |||
// Packs a texture operation's coordinate operands (plus array layer and depth
// comparison value when present) into a vector temporary. Returns the operand
// string to pass to the sample instruction, the register holding the
// coordinates, and the number of components written (0 in the extended case).
std::tuple<std::string, std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    UNIMPLEMENTED_IF(meta.sampler.is_indexed);

    // Shadow cube arrays need five values, which do not fit in one 4-component
    // register; the comparison value goes into a second register.
    const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array &&
                             meta.sampler.type == Tegra::Shader::TextureType::TextureCube;
    const std::size_t count = operation.GetOperandsCount();
    std::string temporary = AllocVectorTemporary();
    std::size_t i = 0;
    for (; i < count; ++i) {
        AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
    }
    if (meta.sampler.is_array) {
        // The array layer is an integer; convert it to float for the sampler.
        AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array));
        ++i;
    }
    if (meta.sampler.is_shadow) {
        std::string compare = Visit(meta.depth_compare);
        if (is_extended) {
            ASSERT(i == 4);
            std::string extra_coord = AllocVectorTemporary();
            AddLine("MOV.F {}.x, {};", extra_coord, compare);
            return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0};
        }
        AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare);
        ++i;
    }
    return {temporary, temporary, i};
}
| 1448 | |||
| 1449 | std::string ARBDecompiler::BuildAoffi(Operation operation) { | ||
| 1450 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1451 | if (meta.aoffi.empty()) { | ||
| 1452 | return {}; | ||
| 1453 | } | ||
| 1454 | const std::string temporary = AllocVectorTemporary(); | ||
| 1455 | std::size_t i = 0; | ||
| 1456 | for (auto& node : meta.aoffi) { | ||
| 1457 | AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node)); | ||
| 1458 | } | ||
| 1459 | return fmt::format(", offset({})", temporary); | ||
| 1460 | } | ||
| 1461 | |||
| 1462 | std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { | ||
| 1463 | // Read a bindless SSBO, return its address and set CC accordingly | ||
| 1464 | // address = c[binding].xy | ||
| 1465 | // length = c[binding].z | ||
| 1466 | const u32 binding = global_memory_names.at(gmem.GetDescriptor()); | ||
| 1467 | |||
| 1468 | const std::string pointer = AllocLongVectorTemporary(); | ||
| 1469 | std::string temporary = AllocTemporary(); | ||
| 1470 | |||
| 1471 | AddLine("PK64.U {}, c[{}];", pointer, binding); | ||
| 1472 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), | ||
| 1473 | Visit(gmem.GetBaseAddress())); | ||
| 1474 | AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); | ||
| 1475 | AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer); | ||
| 1476 | // Compare offset to length and set CC | ||
| 1477 | AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding); | ||
| 1478 | return fmt::format("{}.x", pointer); | ||
| 1479 | } | ||
| 1480 | |||
| 1481 | void ARBDecompiler::Exit() { | ||
| 1482 | if (stage != ShaderType::Fragment) { | ||
| 1483 | AddLine("RET;"); | ||
| 1484 | return; | ||
| 1485 | } | ||
| 1486 | |||
| 1487 | const auto safe_get_register = [this](u32 reg) -> std::string { | ||
| 1488 | if (ir.GetRegisters().contains(reg)) { | ||
| 1489 | return fmt::format("R{}.x", reg); | ||
| 1490 | } | ||
| 1491 | return "{0, 0, 0, 0}.x"; | ||
| 1492 | }; | ||
| 1493 | |||
| 1494 | const auto& header = ir.GetHeader(); | ||
| 1495 | u32 current_reg = 0; | ||
| 1496 | for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) { | ||
| 1497 | for (u32 component = 0; component < 4; ++component) { | ||
| 1498 | if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { | ||
| 1499 | continue; | ||
| 1500 | } | ||
| 1501 | AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component), | ||
| 1502 | safe_get_register(current_reg)); | ||
| 1503 | ++current_reg; | ||
| 1504 | } | ||
| 1505 | } | ||
| 1506 | if (header.ps.omap.depth) { | ||
| 1507 | AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1)); | ||
| 1508 | } | ||
| 1509 | |||
| 1510 | AddLine("RET;"); | ||
| 1511 | } | ||
| 1512 | |||
| 1513 | std::string ARBDecompiler::Assign(Operation operation) { | ||
| 1514 | const Node& dest = operation[0]; | ||
| 1515 | const Node& src = operation[1]; | ||
| 1516 | |||
| 1517 | std::string dest_name; | ||
| 1518 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { | ||
| 1519 | if (gpr->GetIndex() == Register::ZeroIndex) { | ||
| 1520 | // Writing to Register::ZeroIndex is a no op | ||
| 1521 | return {}; | ||
| 1522 | } | ||
| 1523 | dest_name = fmt::format("R{}.x", gpr->GetIndex()); | ||
| 1524 | } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { | ||
| 1525 | const u32 element = abuf->GetElement(); | ||
| 1526 | const char swizzle = Swizzle(element); | ||
| 1527 | switch (const Attribute::Index index = abuf->GetIndex()) { | ||
| 1528 | case Attribute::Index::Position: | ||
| 1529 | dest_name = fmt::format("result.position.{}", swizzle); | ||
| 1530 | break; | ||
| 1531 | case Attribute::Index::LayerViewportPointSize: | ||
| 1532 | switch (element) { | ||
| 1533 | case 0: | ||
| 1534 | UNIMPLEMENTED(); | ||
| 1535 | return {}; | ||
| 1536 | case 1: | ||
| 1537 | case 2: | ||
| 1538 | if (!device.HasNvViewportArray2()) { | ||
| 1539 | LOG_ERROR( | ||
| 1540 | Render_OpenGL, | ||
| 1541 | "NV_viewport_array2 is missing. Maxwell gen 2 or better is required."); | ||
| 1542 | return {}; | ||
| 1543 | } | ||
| 1544 | dest_name = element == 1 ? "result.layer.x" : "result.viewport.x"; | ||
| 1545 | break; | ||
| 1546 | case 3: | ||
| 1547 | dest_name = "result.pointsize.x"; | ||
| 1548 | break; | ||
| 1549 | } | ||
| 1550 | break; | ||
| 1551 | case Attribute::Index::ClipDistances0123: | ||
| 1552 | dest_name = fmt::format("result.clip[{}].x", element); | ||
| 1553 | break; | ||
| 1554 | case Attribute::Index::ClipDistances4567: | ||
| 1555 | dest_name = fmt::format("result.clip[{}].x", element + 4); | ||
| 1556 | break; | ||
| 1557 | default: | ||
| 1558 | if (!IsGenericAttribute(index)) { | ||
| 1559 | UNREACHABLE(); | ||
| 1560 | return {}; | ||
| 1561 | } | ||
| 1562 | dest_name = | ||
| 1563 | fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle); | ||
| 1564 | break; | ||
| 1565 | } | ||
| 1566 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | ||
| 1567 | const std::string address = Visit(lmem->GetAddress()); | ||
| 1568 | AddLine("SHR.U {}, {}, 2;", address, address); | ||
| 1569 | dest_name = fmt::format("lmem[{}].x", address); | ||
| 1570 | } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { | ||
| 1571 | AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress())); | ||
| 1572 | ResetTemporaries(); | ||
| 1573 | return {}; | ||
| 1574 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | ||
| 1575 | AddLine("IF NE.x;"); | ||
| 1576 | AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); | ||
| 1577 | AddLine("ENDIF;"); | ||
| 1578 | ResetTemporaries(); | ||
| 1579 | return {}; | ||
| 1580 | } else { | ||
| 1581 | UNREACHABLE(); | ||
| 1582 | ResetTemporaries(); | ||
| 1583 | return {}; | ||
| 1584 | } | ||
| 1585 | |||
| 1586 | AddLine("MOV.U {}, {};", dest_name, Visit(src)); | ||
| 1587 | ResetTemporaries(); | ||
| 1588 | return {}; | ||
| 1589 | } | ||
| 1590 | |||
| 1591 | std::string ARBDecompiler::Select(Operation operation) { | ||
| 1592 | std::string temporary = AllocTemporary(); | ||
| 1593 | AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]), | ||
| 1594 | Visit(operation[2])); | ||
| 1595 | return temporary; | ||
| 1596 | } | ||
| 1597 | |||
| 1598 | std::string ARBDecompiler::FClamp(Operation operation) { | ||
| 1599 | // 1.0f in hex, replace with std::bit_cast on C++20 | ||
| 1600 | static constexpr u32 POSITIVE_ONE = 0x3f800000; | ||
| 1601 | |||
| 1602 | std::string temporary = AllocTemporary(); | ||
| 1603 | const Node& value = operation[0]; | ||
| 1604 | const Node& low = operation[1]; | ||
| 1605 | const Node& high = operation[2]; | ||
| 1606 | const auto* const imm_low = std::get_if<ImmediateNode>(&*low); | ||
| 1607 | const auto* const imm_high = std::get_if<ImmediateNode>(&*high); | ||
| 1608 | if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) { | ||
| 1609 | AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value)); | ||
| 1610 | } else { | ||
| 1611 | AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high)); | ||
| 1612 | AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low)); | ||
| 1613 | } | ||
| 1614 | return temporary; | ||
| 1615 | } | ||
| 1616 | |||
| 1617 | std::string ARBDecompiler::FCastHalf0(Operation operation) { | ||
| 1618 | const std::string temporary = AllocVectorTemporary(); | ||
| 1619 | AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0])); | ||
| 1620 | return fmt::format("{}.x", temporary); | ||
| 1621 | } | ||
| 1622 | |||
| 1623 | std::string ARBDecompiler::FCastHalf1(Operation operation) { | ||
| 1624 | const std::string temporary = AllocVectorTemporary(); | ||
| 1625 | AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0])); | ||
| 1626 | AddLine("MOV {}.x, {}.y;", temporary, temporary); | ||
| 1627 | return fmt::format("{}.x", temporary); | ||
| 1628 | } | ||
| 1629 | |||
| 1630 | std::string ARBDecompiler::FSqrt(Operation operation) { | ||
| 1631 | std::string temporary = AllocTemporary(); | ||
| 1632 | AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0])); | ||
| 1633 | AddLine("RCP.F32 {}, {};", temporary, temporary); | ||
| 1634 | return temporary; | ||
| 1635 | } | ||
| 1636 | |||
| 1637 | std::string ARBDecompiler::FSwizzleAdd(Operation operation) { | ||
| 1638 | const std::string temporary = AllocVectorTemporary(); | ||
| 1639 | if (!device.HasWarpIntrinsics()) { | ||
| 1640 | LOG_ERROR(Render_OpenGL, | ||
| 1641 | "NV_shader_thread_shuffle is missing. Kepler or better is required."); | ||
| 1642 | AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1])); | ||
| 1643 | return fmt::format("{}.x", temporary); | ||
| 1644 | } | ||
| 1645 | |||
| 1646 | AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage)); | ||
| 1647 | AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary); | ||
| 1648 | AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary); | ||
| 1649 | AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary); | ||
| 1650 | AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary); | ||
| 1651 | AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary); | ||
| 1652 | AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary); | ||
| 1653 | return fmt::format("{}.x", temporary); | ||
| 1654 | } | ||
| 1655 | |||
| 1656 | std::string ARBDecompiler::HAdd2(Operation operation) { | ||
| 1657 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1658 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1659 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1660 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1661 | AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1662 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1663 | return fmt::format("{}.x", tmp1); | ||
| 1664 | } | ||
| 1665 | |||
| 1666 | std::string ARBDecompiler::HMul2(Operation operation) { | ||
| 1667 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1668 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1669 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1670 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1671 | AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1672 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1673 | return fmt::format("{}.x", tmp1); | ||
| 1674 | } | ||
| 1675 | |||
| 1676 | std::string ARBDecompiler::HFma2(Operation operation) { | ||
| 1677 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1678 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1679 | const std::string tmp3 = AllocVectorTemporary(); | ||
| 1680 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1681 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1682 | AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2])); | ||
| 1683 | AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3); | ||
| 1684 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1685 | return fmt::format("{}.x", tmp1); | ||
| 1686 | } | ||
| 1687 | |||
| 1688 | std::string ARBDecompiler::HAbsolute(Operation operation) { | ||
| 1689 | const std::string temporary = AllocVectorTemporary(); | ||
| 1690 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1691 | AddLine("PK2H.F {}.x, |{}|;", temporary, temporary); | ||
| 1692 | return fmt::format("{}.x", temporary); | ||
| 1693 | } | ||
| 1694 | |||
| 1695 | std::string ARBDecompiler::HNegate(Operation operation) { | ||
| 1696 | const std::string temporary = AllocVectorTemporary(); | ||
| 1697 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1698 | AddLine("MOVC.S RC.x, {};", Visit(operation[1])); | ||
| 1699 | AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary); | ||
| 1700 | AddLine("MOVC.S RC.x, {};", Visit(operation[2])); | ||
| 1701 | AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary); | ||
| 1702 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1703 | return fmt::format("{}.x", temporary); | ||
| 1704 | } | ||
| 1705 | |||
| 1706 | std::string ARBDecompiler::HClamp(Operation operation) { | ||
| 1707 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1708 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1709 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1710 | AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1])); | ||
| 1711 | AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); | ||
| 1712 | AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1713 | AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2])); | ||
| 1714 | AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); | ||
| 1715 | AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1716 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1717 | return fmt::format("{}.x", tmp1); | ||
| 1718 | } | ||
| 1719 | |||
| 1720 | std::string ARBDecompiler::HCastFloat(Operation operation) { | ||
| 1721 | const std::string temporary = AllocVectorTemporary(); | ||
| 1722 | AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary); | ||
| 1723 | AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0])); | ||
| 1724 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1725 | return fmt::format("{}.x", temporary); | ||
| 1726 | } | ||
| 1727 | |||
| 1728 | std::string ARBDecompiler::HUnpack(Operation operation) { | ||
| 1729 | std::string operand = Visit(operation[0]); | ||
| 1730 | switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { | ||
| 1731 | case Tegra::Shader::HalfType::H0_H1: | ||
| 1732 | return operand; | ||
| 1733 | case Tegra::Shader::HalfType::F32: { | ||
| 1734 | const std::string temporary = AllocVectorTemporary(); | ||
| 1735 | AddLine("MOV.U {}.x, {};", temporary, operand); | ||
| 1736 | AddLine("MOV.U {}.y, {}.x;", temporary, temporary); | ||
| 1737 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1738 | return fmt::format("{}.x", temporary); | ||
| 1739 | } | ||
| 1740 | case Tegra::Shader::HalfType::H0_H0: { | ||
| 1741 | const std::string temporary = AllocVectorTemporary(); | ||
| 1742 | AddLine("UP2H.F {}.xy, {};", temporary, operand); | ||
| 1743 | AddLine("MOV.U {}.y, {}.x;", temporary, temporary); | ||
| 1744 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1745 | return fmt::format("{}.x", temporary); | ||
| 1746 | } | ||
| 1747 | case Tegra::Shader::HalfType::H1_H1: { | ||
| 1748 | const std::string temporary = AllocVectorTemporary(); | ||
| 1749 | AddLine("UP2H.F {}.xy, {};", temporary, operand); | ||
| 1750 | AddLine("MOV.U {}.x, {}.y;", temporary, temporary); | ||
| 1751 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1752 | return fmt::format("{}.x", temporary); | ||
| 1753 | } | ||
| 1754 | } | ||
| 1755 | UNREACHABLE(); | ||
| 1756 | return "{0, 0, 0, 0}.x"; | ||
| 1757 | } | ||
| 1758 | |||
| 1759 | std::string ARBDecompiler::HMergeF32(Operation operation) { | ||
| 1760 | const std::string temporary = AllocVectorTemporary(); | ||
| 1761 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1762 | return fmt::format("{}.x", temporary); | ||
| 1763 | } | ||
| 1764 | |||
| 1765 | std::string ARBDecompiler::HMergeH0(Operation operation) { | ||
| 1766 | const std::string temporary = AllocVectorTemporary(); | ||
| 1767 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1768 | AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); | ||
| 1769 | AddLine("MOV.U {}.x, {}.z;", temporary, temporary); | ||
| 1770 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1771 | return fmt::format("{}.x", temporary); | ||
| 1772 | } | ||
| 1773 | |||
| 1774 | std::string ARBDecompiler::HMergeH1(Operation operation) { | ||
| 1775 | const std::string temporary = AllocVectorTemporary(); | ||
| 1776 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1777 | AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); | ||
| 1778 | AddLine("MOV.U {}.y, {}.w;", temporary, temporary); | ||
| 1779 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1780 | return fmt::format("{}.x", temporary); | ||
| 1781 | } | ||
| 1782 | |||
| 1783 | std::string ARBDecompiler::HPack2(Operation operation) { | ||
| 1784 | const std::string temporary = AllocVectorTemporary(); | ||
| 1785 | AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0])); | ||
| 1786 | AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1])); | ||
| 1787 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1788 | return fmt::format("{}.x", temporary); | ||
| 1789 | } | ||
| 1790 | |||
| 1791 | std::string ARBDecompiler::LogicalAssign(Operation operation) { | ||
| 1792 | const Node& dest = operation[0]; | ||
| 1793 | const Node& src = operation[1]; | ||
| 1794 | |||
| 1795 | std::string target; | ||
| 1796 | |||
| 1797 | if (const auto pred = std::get_if<PredicateNode>(&*dest)) { | ||
| 1798 | ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); | ||
| 1799 | |||
| 1800 | const Tegra::Shader::Pred index = pred->GetIndex(); | ||
| 1801 | switch (index) { | ||
| 1802 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1803 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1804 | // Writing to these predicates is a no-op | ||
| 1805 | return {}; | ||
| 1806 | } | ||
| 1807 | target = fmt::format("P{}.x", static_cast<u64>(index)); | ||
| 1808 | } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) { | ||
| 1809 | const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); | ||
| 1810 | target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); | ||
| 1811 | } else { | ||
| 1812 | UNREACHABLE(); | ||
| 1813 | ResetTemporaries(); | ||
| 1814 | return {}; | ||
| 1815 | } | ||
| 1816 | |||
| 1817 | AddLine("MOV.U {}, {};", target, Visit(src)); | ||
| 1818 | ResetTemporaries(); | ||
| 1819 | return {}; | ||
| 1820 | } | ||
| 1821 | |||
| 1822 | std::string ARBDecompiler::LogicalPick2(Operation operation) { | ||
| 1823 | std::string temporary = AllocTemporary(); | ||
| 1824 | const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue(); | ||
| 1825 | AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index)); | ||
| 1826 | return temporary; | ||
| 1827 | } | ||
| 1828 | |||
| 1829 | std::string ARBDecompiler::LogicalAnd2(Operation operation) { | ||
| 1830 | std::string temporary = AllocTemporary(); | ||
| 1831 | const std::string op = Visit(operation[0]); | ||
| 1832 | AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op); | ||
| 1833 | return temporary; | ||
| 1834 | } | ||
| 1835 | |||
| 1836 | std::string ARBDecompiler::FloatOrdered(Operation operation) { | ||
| 1837 | std::string temporary = AllocTemporary(); | ||
| 1838 | AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); | ||
| 1839 | AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); | ||
| 1840 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1841 | AddLine("MOV.S {} (NAN.x), 0;", temporary); | ||
| 1842 | AddLine("MOV.S {} (NAN.y), 0;", temporary); | ||
| 1843 | return temporary; | ||
| 1844 | } | ||
| 1845 | |||
| 1846 | std::string ARBDecompiler::FloatUnordered(Operation operation) { | ||
| 1847 | std::string temporary = AllocTemporary(); | ||
| 1848 | AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); | ||
| 1849 | AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); | ||
| 1850 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1851 | AddLine("MOV.S {} (NAN.x), -1;", temporary); | ||
| 1852 | AddLine("MOV.S {} (NAN.y), -1;", temporary); | ||
| 1853 | return temporary; | ||
| 1854 | } | ||
| 1855 | |||
| 1856 | std::string ARBDecompiler::LogicalAddCarry(Operation operation) { | ||
| 1857 | std::string temporary = AllocTemporary(); | ||
| 1858 | AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1])); | ||
| 1859 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1860 | AddLine("IF CF.x;"); | ||
| 1861 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1862 | AddLine("ENDIF;"); | ||
| 1863 | return temporary; | ||
| 1864 | } | ||
| 1865 | |||
| 1866 | std::string ARBDecompiler::Texture(Operation operation) { | ||
| 1867 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1868 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1869 | const auto [coords, temporary, swizzle] = BuildCoords(operation); | ||
| 1870 | |||
| 1871 | std::string_view opcode = "TEX"; | ||
| 1872 | std::string extra; | ||
| 1873 | if (meta.bias) { | ||
| 1874 | ASSERT(!meta.lod); | ||
| 1875 | opcode = "TXB"; | ||
| 1876 | |||
| 1877 | if (swizzle < 4) { | ||
| 1878 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias)); | ||
| 1879 | } else { | ||
| 1880 | const std::string bias = AllocTemporary(); | ||
| 1881 | AddLine("MOV.F {}, {};", bias, Visit(meta.bias)); | ||
| 1882 | extra = fmt::format(" {},", bias); | ||
| 1883 | } | ||
| 1884 | } | ||
| 1885 | if (meta.lod) { | ||
| 1886 | ASSERT(!meta.bias); | ||
| 1887 | opcode = "TXL"; | ||
| 1888 | |||
| 1889 | if (swizzle < 4) { | ||
| 1890 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); | ||
| 1891 | } else { | ||
| 1892 | const std::string lod = AllocTemporary(); | ||
| 1893 | AddLine("MOV.F {}, {};", lod, Visit(meta.lod)); | ||
| 1894 | extra = fmt::format(" {},", lod); | ||
| 1895 | } | ||
| 1896 | } | ||
| 1897 | |||
| 1898 | AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id, | ||
| 1899 | TextureType(meta), BuildAoffi(operation)); | ||
| 1900 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1901 | return fmt::format("{}.x", temporary); | ||
| 1902 | } | ||
| 1903 | |||
| 1904 | std::string ARBDecompiler::TextureGather(Operation operation) { | ||
| 1905 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1906 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1907 | const auto [coords, temporary, swizzle] = BuildCoords(operation); | ||
| 1908 | |||
| 1909 | std::string comp; | ||
| 1910 | if (!meta.sampler.is_shadow) { | ||
| 1911 | const auto& immediate = std::get<ImmediateNode>(*meta.component); | ||
| 1912 | comp = fmt::format(".{}", Swizzle(immediate.GetValue())); | ||
| 1913 | } | ||
| 1914 | |||
| 1915 | AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp, | ||
| 1916 | TextureType(meta), BuildAoffi(operation)); | ||
| 1917 | AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element)); | ||
| 1918 | return fmt::format("{}.x", temporary); | ||
| 1919 | } | ||
| 1920 | |||
| 1921 | std::string ARBDecompiler::TextureQueryDimensions(Operation operation) { | ||
| 1922 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1923 | const std::string temporary = AllocVectorTemporary(); | ||
| 1924 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1925 | |||
| 1926 | ASSERT(!meta.sampler.is_array); | ||
| 1927 | |||
| 1928 | const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0"; | ||
| 1929 | AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta)); | ||
| 1930 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1931 | return fmt::format("{}.x", temporary); | ||
| 1932 | } | ||
| 1933 | |||
| 1934 | std::string ARBDecompiler::TextureQueryLod(Operation operation) { | ||
| 1935 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1936 | const std::string temporary = AllocVectorTemporary(); | ||
| 1937 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1938 | |||
| 1939 | ASSERT(!meta.sampler.is_array); | ||
| 1940 | |||
| 1941 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1942 | for (std::size_t i = 0; i < count; ++i) { | ||
| 1943 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1944 | } | ||
| 1945 | AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta)); | ||
| 1946 | AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary); | ||
| 1947 | AddLine("TRUNC.S {}, {};", temporary, temporary); | ||
| 1948 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1949 | return fmt::format("{}.x", temporary); | ||
| 1950 | } | ||
| 1951 | |||
| 1952 | std::string ARBDecompiler::TexelFetch(Operation operation) { | ||
| 1953 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1954 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1955 | const auto [coords, temporary, swizzle] = BuildCoords(operation); | ||
| 1956 | |||
| 1957 | if (!meta.sampler.is_buffer) { | ||
| 1958 | ASSERT(swizzle < 4); | ||
| 1959 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); | ||
| 1960 | } | ||
| 1961 | AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta), | ||
| 1962 | BuildAoffi(operation)); | ||
| 1963 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1964 | return fmt::format("{}.x", temporary); | ||
| 1965 | } | ||
| 1966 | |||
| 1967 | std::string ARBDecompiler::TextureGradient(Operation operation) { | ||
| 1968 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1969 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1970 | const std::string ddx = AllocVectorTemporary(); | ||
| 1971 | const std::string ddy = AllocVectorTemporary(); | ||
| 1972 | const std::string coord = std::get<1>(BuildCoords(operation)); | ||
| 1973 | |||
| 1974 | const std::size_t num_components = meta.derivates.size() / 2; | ||
| 1975 | for (std::size_t index = 0; index < num_components; ++index) { | ||
| 1976 | const char swizzle = Swizzle(index); | ||
| 1977 | AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2])); | ||
| 1978 | AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1])); | ||
| 1979 | } | ||
| 1980 | |||
| 1981 | const std::string_view result = coord; | ||
| 1982 | AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id, | ||
| 1983 | TextureType(meta), BuildAoffi(operation)); | ||
| 1984 | AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element)); | ||
| 1985 | return fmt::format("{}.x", result); | ||
| 1986 | } | ||
| 1987 | |||
| 1988 | std::string ARBDecompiler::ImageLoad(Operation operation) { | ||
| 1989 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 1990 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 1991 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1992 | const std::string_view type = ImageType(meta.image.type); | ||
| 1993 | |||
| 1994 | const std::string temporary = AllocVectorTemporary(); | ||
| 1995 | for (std::size_t i = 0; i < count; ++i) { | ||
| 1996 | AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1997 | } | ||
| 1998 | AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type); | ||
| 1999 | AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 2000 | return fmt::format("{}.x", temporary); | ||
| 2001 | } | ||
| 2002 | |||
| 2003 | std::string ARBDecompiler::ImageStore(Operation operation) { | ||
| 2004 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 2005 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 2006 | const std::size_t num_coords = operation.GetOperandsCount(); | ||
| 2007 | const std::size_t num_values = meta.values.size(); | ||
| 2008 | const std::string_view type = ImageType(meta.image.type); | ||
| 2009 | |||
| 2010 | const std::string coord = AllocVectorTemporary(); | ||
| 2011 | const std::string value = AllocVectorTemporary(); | ||
| 2012 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 2013 | AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); | ||
| 2014 | } | ||
| 2015 | for (std::size_t i = 0; i < num_values; ++i) { | ||
| 2016 | AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); | ||
| 2017 | } | ||
| 2018 | AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type); | ||
| 2019 | return {}; | ||
| 2020 | } | ||
| 2021 | |||
| 2022 | std::string ARBDecompiler::Branch(Operation operation) { | ||
| 2023 | const auto target = std::get<ImmediateNode>(*operation[0]); | ||
| 2024 | AddLine("MOV.U PC.x, {};", target.GetValue()); | ||
| 2025 | AddLine("CONT;"); | ||
| 2026 | return {}; | ||
| 2027 | } | ||
| 2028 | |||
| 2029 | std::string ARBDecompiler::BranchIndirect(Operation operation) { | ||
| 2030 | AddLine("MOV.U PC.x, {};", Visit(operation[0])); | ||
| 2031 | AddLine("CONT;"); | ||
| 2032 | return {}; | ||
| 2033 | } | ||
| 2034 | |||
| 2035 | std::string ARBDecompiler::PushFlowStack(Operation operation) { | ||
| 2036 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 2037 | const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue(); | ||
| 2038 | const std::string_view stack_name = StackName(stack); | ||
| 2039 | AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target); | ||
| 2040 | AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); | ||
| 2041 | return {}; | ||
| 2042 | } | ||
| 2043 | |||
| 2044 | std::string ARBDecompiler::PopFlowStack(Operation operation) { | ||
| 2045 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 2046 | const std::string_view stack_name = StackName(stack); | ||
| 2047 | AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); | ||
| 2048 | AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name); | ||
| 2049 | AddLine("CONT;"); | ||
| 2050 | return {}; | ||
| 2051 | } | ||
| 2052 | |||
| 2053 | std::string ARBDecompiler::Exit(Operation) { | ||
| 2054 | Exit(); | ||
| 2055 | return {}; | ||
| 2056 | } | ||
| 2057 | |||
| 2058 | std::string ARBDecompiler::Discard(Operation) { | ||
| 2059 | AddLine("KIL TR;"); | ||
| 2060 | return {}; | ||
| 2061 | } | ||
| 2062 | |||
| 2063 | std::string ARBDecompiler::EmitVertex(Operation) { | ||
| 2064 | AddLine("EMIT;"); | ||
| 2065 | return {}; | ||
| 2066 | } | ||
| 2067 | |||
| 2068 | std::string ARBDecompiler::EndPrimitive(Operation) { | ||
| 2069 | AddLine("ENDPRIM;"); | ||
| 2070 | return {}; | ||
| 2071 | } | ||
| 2072 | |||
| 2073 | std::string ARBDecompiler::InvocationId(Operation) { | ||
| 2074 | return "primitive.invocation"; | ||
| 2075 | } | ||
| 2076 | |||
| 2077 | std::string ARBDecompiler::YNegate(Operation) { | ||
| 2078 | LOG_WARNING(Render_OpenGL, "(STUBBED)"); | ||
| 2079 | std::string temporary = AllocTemporary(); | ||
| 2080 | AddLine("MOV.F {}, 1;", temporary); | ||
| 2081 | return temporary; | ||
| 2082 | } | ||
| 2083 | |||
| 2084 | std::string ARBDecompiler::ThreadId(Operation) { | ||
| 2085 | return fmt::format("{}.threadid", StageInputName(stage)); | ||
| 2086 | } | ||
| 2087 | |||
| 2088 | std::string ARBDecompiler::ShuffleIndexed(Operation operation) { | ||
| 2089 | if (!device.HasWarpIntrinsics()) { | ||
| 2090 | LOG_ERROR(Render_OpenGL, | ||
| 2091 | "NV_shader_thread_shuffle is missing. Kepler or better is required."); | ||
| 2092 | return Visit(operation[0]); | ||
| 2093 | } | ||
| 2094 | const std::string temporary = AllocVectorTemporary(); | ||
| 2095 | AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]), | ||
| 2096 | Visit(operation[1])); | ||
| 2097 | AddLine("MOV.U {}.x, {}.y;", temporary, temporary); | ||
| 2098 | return fmt::format("{}.x", temporary); | ||
| 2099 | } | ||
| 2100 | |||
| 2101 | std::string ARBDecompiler::Barrier(Operation) { | ||
| 2102 | AddLine("BAR;"); | ||
| 2103 | return {}; | ||
| 2104 | } | ||
| 2105 | |||
| 2106 | std::string ARBDecompiler::MemoryBarrierGroup(Operation) { | ||
| 2107 | AddLine("MEMBAR.CTA;"); | ||
| 2108 | return {}; | ||
| 2109 | } | ||
| 2110 | |||
| 2111 | std::string ARBDecompiler::MemoryBarrierGlobal(Operation) { | ||
| 2112 | AddLine("MEMBAR;"); | ||
| 2113 | return {}; | ||
| 2114 | } | ||
| 2115 | |||
| 2116 | } // Anonymous namespace | ||
| 2117 | |||
| 2118 | std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 2119 | const VideoCommon::Shader::Registry& registry, | ||
| 2120 | Tegra::Engines::ShaderType stage, std::string_view identifier) { | ||
| 2121 | return ARBDecompiler(device, ir, registry, stage, identifier).Code(); | ||
| 2122 | } | ||
| 2123 | |||
| 2124 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h deleted file mode 100644 index 6afc87220..000000000 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.h +++ /dev/null | |||
| @@ -1,29 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | #include <string_view> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | namespace Tegra::Engines { | ||
| 13 | enum class ShaderType : u32; | ||
| 14 | } | ||
| 15 | |||
| 16 | namespace VideoCommon::Shader { | ||
| 17 | class ShaderIR; | ||
| 18 | class Registry; | ||
| 19 | } // namespace VideoCommon::Shader | ||
| 20 | |||
| 21 | namespace OpenGL { | ||
| 22 | |||
| 23 | class Device; | ||
| 24 | |||
| 25 | std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 26 | const VideoCommon::Shader::Registry& registry, | ||
| 27 | Tegra::Engines::ShaderType stage, std::string_view identifier); | ||
| 28 | |||
| 29 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ceb3abcb2..3551dbdcc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -54,40 +54,6 @@ namespace { | |||
| 54 | 54 | ||
| 55 | constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; | 55 | constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; |
| 56 | 56 | ||
| 57 | struct TextureHandle { | ||
| 58 | constexpr TextureHandle(u32 data, bool via_header_index) { | ||
| 59 | const Tegra::Texture::TextureHandle handle{data}; | ||
| 60 | image = handle.tic_id; | ||
| 61 | sampler = via_header_index ? image : handle.tsc_id.Value(); | ||
| 62 | } | ||
| 63 | |||
| 64 | u32 image; | ||
| 65 | u32 sampler; | ||
| 66 | }; | ||
| 67 | |||
| 68 | template <typename Engine, typename Entry> | ||
| 69 | TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, | ||
| 70 | ShaderType shader_type, size_t index = 0) { | ||
| 71 | if constexpr (std::is_same_v<Entry, SamplerEntry>) { | ||
| 72 | if (entry.is_separated) { | ||
| 73 | const u32 buffer_1 = entry.buffer; | ||
| 74 | const u32 buffer_2 = entry.secondary_buffer; | ||
| 75 | const u32 offset_1 = entry.offset; | ||
| 76 | const u32 offset_2 = entry.secondary_offset; | ||
| 77 | const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); | ||
| 78 | const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); | ||
| 79 | return TextureHandle(handle_1 | handle_2, via_header_index); | ||
| 80 | } | ||
| 81 | } | ||
| 82 | if (entry.is_bindless) { | ||
| 83 | const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); | ||
| 84 | return TextureHandle(raw, via_header_index); | ||
| 85 | } | ||
| 86 | const u32 buffer = engine.GetBoundBuffer(); | ||
| 87 | const u64 offset = (entry.offset + index) * sizeof(u32); | ||
| 88 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); | ||
| 89 | } | ||
| 90 | |||
| 91 | /// Translates hardware transform feedback indices | 57 | /// Translates hardware transform feedback indices |
| 92 | /// @param location Hardware location | 58 | /// @param location Hardware location |
| 93 | /// @return Pair of ARB_transform_feedback3 token stream first and third arguments | 59 | /// @return Pair of ARB_transform_feedback3 token stream first and third arguments |
| @@ -119,44 +85,6 @@ std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) { | |||
| 119 | void oglEnable(GLenum cap, bool state) { | 85 | void oglEnable(GLenum cap, bool state) { |
| 120 | (state ? glEnable : glDisable)(cap); | 86 | (state ? glEnable : glDisable)(cap); |
| 121 | } | 87 | } |
| 122 | |||
| 123 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { | ||
| 124 | if (entry.is_buffer) { | ||
| 125 | return ImageViewType::Buffer; | ||
| 126 | } | ||
| 127 | switch (entry.type) { | ||
| 128 | case Tegra::Shader::TextureType::Texture1D: | ||
| 129 | return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; | ||
| 130 | case Tegra::Shader::TextureType::Texture2D: | ||
| 131 | return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; | ||
| 132 | case Tegra::Shader::TextureType::Texture3D: | ||
| 133 | return ImageViewType::e3D; | ||
| 134 | case Tegra::Shader::TextureType::TextureCube: | ||
| 135 | return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; | ||
| 136 | } | ||
| 137 | UNREACHABLE(); | ||
| 138 | return ImageViewType::e2D; | ||
| 139 | } | ||
| 140 | |||
| 141 | ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { | ||
| 142 | switch (entry.type) { | ||
| 143 | case Tegra::Shader::ImageType::Texture1D: | ||
| 144 | return ImageViewType::e1D; | ||
| 145 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 146 | return ImageViewType::e1DArray; | ||
| 147 | case Tegra::Shader::ImageType::Texture2D: | ||
| 148 | return ImageViewType::e2D; | ||
| 149 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 150 | return ImageViewType::e2DArray; | ||
| 151 | case Tegra::Shader::ImageType::Texture3D: | ||
| 152 | return ImageViewType::e3D; | ||
| 153 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 154 | return ImageViewType::Buffer; | ||
| 155 | } | ||
| 156 | UNREACHABLE(); | ||
| 157 | return ImageViewType::e2D; | ||
| 158 | } | ||
| 159 | |||
| 160 | } // Anonymous namespace | 88 | } // Anonymous namespace |
| 161 | 89 | ||
| 162 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 90 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| @@ -172,12 +100,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 172 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | 100 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), |
| 173 | shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), | 101 | shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), |
| 174 | query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), | 102 | query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), |
| 175 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), | 103 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} |
| 176 | async_shaders(emu_window_) { | ||
| 177 | if (device.UseAsynchronousShaders()) { | ||
| 178 | async_shaders.AllocateWorkers(); | ||
| 179 | } | ||
| 180 | } | ||
| 181 | 104 | ||
| 182 | RasterizerOpenGL::~RasterizerOpenGL() = default; | 105 | RasterizerOpenGL::~RasterizerOpenGL() = default; |
| 183 | 106 | ||
| @@ -244,117 +167,8 @@ void RasterizerOpenGL::SyncVertexInstances() { | |||
| 244 | } | 167 | } |
| 245 | } | 168 | } |
| 246 | 169 | ||
| 247 | void RasterizerOpenGL::SetupShaders(bool is_indexed) { | ||
| 248 | u32 clip_distances = 0; | ||
| 249 | |||
| 250 | std::array<Shader*, Maxwell::MaxShaderStage> shaders{}; | ||
| 251 | image_view_indices.clear(); | ||
| 252 | sampler_handles.clear(); | ||
| 253 | |||
| 254 | texture_cache.SynchronizeGraphicsDescriptors(); | ||
| 255 | |||
| 256 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||
| 257 | const auto& shader_config = maxwell3d.regs.shader_config[index]; | ||
| 258 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; | ||
| 259 | |||
| 260 | // Skip stages that are not enabled | ||
| 261 | if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { | ||
| 262 | switch (program) { | ||
| 263 | case Maxwell::ShaderProgram::Geometry: | ||
| 264 | program_manager.UseGeometryShader(0); | ||
| 265 | break; | ||
| 266 | case Maxwell::ShaderProgram::Fragment: | ||
| 267 | program_manager.UseFragmentShader(0); | ||
| 268 | break; | ||
| 269 | default: | ||
| 270 | break; | ||
| 271 | } | ||
| 272 | continue; | ||
| 273 | } | ||
| 274 | // Currently this stages are not supported in the OpenGL backend. | ||
| 275 | // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL | ||
| 276 | if (program == Maxwell::ShaderProgram::TesselationControl || | ||
| 277 | program == Maxwell::ShaderProgram::TesselationEval) { | ||
| 278 | continue; | ||
| 279 | } | ||
| 280 | |||
| 281 | Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); | ||
| 282 | const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; | ||
| 283 | switch (program) { | ||
| 284 | case Maxwell::ShaderProgram::VertexA: | ||
| 285 | case Maxwell::ShaderProgram::VertexB: | ||
| 286 | program_manager.UseVertexShader(program_handle); | ||
| 287 | break; | ||
| 288 | case Maxwell::ShaderProgram::Geometry: | ||
| 289 | program_manager.UseGeometryShader(program_handle); | ||
| 290 | break; | ||
| 291 | case Maxwell::ShaderProgram::Fragment: | ||
| 292 | program_manager.UseFragmentShader(program_handle); | ||
| 293 | break; | ||
| 294 | default: | ||
| 295 | UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, | ||
| 296 | shader_config.enable.Value(), shader_config.offset); | ||
| 297 | break; | ||
| 298 | } | ||
| 299 | |||
| 300 | // Stage indices are 0 - 5 | ||
| 301 | const size_t stage = index == 0 ? 0 : index - 1; | ||
| 302 | shaders[stage] = shader; | ||
| 303 | |||
| 304 | SetupDrawTextures(shader, stage); | ||
| 305 | SetupDrawImages(shader, stage); | ||
| 306 | |||
| 307 | buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers); | ||
| 308 | |||
| 309 | buffer_cache.UnbindGraphicsStorageBuffers(stage); | ||
| 310 | u32 ssbo_index = 0; | ||
| 311 | for (const auto& buffer : shader->GetEntries().global_memory_entries) { | ||
| 312 | buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, | ||
| 313 | buffer.cbuf_offset, buffer.is_written); | ||
| 314 | ++ssbo_index; | ||
| 315 | } | ||
| 316 | |||
| 317 | // Workaround for Intel drivers. | ||
| 318 | // When a clip distance is enabled but not set in the shader it crops parts of the screen | ||
| 319 | // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the | ||
| 320 | // clip distances only when it's written by a shader stage. | ||
| 321 | clip_distances |= shader->GetEntries().clip_distances; | ||
| 322 | |||
| 323 | // When VertexA is enabled, we have dual vertex shaders | ||
| 324 | if (program == Maxwell::ShaderProgram::VertexA) { | ||
| 325 | // VertexB was combined with VertexA, so we skip the VertexB iteration | ||
| 326 | ++index; | ||
| 327 | } | ||
| 328 | } | ||
| 329 | SyncClipEnabled(clip_distances); | ||
| 330 | maxwell3d.dirty.flags[Dirty::Shaders] = false; | ||
| 331 | |||
| 332 | buffer_cache.UpdateGraphicsBuffers(is_indexed); | ||
| 333 | |||
| 334 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 335 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | ||
| 336 | |||
| 337 | buffer_cache.BindHostGeometryBuffers(is_indexed); | ||
| 338 | |||
| 339 | size_t image_view_index = 0; | ||
| 340 | size_t texture_index = 0; | ||
| 341 | size_t image_index = 0; | ||
| 342 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 343 | const Shader* const shader = shaders[stage]; | ||
| 344 | if (!shader) { | ||
| 345 | continue; | ||
| 346 | } | ||
| 347 | buffer_cache.BindHostStageBuffers(stage); | ||
| 348 | const auto& base = device.GetBaseBindings(stage); | ||
| 349 | BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, | ||
| 350 | texture_index, image_index); | ||
| 351 | } | ||
| 352 | } | ||
| 353 | |||
| 354 | void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 170 | void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 355 | const VideoCore::DiskResourceLoadCallback& callback) { | 171 | const VideoCore::DiskResourceLoadCallback& callback) {} |
| 356 | shader_cache.LoadDiskCache(title_id, stop_loading, callback); | ||
| 357 | } | ||
| 358 | 172 | ||
| 359 | void RasterizerOpenGL::Clear() { | 173 | void RasterizerOpenGL::Clear() { |
| 360 | MICROPROFILE_SCOPE(OpenGL_Clears); | 174 | MICROPROFILE_SCOPE(OpenGL_Clears); |
| @@ -434,7 +248,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 434 | 248 | ||
| 435 | // Setup shaders and their used resources. | 249 | // Setup shaders and their used resources. |
| 436 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | 250 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 437 | SetupShaders(is_indexed); | ||
| 438 | 251 | ||
| 439 | texture_cache.UpdateRenderTargets(false); | 252 | texture_cache.UpdateRenderTargets(false); |
| 440 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | 253 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); |
| @@ -488,27 +301,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 488 | gpu.TickWork(); | 301 | gpu.TickWork(); |
| 489 | } | 302 | } |
| 490 | 303 | ||
| 491 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | 304 | void RasterizerOpenGL::DispatchCompute() { |
| 492 | Shader* const kernel = shader_cache.GetComputeKernel(code_addr); | 305 | UNREACHABLE_MSG("Not implemented"); |
| 493 | |||
| 494 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 495 | BindComputeTextures(kernel); | ||
| 496 | |||
| 497 | const auto& entries = kernel->GetEntries(); | ||
| 498 | buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); | ||
| 499 | buffer_cache.UnbindComputeStorageBuffers(); | ||
| 500 | u32 ssbo_index = 0; | ||
| 501 | for (const auto& buffer : entries.global_memory_entries) { | ||
| 502 | buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, | ||
| 503 | buffer.is_written); | ||
| 504 | ++ssbo_index; | ||
| 505 | } | ||
| 506 | buffer_cache.UpdateComputeBuffers(); | ||
| 507 | buffer_cache.BindHostComputeBuffers(); | ||
| 508 | |||
| 509 | const auto& launch_desc = kepler_compute.launch_description; | ||
| 510 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); | ||
| 511 | ++num_queued_commands; | ||
| 512 | } | 306 | } |
| 513 | 307 | ||
| 514 | void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { | 308 | void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { |
| @@ -726,106 +520,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 726 | return true; | 520 | return true; |
| 727 | } | 521 | } |
| 728 | 522 | ||
| 729 | void RasterizerOpenGL::BindComputeTextures(Shader* kernel) { | ||
| 730 | image_view_indices.clear(); | ||
| 731 | sampler_handles.clear(); | ||
| 732 | |||
| 733 | texture_cache.SynchronizeComputeDescriptors(); | ||
| 734 | |||
| 735 | SetupComputeTextures(kernel); | ||
| 736 | SetupComputeImages(kernel); | ||
| 737 | |||
| 738 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 739 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | ||
| 740 | |||
| 741 | program_manager.BindCompute(kernel->GetHandle()); | ||
| 742 | size_t image_view_index = 0; | ||
| 743 | size_t texture_index = 0; | ||
| 744 | size_t image_index = 0; | ||
| 745 | BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index); | ||
| 746 | } | ||
| 747 | |||
| 748 | void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture, | ||
| 749 | GLuint base_image, size_t& image_view_index, | ||
| 750 | size_t& texture_index, size_t& image_index) { | ||
| 751 | const GLuint* const samplers = sampler_handles.data() + texture_index; | ||
| 752 | const GLuint* const textures = texture_handles.data() + texture_index; | ||
| 753 | const GLuint* const images = image_handles.data() + image_index; | ||
| 754 | |||
| 755 | const size_t num_samplers = entries.samplers.size(); | ||
| 756 | for (const auto& sampler : entries.samplers) { | ||
| 757 | for (size_t i = 0; i < sampler.size; ++i) { | ||
| 758 | const ImageViewId image_view_id = image_view_ids[image_view_index++]; | ||
| 759 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 760 | const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler)); | ||
| 761 | texture_handles[texture_index++] = handle; | ||
| 762 | } | ||
| 763 | } | ||
| 764 | const size_t num_images = entries.images.size(); | ||
| 765 | for (size_t unit = 0; unit < num_images; ++unit) { | ||
| 766 | // TODO: Mark as modified | ||
| 767 | const ImageViewId image_view_id = image_view_ids[image_view_index++]; | ||
| 768 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 769 | const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit])); | ||
| 770 | image_handles[image_index] = handle; | ||
| 771 | ++image_index; | ||
| 772 | } | ||
| 773 | if (num_samplers > 0) { | ||
| 774 | glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers); | ||
| 775 | glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures); | ||
| 776 | } | ||
| 777 | if (num_images > 0) { | ||
| 778 | glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images); | ||
| 779 | } | ||
| 780 | } | ||
| 781 | |||
| 782 | void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { | ||
| 783 | const bool via_header_index = | ||
| 784 | maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 785 | for (const auto& entry : shader->GetEntries().samplers) { | ||
| 786 | const auto shader_type = static_cast<ShaderType>(stage_index); | ||
| 787 | for (size_t index = 0; index < entry.size; ++index) { | ||
| 788 | const auto handle = | ||
| 789 | GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index); | ||
| 790 | const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); | ||
| 791 | sampler_handles.push_back(sampler->Handle()); | ||
| 792 | image_view_indices.push_back(handle.image); | ||
| 793 | } | ||
| 794 | } | ||
| 795 | } | ||
| 796 | |||
| 797 | void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) { | ||
| 798 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 799 | for (const auto& entry : kernel->GetEntries().samplers) { | ||
| 800 | for (size_t i = 0; i < entry.size; ++i) { | ||
| 801 | const auto handle = | ||
| 802 | GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i); | ||
| 803 | const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); | ||
| 804 | sampler_handles.push_back(sampler->Handle()); | ||
| 805 | image_view_indices.push_back(handle.image); | ||
| 806 | } | ||
| 807 | } | ||
| 808 | } | ||
| 809 | |||
| 810 | void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) { | ||
| 811 | const bool via_header_index = | ||
| 812 | maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 813 | for (const auto& entry : shader->GetEntries().images) { | ||
| 814 | const auto shader_type = static_cast<ShaderType>(stage_index); | ||
| 815 | const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type); | ||
| 816 | image_view_indices.push_back(handle.image); | ||
| 817 | } | ||
| 818 | } | ||
| 819 | |||
| 820 | void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { | ||
| 821 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 822 | for (const auto& entry : shader->GetEntries().images) { | ||
| 823 | const auto handle = | ||
| 824 | GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute); | ||
| 825 | image_view_indices.push_back(handle.image); | ||
| 826 | } | ||
| 827 | } | ||
| 828 | |||
| 829 | void RasterizerOpenGL::SyncState() { | 523 | void RasterizerOpenGL::SyncState() { |
| 830 | SyncViewport(); | 524 | SyncViewport(); |
| 831 | SyncRasterizeEnable(); | 525 | SyncRasterizeEnable(); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d30ad698f..1f58f8791 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -28,11 +28,9 @@ | |||
| 28 | #include "video_core/renderer_opengl/gl_query_cache.h" | 28 | #include "video_core/renderer_opengl/gl_query_cache.h" |
| 29 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 29 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 31 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 32 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 31 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 33 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 32 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 34 | #include "video_core/renderer_opengl/gl_texture_cache.h" | 33 | #include "video_core/renderer_opengl/gl_texture_cache.h" |
| 35 | #include "video_core/shader/async_shaders.h" | ||
| 36 | #include "video_core/textures/texture.h" | 34 | #include "video_core/textures/texture.h" |
| 37 | 35 | ||
| 38 | namespace Core::Memory { | 36 | namespace Core::Memory { |
| @@ -81,7 +79,7 @@ public: | |||
| 81 | 79 | ||
| 82 | void Draw(bool is_indexed, bool is_instanced) override; | 80 | void Draw(bool is_indexed, bool is_instanced) override; |
| 83 | void Clear() override; | 81 | void Clear() override; |
| 84 | void DispatchCompute(GPUVAddr code_addr) override; | 82 | void DispatchCompute() override; |
| 85 | void ResetCounter(VideoCore::QueryType type) override; | 83 | void ResetCounter(VideoCore::QueryType type) override; |
| 86 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 84 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 87 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | 85 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |
| @@ -118,36 +116,11 @@ public: | |||
| 118 | return num_queued_commands > 0; | 116 | return num_queued_commands > 0; |
| 119 | } | 117 | } |
| 120 | 118 | ||
| 121 | VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { | ||
| 122 | return async_shaders; | ||
| 123 | } | ||
| 124 | |||
| 125 | const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { | ||
| 126 | return async_shaders; | ||
| 127 | } | ||
| 128 | |||
| 129 | private: | 119 | private: |
| 130 | static constexpr size_t MAX_TEXTURES = 192; | 120 | static constexpr size_t MAX_TEXTURES = 192; |
| 131 | static constexpr size_t MAX_IMAGES = 48; | 121 | static constexpr size_t MAX_IMAGES = 48; |
| 132 | static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; | 122 | static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; |
| 133 | 123 | ||
| 134 | void BindComputeTextures(Shader* kernel); | ||
| 135 | |||
| 136 | void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, | ||
| 137 | size_t& image_view_index, size_t& texture_index, size_t& image_index); | ||
| 138 | |||
| 139 | /// Configures the current textures to use for the draw command. | ||
| 140 | void SetupDrawTextures(const Shader* shader, size_t stage_index); | ||
| 141 | |||
| 142 | /// Configures the textures used in a compute shader. | ||
| 143 | void SetupComputeTextures(const Shader* kernel); | ||
| 144 | |||
| 145 | /// Configures images in a graphics shader. | ||
| 146 | void SetupDrawImages(const Shader* shader, size_t stage_index); | ||
| 147 | |||
| 148 | /// Configures images in a compute shader. | ||
| 149 | void SetupComputeImages(const Shader* shader); | ||
| 150 | |||
| 151 | /// Syncs state to match guest's | 124 | /// Syncs state to match guest's |
| 152 | void SyncState(); | 125 | void SyncState(); |
| 153 | 126 | ||
| @@ -230,8 +203,6 @@ private: | |||
| 230 | /// End a transform feedback | 203 | /// End a transform feedback |
| 231 | void EndTransformFeedback(); | 204 | void EndTransformFeedback(); |
| 232 | 205 | ||
| 233 | void SetupShaders(bool is_indexed); | ||
| 234 | |||
| 235 | Tegra::GPU& gpu; | 206 | Tegra::GPU& gpu; |
| 236 | Tegra::Engines::Maxwell3D& maxwell3d; | 207 | Tegra::Engines::Maxwell3D& maxwell3d; |
| 237 | Tegra::Engines::KeplerCompute& kepler_compute; | 208 | Tegra::Engines::KeplerCompute& kepler_compute; |
| @@ -251,8 +222,6 @@ private: | |||
| 251 | AccelerateDMA accelerate_dma; | 222 | AccelerateDMA accelerate_dma; |
| 252 | FenceManagerOpenGL fence_manager; | 223 | FenceManagerOpenGL fence_manager; |
| 253 | 224 | ||
| 254 | VideoCommon::Shader::AsyncShaders async_shaders; | ||
| 255 | |||
| 256 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; | 225 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; |
| 257 | std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; | 226 | std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; |
| 258 | boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; | 227 | boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5a01c59ec..4dd166156 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -20,307 +20,19 @@ | |||
| 20 | #include "video_core/engines/maxwell_3d.h" | 20 | #include "video_core/engines/maxwell_3d.h" |
| 21 | #include "video_core/engines/shader_type.h" | 21 | #include "video_core/engines/shader_type.h" |
| 22 | #include "video_core/memory_manager.h" | 22 | #include "video_core/memory_manager.h" |
| 23 | #include "video_core/renderer_opengl/gl_arb_decompiler.h" | ||
| 24 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 23 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 25 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 24 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 26 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 25 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 27 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 28 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | ||
| 29 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 26 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 30 | #include "video_core/shader/memory_util.h" | ||
| 31 | #include "video_core/shader/registry.h" | ||
| 32 | #include "video_core/shader/shader_ir.h" | ||
| 33 | #include "video_core/shader_cache.h" | 27 | #include "video_core/shader_cache.h" |
| 34 | #include "video_core/shader_notify.h" | 28 | #include "video_core/shader_notify.h" |
| 35 | 29 | ||
| 36 | namespace OpenGL { | 30 | namespace OpenGL { |
| 37 | 31 | ||
| 38 | using Tegra::Engines::ShaderType; | 32 | Shader::Shader() = default; |
| 39 | using VideoCommon::Shader::GetShaderAddress; | ||
| 40 | using VideoCommon::Shader::GetShaderCode; | ||
| 41 | using VideoCommon::Shader::GetUniqueIdentifier; | ||
| 42 | using VideoCommon::Shader::KERNEL_MAIN_OFFSET; | ||
| 43 | using VideoCommon::Shader::ProgramCode; | ||
| 44 | using VideoCommon::Shader::Registry; | ||
| 45 | using VideoCommon::Shader::ShaderIR; | ||
| 46 | using VideoCommon::Shader::STAGE_MAIN_OFFSET; | ||
| 47 | |||
| 48 | namespace { | ||
| 49 | |||
| 50 | constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; | ||
| 51 | |||
| 52 | /// Gets the shader type from a Maxwell program type | ||
| 53 | constexpr GLenum GetGLShaderType(ShaderType shader_type) { | ||
| 54 | switch (shader_type) { | ||
| 55 | case ShaderType::Vertex: | ||
| 56 | return GL_VERTEX_SHADER; | ||
| 57 | case ShaderType::Geometry: | ||
| 58 | return GL_GEOMETRY_SHADER; | ||
| 59 | case ShaderType::Fragment: | ||
| 60 | return GL_FRAGMENT_SHADER; | ||
| 61 | case ShaderType::Compute: | ||
| 62 | return GL_COMPUTE_SHADER; | ||
| 63 | default: | ||
| 64 | return GL_NONE; | ||
| 65 | } | ||
| 66 | } | ||
| 67 | |||
| 68 | constexpr const char* GetShaderTypeName(ShaderType shader_type) { | ||
| 69 | switch (shader_type) { | ||
| 70 | case ShaderType::Vertex: | ||
| 71 | return "VS"; | ||
| 72 | case ShaderType::TesselationControl: | ||
| 73 | return "HS"; | ||
| 74 | case ShaderType::TesselationEval: | ||
| 75 | return "DS"; | ||
| 76 | case ShaderType::Geometry: | ||
| 77 | return "GS"; | ||
| 78 | case ShaderType::Fragment: | ||
| 79 | return "FS"; | ||
| 80 | case ShaderType::Compute: | ||
| 81 | return "CS"; | ||
| 82 | } | ||
| 83 | return "UNK"; | ||
| 84 | } | ||
| 85 | |||
| 86 | constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { | ||
| 87 | switch (program_type) { | ||
| 88 | case Maxwell::ShaderProgram::VertexA: | ||
| 89 | case Maxwell::ShaderProgram::VertexB: | ||
| 90 | return ShaderType::Vertex; | ||
| 91 | case Maxwell::ShaderProgram::TesselationControl: | ||
| 92 | return ShaderType::TesselationControl; | ||
| 93 | case Maxwell::ShaderProgram::TesselationEval: | ||
| 94 | return ShaderType::TesselationEval; | ||
| 95 | case Maxwell::ShaderProgram::Geometry: | ||
| 96 | return ShaderType::Geometry; | ||
| 97 | case Maxwell::ShaderProgram::Fragment: | ||
| 98 | return ShaderType::Fragment; | ||
| 99 | } | ||
| 100 | return {}; | ||
| 101 | } | ||
| 102 | |||
| 103 | constexpr GLenum AssemblyEnum(ShaderType shader_type) { | ||
| 104 | switch (shader_type) { | ||
| 105 | case ShaderType::Vertex: | ||
| 106 | return GL_VERTEX_PROGRAM_NV; | ||
| 107 | case ShaderType::TesselationControl: | ||
| 108 | return GL_TESS_CONTROL_PROGRAM_NV; | ||
| 109 | case ShaderType::TesselationEval: | ||
| 110 | return GL_TESS_EVALUATION_PROGRAM_NV; | ||
| 111 | case ShaderType::Geometry: | ||
| 112 | return GL_GEOMETRY_PROGRAM_NV; | ||
| 113 | case ShaderType::Fragment: | ||
| 114 | return GL_FRAGMENT_PROGRAM_NV; | ||
| 115 | case ShaderType::Compute: | ||
| 116 | return GL_COMPUTE_PROGRAM_NV; | ||
| 117 | } | ||
| 118 | return {}; | ||
| 119 | } | ||
| 120 | |||
| 121 | std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { | ||
| 122 | return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); | ||
| 123 | } | ||
| 124 | |||
| 125 | std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { | ||
| 126 | const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; | ||
| 127 | const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, | ||
| 128 | entry.graphics_info, entry.compute_info}; | ||
| 129 | auto registry = std::make_shared<Registry>(entry.type, info); | ||
| 130 | for (const auto& [address, value] : entry.keys) { | ||
| 131 | const auto [buffer, offset] = address; | ||
| 132 | registry->InsertKey(buffer, offset, value); | ||
| 133 | } | ||
| 134 | for (const auto& [offset, sampler] : entry.bound_samplers) { | ||
| 135 | registry->InsertBoundSampler(offset, sampler); | ||
| 136 | } | ||
| 137 | for (const auto& [key, sampler] : entry.bindless_samplers) { | ||
| 138 | const auto [buffer, offset] = key; | ||
| 139 | registry->InsertBindlessSampler(buffer, offset, sampler); | ||
| 140 | } | ||
| 141 | return registry; | ||
| 142 | } | ||
| 143 | |||
| 144 | std::unordered_set<GLenum> GetSupportedFormats() { | ||
| 145 | GLint num_formats; | ||
| 146 | glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); | ||
| 147 | |||
| 148 | std::vector<GLint> formats(num_formats); | ||
| 149 | glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); | ||
| 150 | |||
| 151 | std::unordered_set<GLenum> supported_formats; | ||
| 152 | for (const GLint format : formats) { | ||
| 153 | supported_formats.insert(static_cast<GLenum>(format)); | ||
| 154 | } | ||
| 155 | return supported_formats; | ||
| 156 | } | ||
| 157 | |||
| 158 | } // Anonymous namespace | ||
| 159 | |||
| 160 | ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, | ||
| 161 | const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { | ||
| 162 | if (device.UseDriverCache()) { | ||
| 163 | // Ignore hint retrievable if we are using the driver cache | ||
| 164 | hint_retrievable = false; | ||
| 165 | } | ||
| 166 | const std::string shader_id = MakeShaderID(unique_identifier, shader_type); | ||
| 167 | LOG_INFO(Render_OpenGL, "{}", shader_id); | ||
| 168 | |||
| 169 | auto program = std::make_shared<ProgramHandle>(); | ||
| 170 | |||
| 171 | if (device.UseAssemblyShaders()) { | ||
| 172 | const std::string arb = | ||
| 173 | DecompileAssemblyShader(device, ir, registry, shader_type, shader_id); | ||
| 174 | |||
| 175 | GLuint& arb_prog = program->assembly_program.handle; | ||
| 176 | |||
| 177 | // Commented out functions signal OpenGL errors but are compatible with apitrace. | ||
| 178 | // Use them only to capture and replay on apitrace. | ||
| 179 | #if 0 | ||
| 180 | glGenProgramsNV(1, &arb_prog); | ||
| 181 | glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()), | ||
| 182 | reinterpret_cast<const GLubyte*>(arb.data())); | ||
| 183 | #else | ||
| 184 | glGenProgramsARB(1, &arb_prog); | ||
| 185 | glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB, | ||
| 186 | static_cast<GLsizei>(arb.size()), arb.data()); | ||
| 187 | #endif | ||
| 188 | const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV)); | ||
| 189 | if (err && *err) { | ||
| 190 | LOG_CRITICAL(Render_OpenGL, "{}", err); | ||
| 191 | LOG_INFO(Render_OpenGL, "\n{}", arb); | ||
| 192 | } | ||
| 193 | } else { | ||
| 194 | const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); | ||
| 195 | OGLShader shader; | ||
| 196 | shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); | ||
| 197 | |||
| 198 | program->source_program.Create(true, hint_retrievable, shader.handle); | ||
| 199 | } | ||
| 200 | |||
| 201 | return program; | ||
| 202 | } | ||
| 203 | |||
| 204 | Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_, | ||
| 205 | ProgramSharedPtr program_, bool is_built_) | ||
| 206 | : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, | ||
| 207 | is_built{is_built_} { | ||
| 208 | handle = program->assembly_program.handle; | ||
| 209 | if (handle == 0) { | ||
| 210 | handle = program->source_program.handle; | ||
| 211 | } | ||
| 212 | if (is_built) { | ||
| 213 | ASSERT(handle != 0); | ||
| 214 | } | ||
| 215 | } | ||
| 216 | 33 | ||
| 217 | Shader::~Shader() = default; | 34 | Shader::~Shader() = default; |
| 218 | 35 | ||
| 219 | GLuint Shader::GetHandle() const { | ||
| 220 | DEBUG_ASSERT(registry->IsConsistent()); | ||
| 221 | return handle; | ||
| 222 | } | ||
| 223 | |||
| 224 | bool Shader::IsBuilt() const { | ||
| 225 | return is_built; | ||
| 226 | } | ||
| 227 | |||
| 228 | void Shader::AsyncOpenGLBuilt(OGLProgram new_program) { | ||
| 229 | program->source_program = std::move(new_program); | ||
| 230 | handle = program->source_program.handle; | ||
| 231 | is_built = true; | ||
| 232 | } | ||
| 233 | |||
| 234 | void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) { | ||
| 235 | program->assembly_program = std::move(new_program); | ||
| 236 | handle = program->assembly_program.handle; | ||
| 237 | is_built = true; | ||
| 238 | } | ||
| 239 | |||
| 240 | std::unique_ptr<Shader> Shader::CreateStageFromMemory( | ||
| 241 | const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, | ||
| 242 | ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { | ||
| 243 | const auto shader_type = GetShaderType(program_type); | ||
| 244 | |||
| 245 | auto& gpu = params.gpu; | ||
| 246 | gpu.ShaderNotify().MarkSharderBuilding(); | ||
| 247 | |||
| 248 | auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D()); | ||
| 249 | if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) { | ||
| 250 | const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); | ||
| 251 | // TODO(Rodrigo): Handle VertexA shaders | ||
| 252 | // std::optional<ShaderIR> ir_b; | ||
| 253 | // if (!code_b.empty()) { | ||
| 254 | // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); | ||
| 255 | // } | ||
| 256 | auto program = | ||
| 257 | BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); | ||
| 258 | ShaderDiskCacheEntry entry; | ||
| 259 | entry.type = shader_type; | ||
| 260 | entry.code = std::move(code); | ||
| 261 | entry.code_b = std::move(code_b); | ||
| 262 | entry.unique_identifier = params.unique_identifier; | ||
| 263 | entry.bound_buffer = registry->GetBoundBuffer(); | ||
| 264 | entry.graphics_info = registry->GetGraphicsInfo(); | ||
| 265 | entry.keys = registry->GetKeys(); | ||
| 266 | entry.bound_samplers = registry->GetBoundSamplers(); | ||
| 267 | entry.bindless_samplers = registry->GetBindlessSamplers(); | ||
| 268 | params.disk_cache.SaveEntry(std::move(entry)); | ||
| 269 | |||
| 270 | gpu.ShaderNotify().MarkShaderComplete(); | ||
| 271 | |||
| 272 | return std::unique_ptr<Shader>(new Shader(std::move(registry), | ||
| 273 | MakeEntries(params.device, ir, shader_type), | ||
| 274 | std::move(program), true)); | ||
| 275 | } else { | ||
| 276 | // Required for entries | ||
| 277 | const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); | ||
| 278 | auto entries = MakeEntries(params.device, ir, shader_type); | ||
| 279 | |||
| 280 | async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier, | ||
| 281 | std::move(code), std::move(code_b), STAGE_MAIN_OFFSET, | ||
| 282 | COMPILER_SETTINGS, *registry, cpu_addr); | ||
| 283 | |||
| 284 | auto program = std::make_shared<ProgramHandle>(); | ||
| 285 | return std::unique_ptr<Shader>( | ||
| 286 | new Shader(std::move(registry), std::move(entries), std::move(program), false)); | ||
| 287 | } | ||
| 288 | } | ||
| 289 | |||
| 290 | std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, | ||
| 291 | ProgramCode code) { | ||
| 292 | auto& gpu = params.gpu; | ||
| 293 | gpu.ShaderNotify().MarkSharderBuilding(); | ||
| 294 | |||
| 295 | auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine); | ||
| 296 | const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); | ||
| 297 | const u64 uid = params.unique_identifier; | ||
| 298 | auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); | ||
| 299 | |||
| 300 | ShaderDiskCacheEntry entry; | ||
| 301 | entry.type = ShaderType::Compute; | ||
| 302 | entry.code = std::move(code); | ||
| 303 | entry.unique_identifier = uid; | ||
| 304 | entry.bound_buffer = registry->GetBoundBuffer(); | ||
| 305 | entry.compute_info = registry->GetComputeInfo(); | ||
| 306 | entry.keys = registry->GetKeys(); | ||
| 307 | entry.bound_samplers = registry->GetBoundSamplers(); | ||
| 308 | entry.bindless_samplers = registry->GetBindlessSamplers(); | ||
| 309 | params.disk_cache.SaveEntry(std::move(entry)); | ||
| 310 | |||
| 311 | gpu.ShaderNotify().MarkShaderComplete(); | ||
| 312 | |||
| 313 | return std::unique_ptr<Shader>(new Shader(std::move(registry), | ||
| 314 | MakeEntries(params.device, ir, ShaderType::Compute), | ||
| 315 | std::move(program))); | ||
| 316 | } | ||
| 317 | |||
| 318 | std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params, | ||
| 319 | const PrecompiledShader& precompiled_shader) { | ||
| 320 | return std::unique_ptr<Shader>(new Shader( | ||
| 321 | precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); | ||
| 322 | } | ||
| 323 | |||
| 324 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, | 36 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, |
| 325 | Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 37 | Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 326 | Tegra::Engines::Maxwell3D& maxwell3d_, | 38 | Tegra::Engines::Maxwell3D& maxwell3d_, |
| @@ -331,278 +43,4 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, | |||
| 331 | 43 | ||
| 332 | ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; | 44 | ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; |
| 333 | 45 | ||
| 334 | void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading, | ||
| 335 | const VideoCore::DiskResourceLoadCallback& callback) { | ||
| 336 | disk_cache.BindTitleID(title_id); | ||
| 337 | const std::optional transferable = disk_cache.LoadTransferable(); | ||
| 338 | |||
| 339 | LOG_INFO(Render_OpenGL, "Total Shader Count: {}", | ||
| 340 | transferable.has_value() ? transferable->size() : 0); | ||
| 341 | |||
| 342 | if (!transferable) { | ||
| 343 | return; | ||
| 344 | } | ||
| 345 | |||
| 346 | std::vector<ShaderDiskCachePrecompiled> gl_cache; | ||
| 347 | if (!device.UseAssemblyShaders() && !device.UseDriverCache()) { | ||
| 348 | // Only load precompiled cache when we are not using assembly shaders | ||
| 349 | gl_cache = disk_cache.LoadPrecompiled(); | ||
| 350 | } | ||
| 351 | const auto supported_formats = GetSupportedFormats(); | ||
| 352 | |||
| 353 | // Track if precompiled cache was altered during loading to know if we have to | ||
| 354 | // serialize the virtual precompiled cache file back to the hard drive | ||
| 355 | bool precompiled_cache_altered = false; | ||
| 356 | |||
| 357 | // Inform the frontend about shader build initialization | ||
| 358 | if (callback) { | ||
| 359 | callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size()); | ||
| 360 | } | ||
| 361 | |||
| 362 | std::mutex mutex; | ||
| 363 | std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex | ||
| 364 | std::atomic_bool gl_cache_failed = false; | ||
| 365 | |||
| 366 | const auto find_precompiled = [&gl_cache](u64 id) { | ||
| 367 | return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier); | ||
| 368 | }; | ||
| 369 | |||
| 370 | const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, | ||
| 371 | std::size_t end) { | ||
| 372 | const auto scope = context->Acquire(); | ||
| 373 | |||
| 374 | for (std::size_t i = begin; i < end; ++i) { | ||
| 375 | if (stop_loading.stop_requested()) { | ||
| 376 | return; | ||
| 377 | } | ||
| 378 | const auto& entry = (*transferable)[i]; | ||
| 379 | const u64 uid = entry.unique_identifier; | ||
| 380 | const auto it = find_precompiled(uid); | ||
| 381 | const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; | ||
| 382 | |||
| 383 | const bool is_compute = entry.type == ShaderType::Compute; | ||
| 384 | const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; | ||
| 385 | auto registry = MakeRegistry(entry); | ||
| 386 | const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); | ||
| 387 | |||
| 388 | ProgramSharedPtr program; | ||
| 389 | if (precompiled_entry) { | ||
| 390 | // If the shader is precompiled, attempt to load it with | ||
| 391 | program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); | ||
| 392 | if (!program) { | ||
| 393 | gl_cache_failed = true; | ||
| 394 | } | ||
| 395 | } | ||
| 396 | if (!program) { | ||
| 397 | // Otherwise compile it from GLSL | ||
| 398 | program = BuildShader(device, entry.type, uid, ir, *registry, true); | ||
| 399 | } | ||
| 400 | |||
| 401 | PrecompiledShader shader; | ||
| 402 | shader.program = std::move(program); | ||
| 403 | shader.registry = std::move(registry); | ||
| 404 | shader.entries = MakeEntries(device, ir, entry.type); | ||
| 405 | |||
| 406 | std::scoped_lock lock{mutex}; | ||
| 407 | if (callback) { | ||
| 408 | callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, | ||
| 409 | transferable->size()); | ||
| 410 | } | ||
| 411 | runtime_cache.emplace(entry.unique_identifier, std::move(shader)); | ||
| 412 | } | ||
| 413 | }; | ||
| 414 | |||
| 415 | const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())}; | ||
| 416 | const std::size_t bucket_size{transferable->size() / num_workers}; | ||
| 417 | std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); | ||
| 418 | std::vector<std::thread> threads(num_workers); | ||
| 419 | for (std::size_t i = 0; i < num_workers; ++i) { | ||
| 420 | const bool is_last_worker = i + 1 == num_workers; | ||
| 421 | const std::size_t start{bucket_size * i}; | ||
| 422 | const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size}; | ||
| 423 | |||
| 424 | // On some platforms the shared context has to be created from the GUI thread | ||
| 425 | contexts[i] = emu_window.CreateSharedContext(); | ||
| 426 | threads[i] = std::thread(worker, contexts[i].get(), start, end); | ||
| 427 | } | ||
| 428 | for (auto& thread : threads) { | ||
| 429 | thread.join(); | ||
| 430 | } | ||
| 431 | |||
| 432 | if (gl_cache_failed) { | ||
| 433 | // Invalidate the precompiled cache if a shader dumped shader was rejected | ||
| 434 | disk_cache.InvalidatePrecompiled(); | ||
| 435 | precompiled_cache_altered = true; | ||
| 436 | return; | ||
| 437 | } | ||
| 438 | if (stop_loading.stop_requested()) { | ||
| 439 | return; | ||
| 440 | } | ||
| 441 | |||
| 442 | if (device.UseAssemblyShaders() || device.UseDriverCache()) { | ||
| 443 | // Don't store precompiled binaries for assembly shaders or when using the driver cache | ||
| 444 | return; | ||
| 445 | } | ||
| 446 | |||
| 447 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw | ||
| 448 | // before precompiling them | ||
| 449 | |||
| 450 | for (std::size_t i = 0; i < transferable->size(); ++i) { | ||
| 451 | const u64 id = (*transferable)[i].unique_identifier; | ||
| 452 | const auto it = find_precompiled(id); | ||
| 453 | if (it == gl_cache.end()) { | ||
| 454 | const GLuint program = runtime_cache.at(id).program->source_program.handle; | ||
| 455 | disk_cache.SavePrecompiled(id, program); | ||
| 456 | precompiled_cache_altered = true; | ||
| 457 | } | ||
| 458 | } | ||
| 459 | |||
| 460 | if (precompiled_cache_altered) { | ||
| 461 | disk_cache.SaveVirtualPrecompiledFile(); | ||
| 462 | } | ||
| 463 | } | ||
| 464 | |||
| 465 | ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( | ||
| 466 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, | ||
| 467 | const std::unordered_set<GLenum>& supported_formats) { | ||
| 468 | if (!supported_formats.contains(precompiled_entry.binary_format)) { | ||
| 469 | LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); | ||
| 470 | return {}; | ||
| 471 | } | ||
| 472 | |||
| 473 | auto program = std::make_shared<ProgramHandle>(); | ||
| 474 | GLuint& handle = program->source_program.handle; | ||
| 475 | handle = glCreateProgram(); | ||
| 476 | glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE); | ||
| 477 | glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(), | ||
| 478 | static_cast<GLsizei>(precompiled_entry.binary.size())); | ||
| 479 | |||
| 480 | GLint link_status; | ||
| 481 | glGetProgramiv(handle, GL_LINK_STATUS, &link_status); | ||
| 482 | if (link_status == GL_FALSE) { | ||
| 483 | LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); | ||
| 484 | return {}; | ||
| 485 | } | ||
| 486 | |||
| 487 | return program; | ||
| 488 | } | ||
| 489 | |||
| 490 | Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, | ||
| 491 | VideoCommon::Shader::AsyncShaders& async_shaders) { | ||
| 492 | if (!maxwell3d.dirty.flags[Dirty::Shaders]) { | ||
| 493 | auto* last_shader = last_shaders[static_cast<std::size_t>(program)]; | ||
| 494 | if (last_shader->IsBuilt()) { | ||
| 495 | return last_shader; | ||
| 496 | } | ||
| 497 | } | ||
| 498 | |||
| 499 | const GPUVAddr address{GetShaderAddress(maxwell3d, program)}; | ||
| 500 | |||
| 501 | if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { | ||
| 502 | auto completed_work = async_shaders.GetCompletedWork(); | ||
| 503 | for (auto& work : completed_work) { | ||
| 504 | Shader* shader = TryGet(work.cpu_address); | ||
| 505 | gpu.ShaderNotify().MarkShaderComplete(); | ||
| 506 | if (shader == nullptr) { | ||
| 507 | continue; | ||
| 508 | } | ||
| 509 | using namespace VideoCommon::Shader; | ||
| 510 | if (work.backend == AsyncShaders::Backend::OpenGL) { | ||
| 511 | shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); | ||
| 512 | } else if (work.backend == AsyncShaders::Backend::GLASM) { | ||
| 513 | shader->AsyncGLASMBuilt(std::move(work.program.glasm)); | ||
| 514 | } | ||
| 515 | |||
| 516 | auto& registry = shader->GetRegistry(); | ||
| 517 | |||
| 518 | ShaderDiskCacheEntry entry; | ||
| 519 | entry.type = work.shader_type; | ||
| 520 | entry.code = std::move(work.code); | ||
| 521 | entry.code_b = std::move(work.code_b); | ||
| 522 | entry.unique_identifier = work.uid; | ||
| 523 | entry.bound_buffer = registry.GetBoundBuffer(); | ||
| 524 | entry.graphics_info = registry.GetGraphicsInfo(); | ||
| 525 | entry.keys = registry.GetKeys(); | ||
| 526 | entry.bound_samplers = registry.GetBoundSamplers(); | ||
| 527 | entry.bindless_samplers = registry.GetBindlessSamplers(); | ||
| 528 | disk_cache.SaveEntry(std::move(entry)); | ||
| 529 | } | ||
| 530 | } | ||
| 531 | |||
| 532 | // Look up shader in the cache based on address | ||
| 533 | const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)}; | ||
| 534 | if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { | ||
| 535 | return last_shaders[static_cast<std::size_t>(program)] = shader; | ||
| 536 | } | ||
| 537 | |||
| 538 | const u8* const host_ptr{gpu_memory.GetPointer(address)}; | ||
| 539 | |||
| 540 | // No shader found - create a new one | ||
| 541 | ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)}; | ||
| 542 | ProgramCode code_b; | ||
| 543 | if (program == Maxwell::ShaderProgram::VertexA) { | ||
| 544 | const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)}; | ||
| 545 | const u8* host_ptr_b = gpu_memory.GetPointer(address_b); | ||
| 546 | code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false); | ||
| 547 | } | ||
| 548 | const std::size_t code_size = code.size() * sizeof(u64); | ||
| 549 | |||
| 550 | const u64 unique_identifier = GetUniqueIdentifier( | ||
| 551 | GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); | ||
| 552 | |||
| 553 | const ShaderParameters params{gpu, maxwell3d, disk_cache, device, | ||
| 554 | *cpu_addr, host_ptr, unique_identifier}; | ||
| 555 | |||
| 556 | std::unique_ptr<Shader> shader; | ||
| 557 | const auto found = runtime_cache.find(unique_identifier); | ||
| 558 | if (found == runtime_cache.end()) { | ||
| 559 | shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), | ||
| 560 | async_shaders, cpu_addr.value_or(0)); | ||
| 561 | } else { | ||
| 562 | shader = Shader::CreateFromCache(params, found->second); | ||
| 563 | } | ||
| 564 | |||
| 565 | Shader* const result = shader.get(); | ||
| 566 | if (cpu_addr) { | ||
| 567 | Register(std::move(shader), *cpu_addr, code_size); | ||
| 568 | } else { | ||
| 569 | null_shader = std::move(shader); | ||
| 570 | } | ||
| 571 | |||
| 572 | return last_shaders[static_cast<std::size_t>(program)] = result; | ||
| 573 | } | ||
| 574 | |||
| 575 | Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | ||
| 576 | const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)}; | ||
| 577 | |||
| 578 | if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) { | ||
| 579 | return kernel; | ||
| 580 | } | ||
| 581 | |||
| 582 | // No kernel found, create a new one | ||
| 583 | const u8* host_ptr{gpu_memory.GetPointer(code_addr)}; | ||
| 584 | ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)}; | ||
| 585 | const std::size_t code_size{code.size() * sizeof(u64)}; | ||
| 586 | const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; | ||
| 587 | |||
| 588 | const ShaderParameters params{gpu, kepler_compute, disk_cache, device, | ||
| 589 | *cpu_addr, host_ptr, unique_identifier}; | ||
| 590 | |||
| 591 | std::unique_ptr<Shader> kernel; | ||
| 592 | const auto found = runtime_cache.find(unique_identifier); | ||
| 593 | if (found == runtime_cache.end()) { | ||
| 594 | kernel = Shader::CreateKernelFromMemory(params, std::move(code)); | ||
| 595 | } else { | ||
| 596 | kernel = Shader::CreateFromCache(params, found->second); | ||
| 597 | } | ||
| 598 | |||
| 599 | Shader* const result = kernel.get(); | ||
| 600 | if (cpu_addr) { | ||
| 601 | Register(std::move(kernel), *cpu_addr, code_size); | ||
| 602 | } else { | ||
| 603 | null_kernel = std::move(kernel); | ||
| 604 | } | ||
| 605 | return result; | ||
| 606 | } | ||
| 607 | |||
| 608 | } // namespace OpenGL | 46 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b30308b6f..ad3d15a76 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -19,10 +19,6 @@ | |||
| 19 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 20 | #include "video_core/engines/shader_type.h" | 20 | #include "video_core/engines/shader_type.h" |
| 21 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 21 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 22 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 23 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | ||
| 24 | #include "video_core/shader/registry.h" | ||
| 25 | #include "video_core/shader/shader_ir.h" | ||
| 26 | #include "video_core/shader_cache.h" | 22 | #include "video_core/shader_cache.h" |
| 27 | 23 | ||
| 28 | namespace Tegra { | 24 | namespace Tegra { |
| @@ -33,10 +29,6 @@ namespace Core::Frontend { | |||
| 33 | class EmuWindow; | 29 | class EmuWindow; |
| 34 | } | 30 | } |
| 35 | 31 | ||
| 36 | namespace VideoCommon::Shader { | ||
| 37 | class AsyncShaders; | ||
| 38 | } | ||
| 39 | |||
| 40 | namespace OpenGL { | 32 | namespace OpenGL { |
| 41 | 33 | ||
| 42 | class Device; | 34 | class Device; |
| @@ -44,77 +36,10 @@ class RasterizerOpenGL; | |||
| 44 | 36 | ||
| 45 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 37 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 46 | 38 | ||
| 47 | struct ProgramHandle { | 39 | class Shader { |
| 48 | OGLProgram source_program; | ||
| 49 | OGLAssemblyProgram assembly_program; | ||
| 50 | }; | ||
| 51 | using ProgramSharedPtr = std::shared_ptr<ProgramHandle>; | ||
| 52 | |||
| 53 | struct PrecompiledShader { | ||
| 54 | ProgramSharedPtr program; | ||
| 55 | std::shared_ptr<VideoCommon::Shader::Registry> registry; | ||
| 56 | ShaderEntries entries; | ||
| 57 | }; | ||
| 58 | |||
| 59 | struct ShaderParameters { | ||
| 60 | Tegra::GPU& gpu; | ||
| 61 | Tegra::Engines::ConstBufferEngineInterface& engine; | ||
| 62 | ShaderDiskCacheOpenGL& disk_cache; | ||
| 63 | const Device& device; | ||
| 64 | VAddr cpu_addr; | ||
| 65 | const u8* host_ptr; | ||
| 66 | u64 unique_identifier; | ||
| 67 | }; | ||
| 68 | |||
| 69 | ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type, | ||
| 70 | u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir, | ||
| 71 | const VideoCommon::Shader::Registry& registry, | ||
| 72 | bool hint_retrievable = false); | ||
| 73 | |||
| 74 | class Shader final { | ||
| 75 | public: | 40 | public: |
| 41 | explicit Shader(); | ||
| 76 | ~Shader(); | 42 | ~Shader(); |
| 77 | |||
| 78 | /// Gets the GL program handle for the shader | ||
| 79 | GLuint GetHandle() const; | ||
| 80 | |||
| 81 | bool IsBuilt() const; | ||
| 82 | |||
| 83 | /// Gets the shader entries for the shader | ||
| 84 | const ShaderEntries& GetEntries() const { | ||
| 85 | return entries; | ||
| 86 | } | ||
| 87 | |||
| 88 | const VideoCommon::Shader::Registry& GetRegistry() const { | ||
| 89 | return *registry; | ||
| 90 | } | ||
| 91 | |||
| 92 | /// Mark a OpenGL shader as built | ||
| 93 | void AsyncOpenGLBuilt(OGLProgram new_program); | ||
| 94 | |||
| 95 | /// Mark a GLASM shader as built | ||
| 96 | void AsyncGLASMBuilt(OGLAssemblyProgram new_program); | ||
| 97 | |||
| 98 | static std::unique_ptr<Shader> CreateStageFromMemory( | ||
| 99 | const ShaderParameters& params, Maxwell::ShaderProgram program_type, | ||
| 100 | ProgramCode program_code, ProgramCode program_code_b, | ||
| 101 | VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr); | ||
| 102 | |||
| 103 | static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params, | ||
| 104 | ProgramCode code); | ||
| 105 | |||
| 106 | static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params, | ||
| 107 | const PrecompiledShader& precompiled_shader); | ||
| 108 | |||
| 109 | private: | ||
| 110 | explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, | ||
| 111 | ProgramSharedPtr program, bool is_built_ = true); | ||
| 112 | |||
| 113 | std::shared_ptr<VideoCommon::Shader::Registry> registry; | ||
| 114 | ShaderEntries entries; | ||
| 115 | ProgramSharedPtr program; | ||
| 116 | GLuint handle = 0; | ||
| 117 | bool is_built{}; | ||
| 118 | }; | 43 | }; |
| 119 | 44 | ||
| 120 | class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { | 45 | class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { |
| @@ -126,36 +51,13 @@ public: | |||
| 126 | Tegra::MemoryManager& gpu_memory_, const Device& device_); | 51 | Tegra::MemoryManager& gpu_memory_, const Device& device_); |
| 127 | ~ShaderCacheOpenGL() override; | 52 | ~ShaderCacheOpenGL() override; |
| 128 | 53 | ||
| 129 | /// Loads disk cache for the current game | ||
| 130 | void LoadDiskCache(u64 title_id, std::stop_token stop_loading, | ||
| 131 | const VideoCore::DiskResourceLoadCallback& callback); | ||
| 132 | |||
| 133 | /// Gets the current specified shader stage program | ||
| 134 | Shader* GetStageProgram(Maxwell::ShaderProgram program, | ||
| 135 | VideoCommon::Shader::AsyncShaders& async_shaders); | ||
| 136 | |||
| 137 | /// Gets a compute kernel in the passed address | ||
| 138 | Shader* GetComputeKernel(GPUVAddr code_addr); | ||
| 139 | |||
| 140 | private: | 54 | private: |
| 141 | ProgramSharedPtr GeneratePrecompiledProgram( | ||
| 142 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, | ||
| 143 | const std::unordered_set<GLenum>& supported_formats); | ||
| 144 | |||
| 145 | Core::Frontend::EmuWindow& emu_window; | 55 | Core::Frontend::EmuWindow& emu_window; |
| 146 | Tegra::GPU& gpu; | 56 | Tegra::GPU& gpu; |
| 147 | Tegra::MemoryManager& gpu_memory; | 57 | Tegra::MemoryManager& gpu_memory; |
| 148 | Tegra::Engines::Maxwell3D& maxwell3d; | 58 | Tegra::Engines::Maxwell3D& maxwell3d; |
| 149 | Tegra::Engines::KeplerCompute& kepler_compute; | 59 | Tegra::Engines::KeplerCompute& kepler_compute; |
| 150 | const Device& device; | 60 | const Device& device; |
| 151 | |||
| 152 | ShaderDiskCacheOpenGL disk_cache; | ||
| 153 | std::unordered_map<u64, PrecompiledShader> runtime_cache; | ||
| 154 | |||
| 155 | std::unique_ptr<Shader> null_shader; | ||
| 156 | std::unique_ptr<Shader> null_kernel; | ||
| 157 | |||
| 158 | std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; | ||
| 159 | }; | 61 | }; |
| 160 | 62 | ||
| 161 | } // namespace OpenGL | 63 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp deleted file mode 100644 index 9c28498e8..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ /dev/null | |||
| @@ -1,2986 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <string> | ||
| 7 | #include <string_view> | ||
| 8 | #include <utility> | ||
| 9 | #include <variant> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include <fmt/format.h> | ||
| 13 | |||
| 14 | #include "common/alignment.h" | ||
| 15 | #include "common/assert.h" | ||
| 16 | #include "common/common_types.h" | ||
| 17 | #include "common/div_ceil.h" | ||
| 18 | #include "common/logging/log.h" | ||
| 19 | #include "video_core/engines/maxwell_3d.h" | ||
| 20 | #include "video_core/engines/shader_type.h" | ||
| 21 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_rasterizer.h" | ||
| 23 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 24 | #include "video_core/shader/ast.h" | ||
| 25 | #include "video_core/shader/node.h" | ||
| 26 | #include "video_core/shader/shader_ir.h" | ||
| 27 | #include "video_core/shader/transform_feedback.h" | ||
| 28 | |||
| 29 | namespace OpenGL { | ||
| 30 | |||
| 31 | namespace { | ||
| 32 | |||
| 33 | using Tegra::Engines::ShaderType; | ||
| 34 | using Tegra::Shader::Attribute; | ||
| 35 | using Tegra::Shader::Header; | ||
| 36 | using Tegra::Shader::IpaInterpMode; | ||
| 37 | using Tegra::Shader::IpaMode; | ||
| 38 | using Tegra::Shader::IpaSampleMode; | ||
| 39 | using Tegra::Shader::PixelImap; | ||
| 40 | using Tegra::Shader::Register; | ||
| 41 | using Tegra::Shader::TextureType; | ||
| 42 | |||
| 43 | using namespace VideoCommon::Shader; | ||
| 44 | using namespace std::string_literals; | ||
| 45 | |||
| 46 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 47 | using Operation = const OperationNode&; | ||
| 48 | |||
| 49 | class ASTDecompiler; | ||
| 50 | class ExprDecompiler; | ||
| 51 | |||
| 52 | enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; | ||
| 53 | |||
| 54 | constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"}; | ||
| 55 | |||
| 56 | constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr"; | ||
| 57 | constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr"; | ||
| 58 | |||
| 59 | struct TextureOffset {}; | ||
| 60 | struct TextureDerivates {}; | ||
| 61 | using TextureArgument = std::pair<Type, Node>; | ||
| 62 | using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>; | ||
| 63 | |||
| 64 | constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32); | ||
| 65 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); | ||
| 66 | |||
| 67 | constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt | ||
| 68 | #define ftou floatBitsToUint | ||
| 69 | #define itof intBitsToFloat | ||
| 70 | #define utof uintBitsToFloat | ||
| 71 | |||
| 72 | bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{ | ||
| 73 | bvec2 is_nan1 = isnan(pair1); | ||
| 74 | bvec2 is_nan2 = isnan(pair2); | ||
| 75 | return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); | ||
| 76 | }} | ||
| 77 | |||
| 78 | const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); | ||
| 79 | const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); | ||
| 80 | )"; | ||
| 81 | |||
| 82 | class ShaderWriter final { | ||
| 83 | public: | ||
| 84 | void AddExpression(std::string_view text) { | ||
| 85 | DEBUG_ASSERT(scope >= 0); | ||
| 86 | if (!text.empty()) { | ||
| 87 | AppendIndentation(); | ||
| 88 | } | ||
| 89 | shader_source += text; | ||
| 90 | } | ||
| 91 | |||
| 92 | // Forwards all arguments directly to libfmt. | ||
| 93 | // Note that all formatting requirements for fmt must be | ||
| 94 | // obeyed when using this function. (e.g. {{ must be used | ||
| 95 | // printing the character '{' is desirable. Ditto for }} and '}', | ||
| 96 | // etc). | ||
| 97 | template <typename... Args> | ||
| 98 | void AddLine(std::string_view text, Args&&... args) { | ||
| 99 | AddExpression(fmt::format(fmt::runtime(text), std::forward<Args>(args)...)); | ||
| 100 | AddNewLine(); | ||
| 101 | } | ||
| 102 | |||
| 103 | void AddNewLine() { | ||
| 104 | DEBUG_ASSERT(scope >= 0); | ||
| 105 | shader_source += '\n'; | ||
| 106 | } | ||
| 107 | |||
| 108 | std::string GenerateTemporary() { | ||
| 109 | return fmt::format("tmp{}", temporary_index++); | ||
| 110 | } | ||
| 111 | |||
| 112 | std::string GetResult() { | ||
| 113 | return std::move(shader_source); | ||
| 114 | } | ||
| 115 | |||
| 116 | s32 scope = 0; | ||
| 117 | |||
| 118 | private: | ||
| 119 | void AppendIndentation() { | ||
| 120 | shader_source.append(static_cast<std::size_t>(scope) * 4, ' '); | ||
| 121 | } | ||
| 122 | |||
| 123 | std::string shader_source; | ||
| 124 | u32 temporary_index = 1; | ||
| 125 | }; | ||
| 126 | |||
| 127 | class Expression final { | ||
| 128 | public: | ||
| 129 | Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} { | ||
| 130 | ASSERT(type != Type::Void); | ||
| 131 | } | ||
| 132 | Expression() : type{Type::Void} {} | ||
| 133 | |||
| 134 | Type GetType() const { | ||
| 135 | return type; | ||
| 136 | } | ||
| 137 | |||
| 138 | std::string GetCode() const { | ||
| 139 | return code; | ||
| 140 | } | ||
| 141 | |||
| 142 | void CheckVoid() const { | ||
| 143 | ASSERT(type == Type::Void); | ||
| 144 | } | ||
| 145 | |||
| 146 | std::string As(Type type_) const { | ||
| 147 | switch (type_) { | ||
| 148 | case Type::Bool: | ||
| 149 | return AsBool(); | ||
| 150 | case Type::Bool2: | ||
| 151 | return AsBool2(); | ||
| 152 | case Type::Float: | ||
| 153 | return AsFloat(); | ||
| 154 | case Type::Int: | ||
| 155 | return AsInt(); | ||
| 156 | case Type::Uint: | ||
| 157 | return AsUint(); | ||
| 158 | case Type::HalfFloat: | ||
| 159 | return AsHalfFloat(); | ||
| 160 | default: | ||
| 161 | UNREACHABLE_MSG("Invalid type"); | ||
| 162 | return code; | ||
| 163 | } | ||
| 164 | } | ||
| 165 | |||
| 166 | std::string AsBool() const { | ||
| 167 | switch (type) { | ||
| 168 | case Type::Bool: | ||
| 169 | return code; | ||
| 170 | default: | ||
| 171 | UNREACHABLE_MSG("Incompatible types"); | ||
| 172 | return code; | ||
| 173 | } | ||
| 174 | } | ||
| 175 | |||
| 176 | std::string AsBool2() const { | ||
| 177 | switch (type) { | ||
| 178 | case Type::Bool2: | ||
| 179 | return code; | ||
| 180 | default: | ||
| 181 | UNREACHABLE_MSG("Incompatible types"); | ||
| 182 | return code; | ||
| 183 | } | ||
| 184 | } | ||
| 185 | |||
| 186 | std::string AsFloat() const { | ||
| 187 | switch (type) { | ||
| 188 | case Type::Float: | ||
| 189 | return code; | ||
| 190 | case Type::Uint: | ||
| 191 | return fmt::format("utof({})", code); | ||
| 192 | case Type::Int: | ||
| 193 | return fmt::format("itof({})", code); | ||
| 194 | case Type::HalfFloat: | ||
| 195 | return fmt::format("utof(packHalf2x16({}))", code); | ||
| 196 | default: | ||
| 197 | UNREACHABLE_MSG("Incompatible types"); | ||
| 198 | return code; | ||
| 199 | } | ||
| 200 | } | ||
| 201 | |||
| 202 | std::string AsInt() const { | ||
| 203 | switch (type) { | ||
| 204 | case Type::Float: | ||
| 205 | return fmt::format("ftoi({})", code); | ||
| 206 | case Type::Uint: | ||
| 207 | return fmt::format("int({})", code); | ||
| 208 | case Type::Int: | ||
| 209 | return code; | ||
| 210 | case Type::HalfFloat: | ||
| 211 | return fmt::format("int(packHalf2x16({}))", code); | ||
| 212 | default: | ||
| 213 | UNREACHABLE_MSG("Incompatible types"); | ||
| 214 | return code; | ||
| 215 | } | ||
| 216 | } | ||
| 217 | |||
| 218 | std::string AsUint() const { | ||
| 219 | switch (type) { | ||
| 220 | case Type::Float: | ||
| 221 | return fmt::format("ftou({})", code); | ||
| 222 | case Type::Uint: | ||
| 223 | return code; | ||
| 224 | case Type::Int: | ||
| 225 | return fmt::format("uint({})", code); | ||
| 226 | case Type::HalfFloat: | ||
| 227 | return fmt::format("packHalf2x16({})", code); | ||
| 228 | default: | ||
| 229 | UNREACHABLE_MSG("Incompatible types"); | ||
| 230 | return code; | ||
| 231 | } | ||
| 232 | } | ||
| 233 | |||
| 234 | std::string AsHalfFloat() const { | ||
| 235 | switch (type) { | ||
| 236 | case Type::Float: | ||
| 237 | return fmt::format("unpackHalf2x16(ftou({}))", code); | ||
| 238 | case Type::Uint: | ||
| 239 | return fmt::format("unpackHalf2x16({})", code); | ||
| 240 | case Type::Int: | ||
| 241 | return fmt::format("unpackHalf2x16(int({}))", code); | ||
| 242 | case Type::HalfFloat: | ||
| 243 | return code; | ||
| 244 | default: | ||
| 245 | UNREACHABLE_MSG("Incompatible types"); | ||
| 246 | return code; | ||
| 247 | } | ||
| 248 | } | ||
| 249 | |||
| 250 | private: | ||
| 251 | std::string code; | ||
| 252 | Type type{}; | ||
| 253 | }; | ||
| 254 | |||
| 255 | const char* GetTypeString(Type type) { | ||
| 256 | switch (type) { | ||
| 257 | case Type::Bool: | ||
| 258 | return "bool"; | ||
| 259 | case Type::Bool2: | ||
| 260 | return "bvec2"; | ||
| 261 | case Type::Float: | ||
| 262 | return "float"; | ||
| 263 | case Type::Int: | ||
| 264 | return "int"; | ||
| 265 | case Type::Uint: | ||
| 266 | return "uint"; | ||
| 267 | case Type::HalfFloat: | ||
| 268 | return "vec2"; | ||
| 269 | default: | ||
| 270 | UNREACHABLE_MSG("Invalid type"); | ||
| 271 | return "<invalid type>"; | ||
| 272 | } | ||
| 273 | } | ||
| 274 | |||
| 275 | const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { | ||
| 276 | switch (image_type) { | ||
| 277 | case Tegra::Shader::ImageType::Texture1D: | ||
| 278 | return "1D"; | ||
| 279 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 280 | return "Buffer"; | ||
| 281 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 282 | return "1DArray"; | ||
| 283 | case Tegra::Shader::ImageType::Texture2D: | ||
| 284 | return "2D"; | ||
| 285 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 286 | return "2DArray"; | ||
| 287 | case Tegra::Shader::ImageType::Texture3D: | ||
| 288 | return "3D"; | ||
| 289 | default: | ||
| 290 | UNREACHABLE(); | ||
| 291 | return "1D"; | ||
| 292 | } | ||
| 293 | } | ||
| 294 | |||
| 295 | /// Describes primitive behavior on geometry shaders | ||
| 296 | std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) { | ||
| 297 | switch (topology) { | ||
| 298 | case Maxwell::PrimitiveTopology::Points: | ||
| 299 | return {"points", 1}; | ||
| 300 | case Maxwell::PrimitiveTopology::Lines: | ||
| 301 | case Maxwell::PrimitiveTopology::LineStrip: | ||
| 302 | return {"lines", 2}; | ||
| 303 | case Maxwell::PrimitiveTopology::LinesAdjacency: | ||
| 304 | case Maxwell::PrimitiveTopology::LineStripAdjacency: | ||
| 305 | return {"lines_adjacency", 4}; | ||
| 306 | case Maxwell::PrimitiveTopology::Triangles: | ||
| 307 | case Maxwell::PrimitiveTopology::TriangleStrip: | ||
| 308 | case Maxwell::PrimitiveTopology::TriangleFan: | ||
| 309 | return {"triangles", 3}; | ||
| 310 | case Maxwell::PrimitiveTopology::TrianglesAdjacency: | ||
| 311 | case Maxwell::PrimitiveTopology::TriangleStripAdjacency: | ||
| 312 | return {"triangles_adjacency", 6}; | ||
| 313 | default: | ||
| 314 | UNIMPLEMENTED_MSG("topology={}", topology); | ||
| 315 | return {"points", 1}; | ||
| 316 | } | ||
| 317 | } | ||
| 318 | |||
| 319 | /// Generates code to use for a swizzle operation. | ||
| 320 | constexpr const char* GetSwizzle(std::size_t element) { | ||
| 321 | constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; | ||
| 322 | return swizzle.at(element); | ||
| 323 | } | ||
| 324 | |||
| 325 | constexpr const char* GetColorSwizzle(std::size_t element) { | ||
| 326 | constexpr std::array swizzle = {".r", ".g", ".b", ".a"}; | ||
| 327 | return swizzle.at(element); | ||
| 328 | } | ||
| 329 | |||
| 330 | /// Translate topology | ||
| 331 | std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | ||
| 332 | switch (topology) { | ||
| 333 | case Tegra::Shader::OutputTopology::PointList: | ||
| 334 | return "points"; | ||
| 335 | case Tegra::Shader::OutputTopology::LineStrip: | ||
| 336 | return "line_strip"; | ||
| 337 | case Tegra::Shader::OutputTopology::TriangleStrip: | ||
| 338 | return "triangle_strip"; | ||
| 339 | default: | ||
| 340 | UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); | ||
| 341 | return "points"; | ||
| 342 | } | ||
| 343 | } | ||
| 344 | |||
| 345 | /// Returns true if an object has to be treated as precise | ||
| 346 | bool IsPrecise(Operation operand) { | ||
| 347 | const auto& meta{operand.GetMeta()}; | ||
| 348 | if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { | ||
| 349 | return arithmetic->precise; | ||
| 350 | } | ||
| 351 | return false; | ||
| 352 | } | ||
| 353 | |||
| 354 | bool IsPrecise(const Node& node) { | ||
| 355 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 356 | return IsPrecise(*operation); | ||
| 357 | } | ||
| 358 | return false; | ||
| 359 | } | ||
| 360 | |||
| 361 | constexpr bool IsGenericAttribute(Attribute::Index index) { | ||
| 362 | return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; | ||
| 363 | } | ||
| 364 | |||
| 365 | constexpr bool IsLegacyTexCoord(Attribute::Index index) { | ||
| 366 | return static_cast<int>(index) >= static_cast<int>(Attribute::Index::TexCoord_0) && | ||
| 367 | static_cast<int>(index) <= static_cast<int>(Attribute::Index::TexCoord_7); | ||
| 368 | } | ||
| 369 | |||
| 370 | constexpr Attribute::Index ToGenericAttribute(u64 value) { | ||
| 371 | return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0)); | ||
| 372 | } | ||
| 373 | |||
| 374 | constexpr int GetLegacyTexCoordIndex(Attribute::Index index) { | ||
| 375 | return static_cast<int>(index) - static_cast<int>(Attribute::Index::TexCoord_0); | ||
| 376 | } | ||
| 377 | |||
| 378 | u32 GetGenericAttributeIndex(Attribute::Index index) { | ||
| 379 | ASSERT(IsGenericAttribute(index)); | ||
| 380 | return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); | ||
| 381 | } | ||
| 382 | |||
| 383 | constexpr const char* GetFlowStackPrefix(MetaStackClass stack) { | ||
| 384 | switch (stack) { | ||
| 385 | case MetaStackClass::Ssy: | ||
| 386 | return "ssy"; | ||
| 387 | case MetaStackClass::Pbk: | ||
| 388 | return "pbk"; | ||
| 389 | } | ||
| 390 | return {}; | ||
| 391 | } | ||
| 392 | |||
| 393 | std::string FlowStackName(MetaStackClass stack) { | ||
| 394 | return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack)); | ||
| 395 | } | ||
| 396 | |||
| 397 | std::string FlowStackTopName(MetaStackClass stack) { | ||
| 398 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); | ||
| 399 | } | ||
| 400 | |||
| 401 | struct GenericVaryingDescription { | ||
| 402 | std::string name; | ||
| 403 | u8 first_element = 0; | ||
| 404 | bool is_scalar = false; | ||
| 405 | }; | ||
| 406 | |||
| 407 | class GLSLDecompiler final { | ||
| 408 | public: | ||
| 409 | explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, | ||
| 410 | ShaderType stage_, std::string_view identifier_, | ||
| 411 | std::string_view suffix_) | ||
| 412 | : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, | ||
| 413 | identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} { | ||
| 414 | if (stage != ShaderType::Compute) { | ||
| 415 | transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); | ||
| 416 | } | ||
| 417 | } | ||
| 418 | |||
| 419 | void Decompile() { | ||
| 420 | DeclareHeader(); | ||
| 421 | DeclareVertex(); | ||
| 422 | DeclareGeometry(); | ||
| 423 | DeclareFragment(); | ||
| 424 | DeclareCompute(); | ||
| 425 | DeclareInputAttributes(); | ||
| 426 | DeclareOutputAttributes(); | ||
| 427 | DeclareImages(); | ||
| 428 | DeclareSamplers(); | ||
| 429 | DeclareGlobalMemory(); | ||
| 430 | DeclareConstantBuffers(); | ||
| 431 | DeclareLocalMemory(); | ||
| 432 | DeclareRegisters(); | ||
| 433 | DeclarePredicates(); | ||
| 434 | DeclareInternalFlags(); | ||
| 435 | DeclareCustomVariables(); | ||
| 436 | DeclarePhysicalAttributeReader(); | ||
| 437 | |||
| 438 | code.AddLine("void main() {{"); | ||
| 439 | ++code.scope; | ||
| 440 | |||
| 441 | if (stage == ShaderType::Vertex) { | ||
| 442 | code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); | ||
| 443 | } | ||
| 444 | |||
| 445 | if (ir.IsDecompiled()) { | ||
| 446 | DecompileAST(); | ||
| 447 | } else { | ||
| 448 | DecompileBranchMode(); | ||
| 449 | } | ||
| 450 | |||
| 451 | --code.scope; | ||
| 452 | code.AddLine("}}"); | ||
| 453 | } | ||
| 454 | |||
| 455 | std::string GetResult() { | ||
| 456 | return code.GetResult(); | ||
| 457 | } | ||
| 458 | |||
| 459 | private: | ||
| 460 | friend class ASTDecompiler; | ||
| 461 | friend class ExprDecompiler; | ||
| 462 | |||
| 463 | void DecompileBranchMode() { | ||
| 464 | // VM's program counter | ||
| 465 | const auto first_address = ir.GetBasicBlocks().begin()->first; | ||
| 466 | code.AddLine("uint jmp_to = {}U;", first_address); | ||
| 467 | |||
| 468 | // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems | ||
| 469 | // unlikely that shaders will use 20 nested SSYs and PBKs. | ||
| 470 | constexpr u32 FLOW_STACK_SIZE = 20; | ||
| 471 | if (!ir.IsFlowStackDisabled()) { | ||
| 472 | for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { | ||
| 473 | code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); | ||
| 474 | code.AddLine("uint {} = 0U;", FlowStackTopName(stack)); | ||
| 475 | } | ||
| 476 | } | ||
| 477 | |||
| 478 | code.AddLine("while (true) {{"); | ||
| 479 | ++code.scope; | ||
| 480 | |||
| 481 | code.AddLine("switch (jmp_to) {{"); | ||
| 482 | |||
| 483 | for (const auto& pair : ir.GetBasicBlocks()) { | ||
| 484 | const auto& [address, bb] = pair; | ||
| 485 | code.AddLine("case 0x{:X}U: {{", address); | ||
| 486 | ++code.scope; | ||
| 487 | |||
| 488 | VisitBlock(bb); | ||
| 489 | |||
| 490 | --code.scope; | ||
| 491 | code.AddLine("}}"); | ||
| 492 | } | ||
| 493 | |||
| 494 | code.AddLine("default: return;"); | ||
| 495 | code.AddLine("}}"); | ||
| 496 | |||
| 497 | --code.scope; | ||
| 498 | code.AddLine("}}"); | ||
| 499 | } | ||
| 500 | |||
| 501 | void DecompileAST(); | ||
| 502 | |||
| 503 | void DeclareHeader() { | ||
| 504 | if (!identifier.empty()) { | ||
| 505 | code.AddLine("// {}", identifier); | ||
| 506 | } | ||
| 507 | const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate(); | ||
| 508 | code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core"); | ||
| 509 | code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); | ||
| 510 | if (device.HasShaderBallot()) { | ||
| 511 | code.AddLine("#extension GL_ARB_shader_ballot : require"); | ||
| 512 | } | ||
| 513 | if (device.HasVertexViewportLayer()) { | ||
| 514 | code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require"); | ||
| 515 | } | ||
| 516 | if (device.HasImageLoadFormatted()) { | ||
| 517 | code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); | ||
| 518 | } | ||
| 519 | if (device.HasTextureShadowLod()) { | ||
| 520 | code.AddLine("#extension GL_EXT_texture_shadow_lod : require"); | ||
| 521 | } | ||
| 522 | if (device.HasWarpIntrinsics()) { | ||
| 523 | code.AddLine("#extension GL_NV_gpu_shader5 : require"); | ||
| 524 | code.AddLine("#extension GL_NV_shader_thread_group : require"); | ||
| 525 | code.AddLine("#extension GL_NV_shader_thread_shuffle : require"); | ||
| 526 | } | ||
| 527 | // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 | ||
| 528 | // operations) on places where we don't want to. | ||
| 529 | // Thanks to Ryujinx for finding this workaround. | ||
| 530 | code.AddLine("#pragma optionNV(fastmath off)"); | ||
| 531 | |||
| 532 | code.AddNewLine(); | ||
| 533 | |||
| 534 | code.AddLine(COMMON_DECLARATIONS); | ||
| 535 | } | ||
| 536 | |||
| 537 | void DeclareVertex() { | ||
| 538 | if (stage != ShaderType::Vertex) { | ||
| 539 | return; | ||
| 540 | } | ||
| 541 | |||
| 542 | DeclareVertexRedeclarations(); | ||
| 543 | } | ||
| 544 | |||
| 545 | void DeclareGeometry() { | ||
| 546 | if (stage != ShaderType::Geometry) { | ||
| 547 | return; | ||
| 548 | } | ||
| 549 | |||
| 550 | const auto& info = registry.GetGraphicsInfo(); | ||
| 551 | const auto input_topology = info.primitive_topology; | ||
| 552 | const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology); | ||
| 553 | max_input_vertices = max_vertices; | ||
| 554 | code.AddLine("layout ({}) in;", glsl_topology); | ||
| 555 | |||
| 556 | const auto topology = GetTopologyName(header.common3.output_topology); | ||
| 557 | const auto max_output_vertices = header.common4.max_output_vertices.Value(); | ||
| 558 | code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices); | ||
| 559 | code.AddNewLine(); | ||
| 560 | |||
| 561 | code.AddLine("in gl_PerVertex {{"); | ||
| 562 | ++code.scope; | ||
| 563 | code.AddLine("vec4 gl_Position;"); | ||
| 564 | --code.scope; | ||
| 565 | code.AddLine("}} gl_in[];"); | ||
| 566 | |||
| 567 | DeclareVertexRedeclarations(); | ||
| 568 | } | ||
| 569 | |||
| 570 | void DeclareFragment() { | ||
| 571 | if (stage != ShaderType::Fragment) { | ||
| 572 | return; | ||
| 573 | } | ||
| 574 | if (ir.UsesLegacyVaryings()) { | ||
| 575 | code.AddLine("in gl_PerFragment {{"); | ||
| 576 | ++code.scope; | ||
| 577 | code.AddLine("vec4 gl_TexCoord[8];"); | ||
| 578 | code.AddLine("vec4 gl_Color;"); | ||
| 579 | code.AddLine("vec4 gl_SecondaryColor;"); | ||
| 580 | --code.scope; | ||
| 581 | code.AddLine("}};"); | ||
| 582 | } | ||
| 583 | |||
| 584 | for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { | ||
| 585 | code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt); | ||
| 586 | } | ||
| 587 | } | ||
| 588 | |||
| 589 | void DeclareCompute() { | ||
| 590 | if (stage != ShaderType::Compute) { | ||
| 591 | return; | ||
| 592 | } | ||
| 593 | const auto& info = registry.GetComputeInfo(); | ||
| 594 | if (u32 size = info.shared_memory_size_in_words * 4; size > 0) { | ||
| 595 | const u32 limit = device.GetMaxComputeSharedMemorySize(); | ||
| 596 | if (size > limit) { | ||
| 597 | LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", | ||
| 598 | size, limit); | ||
| 599 | size = limit; | ||
| 600 | } | ||
| 601 | |||
| 602 | code.AddLine("shared uint smem[{}];", size / 4); | ||
| 603 | code.AddNewLine(); | ||
| 604 | } | ||
| 605 | code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", | ||
| 606 | info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); | ||
| 607 | code.AddNewLine(); | ||
| 608 | } | ||
| 609 | |||
| 610 | void DeclareVertexRedeclarations() { | ||
| 611 | code.AddLine("out gl_PerVertex {{"); | ||
| 612 | ++code.scope; | ||
| 613 | |||
| 614 | auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position); | ||
| 615 | if (!pos_xfb.empty()) { | ||
| 616 | pos_xfb = fmt::format("layout ({}) ", pos_xfb); | ||
| 617 | } | ||
| 618 | const char* pos_type = | ||
| 619 | FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1); | ||
| 620 | code.AddLine("{}{} gl_Position;", pos_xfb, pos_type); | ||
| 621 | |||
| 622 | for (const auto attribute : ir.GetOutputAttributes()) { | ||
| 623 | if (attribute == Attribute::Index::ClipDistances0123 || | ||
| 624 | attribute == Attribute::Index::ClipDistances4567) { | ||
| 625 | code.AddLine("float gl_ClipDistance[];"); | ||
| 626 | break; | ||
| 627 | } | ||
| 628 | } | ||
| 629 | |||
| 630 | if (stage != ShaderType::Geometry && | ||
| 631 | (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) { | ||
| 632 | if (ir.UsesLayer()) { | ||
| 633 | code.AddLine("int gl_Layer;"); | ||
| 634 | } | ||
| 635 | if (ir.UsesViewportIndex()) { | ||
| 636 | code.AddLine("int gl_ViewportIndex;"); | ||
| 637 | } | ||
| 638 | } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex && | ||
| 639 | !device.HasVertexViewportLayer()) { | ||
| 640 | LOG_ERROR( | ||
| 641 | Render_OpenGL, | ||
| 642 | "GL_ARB_shader_viewport_layer_array is not available and its required by a shader"); | ||
| 643 | } | ||
| 644 | |||
| 645 | if (ir.UsesPointSize()) { | ||
| 646 | code.AddLine("float gl_PointSize;"); | ||
| 647 | } | ||
| 648 | |||
| 649 | if (ir.UsesLegacyVaryings()) { | ||
| 650 | code.AddLine("vec4 gl_TexCoord[8];"); | ||
| 651 | code.AddLine("vec4 gl_FrontColor;"); | ||
| 652 | code.AddLine("vec4 gl_FrontSecondaryColor;"); | ||
| 653 | code.AddLine("vec4 gl_BackColor;"); | ||
| 654 | code.AddLine("vec4 gl_BackSecondaryColor;"); | ||
| 655 | } | ||
| 656 | |||
| 657 | --code.scope; | ||
| 658 | code.AddLine("}};"); | ||
| 659 | code.AddNewLine(); | ||
| 660 | |||
| 661 | if (stage == ShaderType::Geometry) { | ||
| 662 | if (ir.UsesLayer()) { | ||
| 663 | code.AddLine("out int gl_Layer;"); | ||
| 664 | } | ||
| 665 | if (ir.UsesViewportIndex()) { | ||
| 666 | code.AddLine("out int gl_ViewportIndex;"); | ||
| 667 | } | ||
| 668 | } | ||
| 669 | code.AddNewLine(); | ||
| 670 | } | ||
| 671 | |||
| 672 | void DeclareRegisters() { | ||
| 673 | const auto& registers = ir.GetRegisters(); | ||
| 674 | for (const u32 gpr : registers) { | ||
| 675 | code.AddLine("float {} = 0.0f;", GetRegister(gpr)); | ||
| 676 | } | ||
| 677 | if (!registers.empty()) { | ||
| 678 | code.AddNewLine(); | ||
| 679 | } | ||
| 680 | } | ||
| 681 | |||
| 682 | void DeclareCustomVariables() { | ||
| 683 | const u32 num_custom_variables = ir.GetNumCustomVariables(); | ||
| 684 | for (u32 i = 0; i < num_custom_variables; ++i) { | ||
| 685 | code.AddLine("float {} = 0.0f;", GetCustomVariable(i)); | ||
| 686 | } | ||
| 687 | if (num_custom_variables > 0) { | ||
| 688 | code.AddNewLine(); | ||
| 689 | } | ||
| 690 | } | ||
| 691 | |||
| 692 | void DeclarePredicates() { | ||
| 693 | const auto& predicates = ir.GetPredicates(); | ||
| 694 | for (const auto pred : predicates) { | ||
| 695 | code.AddLine("bool {} = false;", GetPredicate(pred)); | ||
| 696 | } | ||
| 697 | if (!predicates.empty()) { | ||
| 698 | code.AddNewLine(); | ||
| 699 | } | ||
| 700 | } | ||
| 701 | |||
| 702 | void DeclareLocalMemory() { | ||
| 703 | u64 local_memory_size = 0; | ||
| 704 | if (stage == ShaderType::Compute) { | ||
| 705 | local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; | ||
| 706 | } else { | ||
| 707 | local_memory_size = header.GetLocalMemorySize(); | ||
| 708 | } | ||
| 709 | if (local_memory_size == 0) { | ||
| 710 | return; | ||
| 711 | } | ||
| 712 | const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4; | ||
| 713 | code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); | ||
| 714 | code.AddNewLine(); | ||
| 715 | } | ||
| 716 | |||
| 717 | void DeclareInternalFlags() { | ||
| 718 | for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { | ||
| 719 | const auto flag_code = static_cast<InternalFlag>(flag); | ||
| 720 | code.AddLine("bool {} = false;", GetInternalFlag(flag_code)); | ||
| 721 | } | ||
| 722 | code.AddNewLine(); | ||
| 723 | } | ||
| 724 | |||
| 725 | const char* GetInputFlags(PixelImap attribute) { | ||
| 726 | switch (attribute) { | ||
| 727 | case PixelImap::Perspective: | ||
| 728 | return "smooth"; | ||
| 729 | case PixelImap::Constant: | ||
| 730 | return "flat"; | ||
| 731 | case PixelImap::ScreenLinear: | ||
| 732 | return "noperspective"; | ||
| 733 | case PixelImap::Unused: | ||
| 734 | break; | ||
| 735 | } | ||
| 736 | UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); | ||
| 737 | return {}; | ||
| 738 | } | ||
| 739 | |||
| 740 | void DeclareInputAttributes() { | ||
| 741 | if (ir.HasPhysicalAttributes()) { | ||
| 742 | const u32 num_inputs{GetNumPhysicalInputAttributes()}; | ||
| 743 | for (u32 i = 0; i < num_inputs; ++i) { | ||
| 744 | DeclareInputAttribute(ToGenericAttribute(i), true); | ||
| 745 | } | ||
| 746 | code.AddNewLine(); | ||
| 747 | return; | ||
| 748 | } | ||
| 749 | |||
| 750 | const auto& attributes = ir.GetInputAttributes(); | ||
| 751 | for (const auto index : attributes) { | ||
| 752 | if (IsGenericAttribute(index)) { | ||
| 753 | DeclareInputAttribute(index, false); | ||
| 754 | } | ||
| 755 | } | ||
| 756 | if (!attributes.empty()) { | ||
| 757 | code.AddNewLine(); | ||
| 758 | } | ||
| 759 | } | ||
| 760 | |||
| 761 | void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { | ||
| 762 | const u32 location{GetGenericAttributeIndex(index)}; | ||
| 763 | |||
| 764 | std::string name{GetGenericInputAttribute(index)}; | ||
| 765 | if (stage == ShaderType::Geometry) { | ||
| 766 | name = "gs_" + name + "[]"; | ||
| 767 | } | ||
| 768 | |||
| 769 | std::string suffix_; | ||
| 770 | if (stage == ShaderType::Fragment) { | ||
| 771 | const auto input_mode{header.ps.GetPixelImap(location)}; | ||
| 772 | if (input_mode == PixelImap::Unused) { | ||
| 773 | return; | ||
| 774 | } | ||
| 775 | suffix_ = GetInputFlags(input_mode); | ||
| 776 | } | ||
| 777 | |||
| 778 | code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name); | ||
| 779 | } | ||
| 780 | |||
| 781 | void DeclareOutputAttributes() { | ||
| 782 | if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) { | ||
| 783 | for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { | ||
| 784 | DeclareOutputAttribute(ToGenericAttribute(i)); | ||
| 785 | } | ||
| 786 | code.AddNewLine(); | ||
| 787 | return; | ||
| 788 | } | ||
| 789 | |||
| 790 | const auto& attributes = ir.GetOutputAttributes(); | ||
| 791 | for (const auto index : attributes) { | ||
| 792 | if (IsGenericAttribute(index)) { | ||
| 793 | DeclareOutputAttribute(index); | ||
| 794 | } | ||
| 795 | } | ||
| 796 | if (!attributes.empty()) { | ||
| 797 | code.AddNewLine(); | ||
| 798 | } | ||
| 799 | } | ||
| 800 | |||
| 801 | std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const { | ||
| 802 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 803 | const auto it = transform_feedback.find(location); | ||
| 804 | if (it == transform_feedback.end()) { | ||
| 805 | return std::nullopt; | ||
| 806 | } | ||
| 807 | return it->second.components; | ||
| 808 | } | ||
| 809 | |||
| 810 | std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const { | ||
| 811 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 812 | const auto it = transform_feedback.find(location); | ||
| 813 | if (it == transform_feedback.end()) { | ||
| 814 | return {}; | ||
| 815 | } | ||
| 816 | |||
| 817 | const VaryingTFB& tfb = it->second; | ||
| 818 | return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer, | ||
| 819 | tfb.offset, tfb.stride); | ||
| 820 | } | ||
| 821 | |||
| 822 | void DeclareOutputAttribute(Attribute::Index index) { | ||
| 823 | static constexpr std::string_view swizzle = "xyzw"; | ||
| 824 | u8 element = 0; | ||
| 825 | while (element < 4) { | ||
| 826 | auto xfb = GetTransformFeedbackDecoration(index, element); | ||
| 827 | if (!xfb.empty()) { | ||
| 828 | xfb = fmt::format(", {}", xfb); | ||
| 829 | } | ||
| 830 | const std::size_t remainder = 4 - element; | ||
| 831 | const std::size_t num_components = GetNumComponents(index, element).value_or(remainder); | ||
| 832 | const char* const type = FLOAT_TYPES.at(num_components - 1); | ||
| 833 | |||
| 834 | const u32 location = GetGenericAttributeIndex(index); | ||
| 835 | |||
| 836 | GenericVaryingDescription description; | ||
| 837 | description.first_element = static_cast<u8>(element); | ||
| 838 | description.is_scalar = num_components == 1; | ||
| 839 | description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME); | ||
| 840 | if (element != 0 || num_components != 4) { | ||
| 841 | const std::string_view name_swizzle = swizzle.substr(element, num_components); | ||
| 842 | description.name = fmt::format("{}_{}", description.name, name_swizzle); | ||
| 843 | } | ||
| 844 | for (std::size_t i = 0; i < num_components; ++i) { | ||
| 845 | const u8 offset = static_cast<u8>(location * 4 + element + i); | ||
| 846 | varying_description.insert({offset, description}); | ||
| 847 | } | ||
| 848 | |||
| 849 | code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element, | ||
| 850 | xfb, type, description.name); | ||
| 851 | |||
| 852 | element = static_cast<u8>(static_cast<std::size_t>(element) + num_components); | ||
| 853 | } | ||
| 854 | } | ||
| 855 | |||
| 856 | void DeclareConstantBuffers() { | ||
| 857 | u32 binding = device.GetBaseBindings(stage).uniform_buffer; | ||
| 858 | for (const auto& [index, info] : ir.GetConstantBuffers()) { | ||
| 859 | const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32)); | ||
| 860 | const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements; | ||
| 861 | code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, | ||
| 862 | GetConstBufferBlock(index)); | ||
| 863 | code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size); | ||
| 864 | code.AddLine("}};"); | ||
| 865 | code.AddNewLine(); | ||
| 866 | } | ||
| 867 | } | ||
| 868 | |||
| 869 | void DeclareGlobalMemory() { | ||
| 870 | u32 binding = device.GetBaseBindings(stage).shader_storage_buffer; | ||
| 871 | for (const auto& [base, usage] : ir.GetGlobalMemory()) { | ||
| 872 | // Since we don't know how the shader will use the shader, hint the driver to disable as | ||
| 873 | // much optimizations as possible | ||
| 874 | std::string qualifier = "coherent volatile"; | ||
| 875 | if (usage.is_read && !usage.is_written) { | ||
| 876 | qualifier += " readonly"; | ||
| 877 | } else if (usage.is_written && !usage.is_read) { | ||
| 878 | qualifier += " writeonly"; | ||
| 879 | } | ||
| 880 | |||
| 881 | code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier, | ||
| 882 | GetGlobalMemoryBlock(base)); | ||
| 883 | code.AddLine(" uint {}[];", GetGlobalMemory(base)); | ||
| 884 | code.AddLine("}};"); | ||
| 885 | code.AddNewLine(); | ||
| 886 | } | ||
| 887 | } | ||
| 888 | |||
| 889 | void DeclareSamplers() { | ||
| 890 | u32 binding = device.GetBaseBindings(stage).sampler; | ||
| 891 | for (const auto& sampler : ir.GetSamplers()) { | ||
| 892 | const std::string name = GetSampler(sampler); | ||
| 893 | const std::string description = fmt::format("layout (binding = {}) uniform", binding); | ||
| 894 | binding += sampler.is_indexed ? sampler.size : 1; | ||
| 895 | |||
| 896 | std::string sampler_type = [&]() { | ||
| 897 | if (sampler.is_buffer) { | ||
| 898 | return "samplerBuffer"; | ||
| 899 | } | ||
| 900 | switch (sampler.type) { | ||
| 901 | case TextureType::Texture1D: | ||
| 902 | return "sampler1D"; | ||
| 903 | case TextureType::Texture2D: | ||
| 904 | return "sampler2D"; | ||
| 905 | case TextureType::Texture3D: | ||
| 906 | return "sampler3D"; | ||
| 907 | case TextureType::TextureCube: | ||
| 908 | return "samplerCube"; | ||
| 909 | default: | ||
| 910 | UNREACHABLE(); | ||
| 911 | return "sampler2D"; | ||
| 912 | } | ||
| 913 | }(); | ||
| 914 | if (sampler.is_array) { | ||
| 915 | sampler_type += "Array"; | ||
| 916 | } | ||
| 917 | if (sampler.is_shadow) { | ||
| 918 | sampler_type += "Shadow"; | ||
| 919 | } | ||
| 920 | |||
| 921 | if (!sampler.is_indexed) { | ||
| 922 | code.AddLine("{} {} {};", description, sampler_type, name); | ||
| 923 | } else { | ||
| 924 | code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.size); | ||
| 925 | } | ||
| 926 | } | ||
| 927 | if (!ir.GetSamplers().empty()) { | ||
| 928 | code.AddNewLine(); | ||
| 929 | } | ||
| 930 | } | ||
| 931 | |||
// Emits a GLSL helper function, ReadPhysicalAttribute(uint), used when the shader
// addresses input attributes through runtime-computed addresses. The helper is a
// switch over the address of every generic attribute component.
void DeclarePhysicalAttributeReader() {
    if (!ir.HasPhysicalAttributes()) {
        return;
    }
    code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{");
    ++code.scope;
    code.AddLine("switch (physical_address) {{");

    // Just declare generic attributes for now.
    const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())};
    for (u32 index = 0; index < num_attributes; ++index) {
        const auto attribute{ToGenericAttribute(index)};
        for (u32 element = 0; element < 4; ++element) {
            // Attribute address layout: generics start at 0x80, each attribute spans
            // 16 bytes with 4 bytes per component.
            constexpr u32 generic_base = 0x80;
            constexpr u32 generic_stride = 16;
            constexpr u32 element_stride = 4;
            const u32 address{generic_base + index * generic_stride + element * element_stride};

            // Fragment inputs marked unused in the pixel imap are not declared in GLSL,
            // so reading them would not compile; return a literal zero for those cases.
            const bool declared = stage != ShaderType::Fragment ||
                                  header.ps.GetPixelImap(index) != PixelImap::Unused;
            const std::string value =
                declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f";
            code.AddLine("case 0x{:X}U: return {};", address, value);
        }
    }

    code.AddLine("default: return 0;");

    code.AddLine("}}");
    --code.scope;
    code.AddLine("}}");
    code.AddNewLine();
}
| 965 | |||
| 966 | void DeclareImages() { | ||
| 967 | u32 binding = device.GetBaseBindings(stage).image; | ||
| 968 | for (const auto& image : ir.GetImages()) { | ||
| 969 | std::string qualifier = "coherent volatile"; | ||
| 970 | if (image.is_read && !image.is_written) { | ||
| 971 | qualifier += " readonly"; | ||
| 972 | } else if (image.is_written && !image.is_read) { | ||
| 973 | qualifier += " writeonly"; | ||
| 974 | } | ||
| 975 | |||
| 976 | const char* format = image.is_atomic ? "r32ui, " : ""; | ||
| 977 | const char* type_declaration = GetImageTypeDeclaration(image.type); | ||
| 978 | code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++, | ||
| 979 | qualifier, type_declaration, GetImage(image)); | ||
| 980 | } | ||
| 981 | if (!ir.GetImages().empty()) { | ||
| 982 | code.AddNewLine(); | ||
| 983 | } | ||
| 984 | } | ||
| 985 | |||
| 986 | void VisitBlock(const NodeBlock& bb) { | ||
| 987 | for (const auto& node : bb) { | ||
| 988 | Visit(node).CheckVoid(); | ||
| 989 | } | ||
| 990 | } | ||
| 991 | |||
// Central IR dispatcher: translates a single IR node into a GLSL expression.
// May emit statements through 'code' as a side effect (temporaries, conditionals),
// so callers must not reorder Visit calls relative to emitted code.
Expression Visit(const Node& node) {
    if (const auto operation = std::get_if<OperationNode>(&*node)) {
        // Amend nodes are emitted for their side effects before the operation itself.
        if (const auto amend_index = operation->GetAmendIndex()) {
            Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
        }
        // Dispatch through the member-function-pointer table indexed by opcode.
        const auto operation_index = static_cast<std::size_t>(operation->GetCode());
        if (operation_index >= operation_decompilers.size()) {
            UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
            return {};
        }
        const auto decompiler = operation_decompilers[operation_index];
        if (decompiler == nullptr) {
            UNREACHABLE_MSG("Undefined operation: {}", operation_index);
            return {};
        }
        return (this->*decompiler)(*operation);
    }

    if (const auto gpr = std::get_if<GprNode>(&*node)) {
        const u32 index = gpr->GetIndex();
        // The zero register always reads as zero and is never declared.
        if (index == Register::ZeroIndex) {
            return {"0U", Type::Uint};
        }
        return {GetRegister(index), Type::Float};
    }

    if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
        const u32 index = cv->GetIndex();
        return {GetCustomVariable(index), Type::Float};
    }

    if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
        const u32 value = immediate->GetValue();
        if (value < 10) {
            // For eyecandy avoid using hex numbers on single digits
            return {fmt::format("{}U", immediate->GetValue()), Type::Uint};
        }
        return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint};
    }

    if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
        // UnusedIndex/NeverExecute are the constant true/false predicates.
        const auto value = [&]() -> std::string {
            switch (const auto index = predicate->GetIndex(); index) {
            case Tegra::Shader::Pred::UnusedIndex:
                return "true";
            case Tegra::Shader::Pred::NeverExecute:
                return "false";
            default:
                return GetPredicate(index);
            }
        }();
        if (predicate->IsNegated()) {
            return {fmt::format("!({})", value), Type::Bool};
        }
        return {value, Type::Bool};
    }

    if (const auto abuf = std::get_if<AbufNode>(&*node)) {
        UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry,
                             "Physical attributes in geometry shaders are not implemented");
        // Physical (runtime-addressed) reads go through the generated helper function;
        // see DeclarePhysicalAttributeReader.
        if (abuf->IsPhysicalBuffer()) {
            return {fmt::format("ReadPhysicalAttribute({})",
                                Visit(abuf->GetPhysicalAddress()).AsUint()),
                    Type::Float};
        }
        return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer());
    }

    if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
        const Node offset = cbuf->GetOffset();

        if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
            // Direct access: the byte offset is split into a uvec4 index and a component.
            const u32 offset_imm = immediate->GetValue();
            ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
            return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
                                offset_imm / (4 * 4), (offset_imm / 4) % 4),
                    Type::Uint};
        }

        // Indirect access
        const std::string final_offset = code.GenerateTemporary();
        code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());

        if (!device.HasComponentIndexingBug()) {
            return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
                                final_offset, final_offset),
                    Type::Uint};
        }

        // AMD's proprietary GLSL compiler emits ill code for variable component access.
        // To bypass this driver bug generate 4 ifs, one per each component.
        const std::string pack = code.GenerateTemporary();
        code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
                     final_offset);

        const std::string result = code.GenerateTemporary();
        code.AddLine("uint {};", result);
        for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
            code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack,
                         GetSwizzle(swizzle));
        }
        return {result, Type::Uint};
    }

    if (const auto gmem = std::get_if<GmemNode>(&*node)) {
        // Global memory is addressed relative to the region base, in 32-bit words.
        const std::string real = Visit(gmem->GetRealAddress()).AsUint();
        const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
        const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
        return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
                Type::Uint};
    }

    if (const auto lmem = std::get_if<LmemNode>(&*node)) {
        // Local memory is an array of 32-bit words indexed by byte address.
        return {
            fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
            Type::Uint};
    }

    if (const auto smem = std::get_if<SmemNode>(&*node)) {
        return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
    }

    if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
        return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool};
    }

    if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
        if (const auto amend_index = conditional->GetAmendIndex()) {
            Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
        }
        // It's invalid to call conditional on nested nodes, use an operation instead
        code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
        ++code.scope;

        VisitBlock(conditional->GetCode());

        --code.scope;
        code.AddLine("}}");
        return {};
    }

    if (const auto comment = std::get_if<CommentNode>(&*node)) {
        code.AddLine("// " + comment->GetText());
        return {};
    }

    UNREACHABLE();
    return {};
}
| 1142 | |||
// Maps an input attribute read to a GLSL r-value expression.
// 'buffer' is the per-primitive vertex index used by geometry shaders; it is empty
// (and ignored) on every other stage.
Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
    // Wraps generic input names with the per-vertex array indexing geometry shaders need.
    const auto GeometryPass = [&](std::string_view name) {
        if (stage == ShaderType::Geometry && buffer) {
            // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
            // set an 0x80000000 index for those and the shader fails to build. Find out why
            // this happens and what's its intent.
            return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(),
                               max_input_vertices.value());
        }
        return std::string(name);
    };

    switch (attribute) {
    case Attribute::Index::Position:
        switch (stage) {
        case ShaderType::Geometry:
            return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(),
                                GetSwizzle(element)),
                    Type::Float};
        case ShaderType::Fragment:
            return {"gl_FragCoord"s + GetSwizzle(element), Type::Float};
        default:
            UNREACHABLE();
            return {"0", Type::Int};
        }
    case Attribute::Index::FrontColor:
        return {"gl_Color"s + GetSwizzle(element), Type::Float};
    case Attribute::Index::FrontSecondaryColor:
        return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float};
    case Attribute::Index::PointCoord:
        switch (element) {
        case 0:
            return {"gl_PointCoord.x", Type::Float};
        case 1:
            return {"gl_PointCoord.y", Type::Float};
        case 2:
        case 3:
            // Only x/y carry point coordinates; the remaining components read as zero.
            return {"0.0f", Type::Float};
        }
        UNREACHABLE();
        return {"0", Type::Int};
    case Attribute::Index::TessCoordInstanceIDVertexID:
        // TODO(Subv): Find out what the values are for the first two elements when inside a
        // vertex shader, and what's the value of the fourth element when inside a Tess Eval
        // shader.
        ASSERT(stage == ShaderType::Vertex);
        switch (element) {
        case 2:
            // Config pack's first value is instance_id.
            return {"gl_InstanceID", Type::Int};
        case 3:
            return {"gl_VertexID", Type::Int};
        }
        UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
        return {"0", Type::Int};
    case Attribute::Index::FrontFacing:
        // TODO(Subv): Find out what the values are for the other elements.
        ASSERT(stage == ShaderType::Fragment);
        switch (element) {
        case 3:
            return {"(gl_FrontFacing ? -1 : 0)", Type::Int};
        }
        UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
        return {"0", Type::Int};
    default:
        if (IsGenericAttribute(attribute)) {
            return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element),
                    Type::Float};
        }
        if (IsLegacyTexCoord(attribute)) {
            UNIMPLEMENTED_IF(stage == ShaderType::Geometry);
            return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
                                GetSwizzle(element)),
                    Type::Float};
        }
        break;
    }
    UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute);
    return {"0", Type::Int};
}
| 1223 | |||
| 1224 | Expression ApplyPrecise(Operation operation, std::string value, Type type) { | ||
| 1225 | if (!IsPrecise(operation)) { | ||
| 1226 | return {std::move(value), type}; | ||
| 1227 | } | ||
| 1228 | // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to | ||
| 1229 | // be found in fragment shaders, so we disable precise there. There are vertex shaders that | ||
| 1230 | // also fail to build but nobody seems to care about those. | ||
| 1231 | // Note: Only bugged drivers will skip precise. | ||
| 1232 | const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment; | ||
| 1233 | |||
| 1234 | std::string temporary = code.GenerateTemporary(); | ||
| 1235 | code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type), | ||
| 1236 | temporary, value); | ||
| 1237 | return {std::move(temporary), type}; | ||
| 1238 | } | ||
| 1239 | |||
| 1240 | Expression VisitOperand(Operation operation, std::size_t operand_index) { | ||
| 1241 | const auto& operand = operation[operand_index]; | ||
| 1242 | const bool parent_precise = IsPrecise(operation); | ||
| 1243 | const bool child_precise = IsPrecise(operand); | ||
| 1244 | const bool child_trivial = !std::holds_alternative<OperationNode>(*operand); | ||
| 1245 | if (!parent_precise || child_precise || child_trivial) { | ||
| 1246 | return Visit(operand); | ||
| 1247 | } | ||
| 1248 | |||
| 1249 | Expression value = Visit(operand); | ||
| 1250 | std::string temporary = code.GenerateTemporary(); | ||
| 1251 | code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode()); | ||
| 1252 | return {std::move(temporary), value.GetType()}; | ||
| 1253 | } | ||
| 1254 | |||
// Maps an output attribute store to a GLSL l-value expression.
// Returns std::nullopt when the attribute cannot (or should not) be written on this
// device/stage combination, in which case the caller drops the store.
std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) {
    const u32 element = abuf->GetElement();
    switch (const auto attribute = abuf->GetIndex()) {
    case Attribute::Index::Position:
        return {{"gl_Position"s + GetSwizzle(element), Type::Float}};
    case Attribute::Index::LayerViewportPointSize:
        switch (element) {
        case 0:
            UNIMPLEMENTED();
            return std::nullopt;
        case 1:
            // Skip gl_Layer writes when the device cannot set it from a vertex shader.
            if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
                return std::nullopt;
            }
            return {{"gl_Layer", Type::Int}};
        case 2:
            // Same restriction applies to gl_ViewportIndex.
            if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
                return std::nullopt;
            }
            return {{"gl_ViewportIndex", Type::Int}};
        case 3:
            return {{"gl_PointSize", Type::Float}};
        }
        return std::nullopt;
    case Attribute::Index::FrontColor:
        return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}};
    case Attribute::Index::FrontSecondaryColor:
        return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}};
    case Attribute::Index::BackColor:
        return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}};
    case Attribute::Index::BackSecondaryColor:
        return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}};
    case Attribute::Index::ClipDistances0123:
        return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}};
    case Attribute::Index::ClipDistances4567:
        // Second clip-distance bank starts at gl_ClipDistance[4].
        return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}};
    default:
        if (IsGenericAttribute(attribute)) {
            return {{GetGenericOutputAttribute(attribute, element), Type::Float}};
        }
        if (IsLegacyTexCoord(attribute)) {
            return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
                                 GetSwizzle(element)),
                     Type::Float}};
        }
        UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute);
        return std::nullopt;
    }
}
| 1304 | |||
| 1305 | Expression GenerateUnary(Operation operation, std::string_view func, Type result_type, | ||
| 1306 | Type type_a) { | ||
| 1307 | std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a)); | ||
| 1308 | return ApplyPrecise(operation, std::move(op_str), result_type); | ||
| 1309 | } | ||
| 1310 | |||
| 1311 | Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type, | ||
| 1312 | Type type_a, Type type_b) { | ||
| 1313 | const std::string op_a = VisitOperand(operation, 0).As(type_a); | ||
| 1314 | const std::string op_b = VisitOperand(operation, 1).As(type_b); | ||
| 1315 | std::string op_str = fmt::format("({} {} {})", op_a, func, op_b); | ||
| 1316 | |||
| 1317 | return ApplyPrecise(operation, std::move(op_str), result_type); | ||
| 1318 | } | ||
| 1319 | |||
| 1320 | Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type, | ||
| 1321 | Type type_a, Type type_b) { | ||
| 1322 | const std::string op_a = VisitOperand(operation, 0).As(type_a); | ||
| 1323 | const std::string op_b = VisitOperand(operation, 1).As(type_b); | ||
| 1324 | std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b); | ||
| 1325 | |||
| 1326 | return ApplyPrecise(operation, std::move(op_str), result_type); | ||
| 1327 | } | ||
| 1328 | |||
| 1329 | Expression GenerateTernary(Operation operation, std::string_view func, Type result_type, | ||
| 1330 | Type type_a, Type type_b, Type type_c) { | ||
| 1331 | const std::string op_a = VisitOperand(operation, 0).As(type_a); | ||
| 1332 | const std::string op_b = VisitOperand(operation, 1).As(type_b); | ||
| 1333 | const std::string op_c = VisitOperand(operation, 2).As(type_c); | ||
| 1334 | std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c); | ||
| 1335 | |||
| 1336 | return ApplyPrecise(operation, std::move(op_str), result_type); | ||
| 1337 | } | ||
| 1338 | |||
| 1339 | Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type, | ||
| 1340 | Type type_a, Type type_b, Type type_c, Type type_d) { | ||
| 1341 | const std::string op_a = VisitOperand(operation, 0).As(type_a); | ||
| 1342 | const std::string op_b = VisitOperand(operation, 1).As(type_b); | ||
| 1343 | const std::string op_c = VisitOperand(operation, 2).As(type_c); | ||
| 1344 | const std::string op_d = VisitOperand(operation, 3).As(type_d); | ||
| 1345 | std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d); | ||
| 1346 | |||
| 1347 | return ApplyPrecise(operation, std::move(op_str), result_type); | ||
| 1348 | } | ||
| 1349 | |||
// Builds a complete GLSL texture* call for a sampling operation.
// function_suffix selects the variant ("Lod", "Gather", ...); extras appends trailing
// arguments (bias/lod, offsets, derivatives) after the coordinate vector. When
// separate_dc is true the depth-compare value is passed as a separate argument instead
// of being packed into the coordinate constructor.
std::string GenerateTexture(Operation operation, const std::string& function_suffix,
                            const std::vector<TextureIR>& extras, bool separate_dc = false) {
    constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};

    const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
    ASSERT(meta);

    const std::size_t count = operation.GetOperandsCount();
    const bool has_array = meta->sampler.is_array;
    const bool has_shadow = meta->sampler.is_shadow;
    // textureLod on 2D-array/cube shadow samplers is not available on devices without
    // texture-shadow-lod support; emulate it with textureGrad and zero derivatives.
    const bool workaround_lod_array_shadow_as_grad =
        !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow &&
        ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
         meta->sampler.type == TextureType::TextureCube);

    std::string expr = "texture";

    if (workaround_lod_array_shadow_as_grad) {
        expr += "Grad";
    } else {
        expr += function_suffix;
    }

    if (!meta->aoffi.empty()) {
        expr += "Offset";
    } else if (!meta->ptp.empty()) {
        expr += "Offsets";
    }
    if (!meta->sampler.is_indexed) {
        expr += '(' + GetSampler(meta->sampler) + ", ";
    } else {
        expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], ";
    }
    // Coordinate constructor size: base coordinates, plus one slot for the array layer,
    // plus one for the depth-compare value when it is packed with the coordinates.
    expr += coord_constructors.at(count + (has_array ? 1 : 0) +
                                  (has_shadow && !separate_dc ? 1 : 0) - 1);
    expr += '(';
    for (std::size_t i = 0; i < count; ++i) {
        expr += Visit(operation[i]).AsFloat();

        const std::size_t next = i + 1;
        if (next < count)
            expr += ", ";
    }
    if (has_array) {
        expr += ", float(" + Visit(meta->array).AsInt() + ')';
    }
    if (has_shadow) {
        if (separate_dc) {
            expr += "), " + Visit(meta->depth_compare).AsFloat();
        } else {
            expr += ", " + Visit(meta->depth_compare).AsFloat() + ')';
        }
    } else {
        expr += ')';
    }

    if (workaround_lod_array_shadow_as_grad) {
        // Zero derivatives force the base level, matching textureLod's behavior for the
        // levels these workarounds target.
        switch (meta->sampler.type) {
        case TextureType::Texture2D:
            return expr + ", vec2(0.0), vec2(0.0))";
        case TextureType::TextureCube:
            return expr + ", vec3(0.0), vec3(0.0))";
        default:
            UNREACHABLE();
            break;
        }
    }

    for (const auto& variant : extras) {
        if (const auto argument = std::get_if<TextureArgument>(&variant)) {
            expr += GenerateTextureArgument(*argument);
        } else if (std::holds_alternative<TextureOffset>(variant)) {
            if (!meta->aoffi.empty()) {
                expr += GenerateTextureAoffi(meta->aoffi);
            } else if (!meta->ptp.empty()) {
                expr += GenerateTexturePtp(meta->ptp);
            }
        } else if (std::holds_alternative<TextureDerivates>(variant)) {
            expr += GenerateTextureDerivates(meta->derivates);
        } else {
            UNREACHABLE();
        }
    }

    return expr + ')';
}
| 1436 | |||
| 1437 | std::string GenerateTextureArgument(const TextureArgument& argument) { | ||
| 1438 | const auto& [type, operand] = argument; | ||
| 1439 | if (operand == nullptr) { | ||
| 1440 | return {}; | ||
| 1441 | } | ||
| 1442 | |||
| 1443 | std::string expr = ", "; | ||
| 1444 | switch (type) { | ||
| 1445 | case Type::Int: | ||
| 1446 | if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) { | ||
| 1447 | // Inline the string as an immediate integer in GLSL (some extra arguments are | ||
| 1448 | // required to be constant) | ||
| 1449 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); | ||
| 1450 | } else { | ||
| 1451 | expr += Visit(operand).AsInt(); | ||
| 1452 | } | ||
| 1453 | break; | ||
| 1454 | case Type::Float: | ||
| 1455 | expr += Visit(operand).AsFloat(); | ||
| 1456 | break; | ||
| 1457 | default: { | ||
| 1458 | const auto type_int = static_cast<u32>(type); | ||
| 1459 | UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); | ||
| 1460 | expr += '0'; | ||
| 1461 | break; | ||
| 1462 | } | ||
| 1463 | } | ||
| 1464 | return expr; | ||
| 1465 | } | ||
| 1466 | |||
| 1467 | std::string ReadTextureOffset(const Node& value) { | ||
| 1468 | if (const auto immediate = std::get_if<ImmediateNode>(&*value)) { | ||
| 1469 | // Inline the string as an immediate integer in GLSL (AOFFI arguments are required | ||
| 1470 | // to be constant by the standard). | ||
| 1471 | return std::to_string(static_cast<s32>(immediate->GetValue())); | ||
| 1472 | } else if (device.HasVariableAoffi()) { | ||
| 1473 | // Avoid using variable AOFFI on unsupported devices. | ||
| 1474 | return Visit(value).AsInt(); | ||
| 1475 | } else { | ||
| 1476 | // Insert 0 on devices not supporting variable AOFFI. | ||
| 1477 | return "0"; | ||
| 1478 | } | ||
| 1479 | } | ||
| 1480 | |||
| 1481 | std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) { | ||
| 1482 | if (aoffi.empty()) { | ||
| 1483 | return {}; | ||
| 1484 | } | ||
| 1485 | constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"}; | ||
| 1486 | std::string expr = ", "; | ||
| 1487 | expr += coord_constructors.at(aoffi.size() - 1); | ||
| 1488 | expr += '('; | ||
| 1489 | |||
| 1490 | for (std::size_t index = 0; index < aoffi.size(); ++index) { | ||
| 1491 | expr += ReadTextureOffset(aoffi.at(index)); | ||
| 1492 | if (index + 1 < aoffi.size()) { | ||
| 1493 | expr += ", "; | ||
| 1494 | } | ||
| 1495 | } | ||
| 1496 | expr += ')'; | ||
| 1497 | |||
| 1498 | return expr; | ||
| 1499 | } | ||
| 1500 | |||
| 1501 | std::string GenerateTexturePtp(const std::vector<Node>& ptp) { | ||
| 1502 | static constexpr std::size_t num_vectors = 4; | ||
| 1503 | ASSERT(ptp.size() == num_vectors * 2); | ||
| 1504 | |||
| 1505 | std::string expr = ", ivec2[]("; | ||
| 1506 | for (std::size_t vector = 0; vector < num_vectors; ++vector) { | ||
| 1507 | const bool has_next = vector + 1 < num_vectors; | ||
| 1508 | expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)), | ||
| 1509 | ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : ""); | ||
| 1510 | } | ||
| 1511 | expr += ')'; | ||
| 1512 | return expr; | ||
| 1513 | } | ||
| 1514 | |||
| 1515 | std::string GenerateTextureDerivates(const std::vector<Node>& derivates) { | ||
| 1516 | if (derivates.empty()) { | ||
| 1517 | return {}; | ||
| 1518 | } | ||
| 1519 | constexpr std::array coord_constructors = {"float", "vec2", "vec3"}; | ||
| 1520 | std::string expr = ", "; | ||
| 1521 | const std::size_t components = derivates.size() / 2; | ||
| 1522 | std::string dx = coord_constructors.at(components - 1); | ||
| 1523 | std::string dy = coord_constructors.at(components - 1); | ||
| 1524 | dx += '('; | ||
| 1525 | dy += '('; | ||
| 1526 | |||
| 1527 | for (std::size_t index = 0; index < components; ++index) { | ||
| 1528 | const auto& operand_x{derivates.at(index * 2)}; | ||
| 1529 | const auto& operand_y{derivates.at(index * 2 + 1)}; | ||
| 1530 | dx += Visit(operand_x).AsFloat(); | ||
| 1531 | dy += Visit(operand_y).AsFloat(); | ||
| 1532 | |||
| 1533 | if (index + 1 < components) { | ||
| 1534 | dx += ", "; | ||
| 1535 | dy += ", "; | ||
| 1536 | } | ||
| 1537 | } | ||
| 1538 | dx += ')'; | ||
| 1539 | dy += ')'; | ||
| 1540 | expr += dx + ", " + dy; | ||
| 1541 | |||
| 1542 | return expr; | ||
| 1543 | } | ||
| 1544 | |||
| 1545 | std::string BuildIntegerCoordinates(Operation operation) { | ||
| 1546 | constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; | ||
| 1547 | const std::size_t coords_count{operation.GetOperandsCount()}; | ||
| 1548 | std::string expr = constructors.at(coords_count - 1); | ||
| 1549 | for (std::size_t i = 0; i < coords_count; ++i) { | ||
| 1550 | expr += VisitOperand(operation, i).AsInt(); | ||
| 1551 | if (i + 1 < coords_count) { | ||
| 1552 | expr += ", "; | ||
| 1553 | } | ||
| 1554 | } | ||
| 1555 | expr += ')'; | ||
| 1556 | return expr; | ||
| 1557 | } | ||
| 1558 | |||
| 1559 | std::string BuildImageValues(Operation operation) { | ||
| 1560 | constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"}; | ||
| 1561 | const auto& meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 1562 | |||
| 1563 | const std::size_t values_count{meta.values.size()}; | ||
| 1564 | std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); | ||
| 1565 | for (std::size_t i = 0; i < values_count; ++i) { | ||
| 1566 | expr += Visit(meta.values.at(i)).AsUint(); | ||
| 1567 | if (i + 1 < values_count) { | ||
| 1568 | expr += ", "; | ||
| 1569 | } | ||
| 1570 | } | ||
| 1571 | expr += ')'; | ||
| 1572 | return expr; | ||
| 1573 | } | ||
| 1574 | |||
    /// Emits a store of the visited `src` expression into the destination node.
    ///
    /// Handles every writable destination kind: general purpose registers,
    /// output attributes, local memory, shared memory, global memory and
    /// custom variables. The store is emitted as a line of code; the returned
    /// expression is always empty.
    Expression Assign(Operation operation) {
        const Node& dest = operation[0];
        const Node& src = operation[1];

        Expression target;
        if (const auto gpr = std::get_if<GprNode>(&*dest)) {
            if (gpr->GetIndex() == Register::ZeroIndex) {
                // Writing to Register::ZeroIndex is a no op but we still have to visit the source
                // as it might have side effects.
                code.AddLine("{};", Visit(src).GetCode());
                return {};
            }
            target = {GetRegister(gpr->GetIndex()), Type::Float};
        } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
            UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
            auto output = GetOutputAttribute(abuf);
            if (!output) {
                // Attribute is not written by this stage; drop the store.
                return {};
            }
            target = std::move(*output);
        } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
            // Local memory is an array of uints addressed in bytes; shift to index words.
            target = {
                fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
                Type::Uint};
        } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
            // Shared memory only exists in compute shaders.
            ASSERT(stage == ShaderType::Compute);
            target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
        } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
            // Global memory is indexed in words relative to its base address.
            const std::string real = Visit(gmem->GetRealAddress()).AsUint();
            const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
            const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
            target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
                      Type::Uint};
        } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
            target = {GetCustomVariable(cv->GetIndex()), Type::Float};
        } else {
            UNREACHABLE_MSG("Assign called without a proper target");
        }

        code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType()));
        return {};
    }
| 1617 | |||
    /// Binary addition for the given type.
    template <Type type>
    Expression Add(Operation operation) {
        return GenerateBinaryInfix(operation, "+", type, type, type);
    }

    /// Binary multiplication for the given type.
    template <Type type>
    Expression Mul(Operation operation) {
        return GenerateBinaryInfix(operation, "*", type, type, type);
    }

    /// Binary division for the given type.
    template <Type type>
    Expression Div(Operation operation) {
        return GenerateBinaryInfix(operation, "/", type, type, type);
    }

    /// Fused multiply-add for the given type.
    template <Type type>
    Expression Fma(Operation operation) {
        return GenerateTernary(operation, "fma", type, type, type, type);
    }

    /// Unary negation for the given type.
    template <Type type>
    Expression Negate(Operation operation) {
        return GenerateUnary(operation, "-", type, type);
    }

    /// Absolute value for the given type.
    template <Type type>
    Expression Absolute(Operation operation) {
        return GenerateUnary(operation, "abs", type, type);
    }

    /// Clamps a float between a minimum and a maximum.
    Expression FClamp(Operation operation) {
        return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float,
                               Type::Float);
    }

    /// Extracts the low component of a half-float pair as a float.
    Expression FCastHalf0(Operation operation) {
        return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
    }

    /// Extracts the high component of a half-float pair as a float.
    Expression FCastHalf1(Operation operation) {
        return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
    }

    /// Minimum of two values of the given type.
    template <Type type>
    Expression Min(Operation operation) {
        return GenerateBinaryCall(operation, "min", type, type, type);
    }

    /// Maximum of two values of the given type.
    template <Type type>
    Expression Max(Operation operation) {
        return GenerateBinaryCall(operation, "max", type, type, type);
    }

    /// Conditional select: operand 0 picks between operands 1 and 2.
    /// Values are moved bitwise (as uints) so any 32-bit payload works.
    Expression Select(Operation operation) {
        const std::string condition = Visit(operation[0]).AsBool();
        const std::string true_case = Visit(operation[1]).AsUint();
        const std::string false_case = Visit(operation[2]).AsUint();
        std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case);

        return ApplyPrecise(operation, std::move(op_str), Type::Uint);
    }

    // Float transcendental and rounding wrappers, all mapping 1:1 onto the
    // GLSL built-in of the same name.

    Expression FCos(Operation operation) {
        return GenerateUnary(operation, "cos", Type::Float, Type::Float);
    }

    Expression FSin(Operation operation) {
        return GenerateUnary(operation, "sin", Type::Float, Type::Float);
    }

    Expression FExp2(Operation operation) {
        return GenerateUnary(operation, "exp2", Type::Float, Type::Float);
    }

    Expression FLog2(Operation operation) {
        return GenerateUnary(operation, "log2", Type::Float, Type::Float);
    }

    Expression FInverseSqrt(Operation operation) {
        return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float);
    }

    Expression FSqrt(Operation operation) {
        return GenerateUnary(operation, "sqrt", Type::Float, Type::Float);
    }

    Expression FRoundEven(Operation operation) {
        return GenerateUnary(operation, "roundEven", Type::Float, Type::Float);
    }

    Expression FFloor(Operation operation) {
        return GenerateUnary(operation, "floor", Type::Float, Type::Float);
    }

    Expression FCeil(Operation operation) {
        return GenerateUnary(operation, "ceil", Type::Float, Type::Float);
    }

    Expression FTrunc(Operation operation) {
        return GenerateUnary(operation, "trunc", Type::Float, Type::Float);
    }

    /// Converts an integer of the given type to float.
    template <Type type>
    Expression FCastInteger(Operation operation) {
        return GenerateUnary(operation, "float", Type::Float, type);
    }

    /// FSWZADD: adds two floats using per-thread sign modifiers selected by
    /// a 2-bit field of the immediate mask (requires shader ballot).
    Expression FSwizzleAdd(Operation operation) {
        const std::string op_a = VisitOperand(operation, 0).AsFloat();
        const std::string op_b = VisitOperand(operation, 1).AsFloat();

        if (!device.HasShaderBallot()) {
            LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
            // Stub with a plain addition, ignoring the swizzle modifiers.
            return {fmt::format("{} + {}", op_a, op_b), Type::Float};
        }

        // Each invocation extracts its own 2-bit modifier index from the mask.
        const std::string instr_mask = VisitOperand(operation, 2).AsUint();
        const std::string mask = code.GenerateTemporary();
        code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask,
                     instr_mask);

        const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask);
        const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask);
        return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b),
                Type::Float};
    }
| 1744 | |||
    /// Converts a float to a signed integer.
    Expression ICastFloat(Operation operation) {
        return GenerateUnary(operation, "int", Type::Int, Type::Float);
    }

    /// Reinterprets an unsigned integer as signed.
    Expression ICastUnsigned(Operation operation) {
        return GenerateUnary(operation, "int", Type::Int, Type::Uint);
    }

    /// Shift left; the shift amount is always unsigned.
    template <Type type>
    Expression LogicalShiftLeft(Operation operation) {
        return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint);
    }

    /// Logical (zero-filling) right shift with a signed result: shift as
    /// uint, then reinterpret as int.
    Expression ILogicalShiftRight(Operation operation) {
        const std::string op_a = VisitOperand(operation, 0).AsUint();
        const std::string op_b = VisitOperand(operation, 1).AsUint();
        std::string op_str = fmt::format("int({} >> {})", op_a, op_b);

        return ApplyPrecise(operation, std::move(op_str), Type::Int);
    }

    /// Arithmetic (sign-extending) right shift.
    Expression IArithmeticShiftRight(Operation operation) {
        return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint);
    }

    // Bitwise operator wrappers for the given type.

    template <Type type>
    Expression BitwiseAnd(Operation operation) {
        return GenerateBinaryInfix(operation, "&", type, type, type);
    }

    template <Type type>
    Expression BitwiseOr(Operation operation) {
        return GenerateBinaryInfix(operation, "|", type, type, type);
    }

    template <Type type>
    Expression BitwiseXor(Operation operation) {
        return GenerateBinaryInfix(operation, "^", type, type, type);
    }

    template <Type type>
    Expression BitwiseNot(Operation operation) {
        return GenerateUnary(operation, "~", type, type);
    }

    /// Converts a float to an unsigned integer.
    Expression UCastFloat(Operation operation) {
        return GenerateUnary(operation, "uint", Type::Uint, Type::Float);
    }

    /// Reinterprets a signed integer as unsigned.
    Expression UCastSigned(Operation operation) {
        return GenerateUnary(operation, "uint", Type::Uint, Type::Int);
    }

    /// Unsigned (logical) right shift.
    Expression UShiftRight(Operation operation) {
        return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint);
    }

    /// GLSL bitfieldInsert(base, insert, offset, bits).
    template <Type type>
    Expression BitfieldInsert(Operation operation) {
        return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int,
                                  Type::Int);
    }

    /// GLSL bitfieldExtract(value, offset, bits).
    template <Type type>
    Expression BitfieldExtract(Operation operation) {
        return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int);
    }

    /// Population count of set bits.
    template <Type type>
    Expression BitCount(Operation operation) {
        return GenerateUnary(operation, "bitCount", type, type);
    }

    /// Index of the most significant set bit.
    template <Type type>
    Expression BitMSB(Operation operation) {
        return GenerateUnary(operation, "findMSB", type, type);
    }
| 1822 | |||
    /// Negates the components of a half-float pair; boolean operands 1 and 2
    /// select whether the respective component is negated.
    Expression HNegate(Operation operation) {
        const auto GetNegate = [&](std::size_t index) {
            return VisitOperand(operation, index).AsBool() + " ? -1 : 1";
        };
        return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(),
                            GetNegate(1), GetNegate(2)),
                Type::HalfFloat};
    }

    /// Clamps both components of a half-float pair to [min, max].
    Expression HClamp(Operation operation) {
        const std::string value = VisitOperand(operation, 0).AsHalfFloat();
        const std::string min = VisitOperand(operation, 1).AsFloat();
        const std::string max = VisitOperand(operation, 2).AsFloat();
        std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max);

        return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat);
    }

    /// Widens a single float into a half-float pair with a zeroed second component.
    Expression HCastFloat(Operation operation) {
        return {fmt::format("vec2({}, 0.0f)", VisitOperand(operation, 0).AsFloat()),
                Type::HalfFloat};
    }

    /// Unpacks an operand into a half-float pair according to the
    /// instruction's HalfType selector (swizzle of the two halves).
    Expression HUnpack(Operation operation) {
        Expression operand = VisitOperand(operation, 0);
        switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
        case Tegra::Shader::HalfType::H0_H1:
            return operand;
        case Tegra::Shader::HalfType::F32:
            return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat};
        case Tegra::Shader::HalfType::H0_H0:
            return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat};
        case Tegra::Shader::HalfType::H1_H1:
            return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat};
        }
        UNREACHABLE();
        return {"0", Type::Int};
    }

    /// Returns the first component of a half-float pair as a plain float.
    Expression HMergeF32(Operation operation) {
        return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
    }

    /// Merges the low half of `src` (operand 1) into `dest` (operand 0).
    Expression HMergeH0(Operation operation) {
        const std::string dest = VisitOperand(operation, 0).AsUint();
        const std::string src = VisitOperand(operation, 1).AsUint();
        return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest),
                Type::HalfFloat};
    }

    /// Merges the high half of `src` (operand 1) into `dest` (operand 0).
    Expression HMergeH1(Operation operation) {
        const std::string dest = VisitOperand(operation, 0).AsUint();
        const std::string src = VisitOperand(operation, 1).AsUint();
        return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src),
                Type::HalfFloat};
    }

    /// Packs two floats into a half-float pair.
    Expression HPack2(Operation operation) {
        return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(),
                            VisitOperand(operation, 1).AsFloat()),
                Type::HalfFloat};
    }
| 1885 | |||
    /// Generic scalar comparison.
    ///
    /// `unordered` comparisons evaluate true when either operand is NaN (only
    /// meaningful for floats). Ordered float "!=" is special-cased because
    /// GLSL's operator!= is not reliably ordered on real drivers.
    template <const std::string_view& op, Type type, bool unordered = false>
    Expression Comparison(Operation operation) {
        static_assert(!unordered || type == Type::Float);

        Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);

        if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) {
            // GLSL's operator!=(float, float) doesn't seem be ordered. This happens on both AMD's
            // and Nvidia's proprietary stacks. Manually force an ordered comparison.
            return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(),
                                VisitOperand(operation, 0).AsFloat(),
                                VisitOperand(operation, 1).AsFloat()),
                    Type::Bool};
        }
        if constexpr (!unordered) {
            return expr;
        }
        // Unordered comparisons are always true for NaN operands.
        return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(),
                            VisitOperand(operation, 0).AsFloat(),
                            VisitOperand(operation, 1).AsFloat()),
                Type::Bool};
    }

    /// True when neither operand is NaN.
    Expression FOrdered(Operation operation) {
        return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(),
                            VisitOperand(operation, 1).AsFloat()),
                Type::Bool};
    }

    /// True when either operand is NaN.
    Expression FUnordered(Operation operation) {
        return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(),
                            VisitOperand(operation, 1).AsFloat()),
                Type::Bool};
    }

    /// Carry-out flag of an unsigned addition, via GLSL uaddCarry.
    Expression LogicalAddCarry(Operation operation) {
        const std::string carry = code.GenerateTemporary();
        code.AddLine("uint {};", carry);
        code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(),
                     VisitOperand(operation, 1).AsUint(), carry);
        return {fmt::format("({} != 0)", carry), Type::Bool};
    }
| 1929 | |||
| 1930 | Expression LogicalAssign(Operation operation) { | ||
| 1931 | const Node& dest = operation[0]; | ||
| 1932 | const Node& src = operation[1]; | ||
| 1933 | |||
| 1934 | std::string target; | ||
| 1935 | |||
| 1936 | if (const auto pred = std::get_if<PredicateNode>(&*dest)) { | ||
| 1937 | ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); | ||
| 1938 | |||
| 1939 | const auto index = pred->GetIndex(); | ||
| 1940 | switch (index) { | ||
| 1941 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1942 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1943 | // Writing to these predicates is a no-op | ||
| 1944 | return {}; | ||
| 1945 | } | ||
| 1946 | target = GetPredicate(index); | ||
| 1947 | } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) { | ||
| 1948 | target = GetInternalFlag(flag->GetFlag()); | ||
| 1949 | } | ||
| 1950 | |||
| 1951 | code.AddLine("{} = {};", target, Visit(src).AsBool()); | ||
| 1952 | return {}; | ||
| 1953 | } | ||
| 1954 | |||
    // Boolean operator wrappers.

    Expression LogicalAnd(Operation operation) {
        return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool);
    }

    Expression LogicalOr(Operation operation) {
        return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool);
    }

    Expression LogicalXor(Operation operation) {
        return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool);
    }

    Expression LogicalNegate(Operation operation) {
        return GenerateUnary(operation, "!", Type::Bool, Type::Bool);
    }

    /// Selects one component of a bool pair; operand 1 is the index.
    Expression LogicalPick2(Operation operation) {
        return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(),
                            VisitOperand(operation, 1).AsUint()),
                Type::Bool};
    }

    /// Reduces a bool pair with a logical AND of both components.
    Expression LogicalAnd2(Operation operation) {
        return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
    }

    /// Component-wise half-float comparison; when `with_nan` is set, the
    /// result is post-processed by the HalfFloatNanComparison helper to
    /// account for NaN operands.
    template <bool with_nan>
    Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) {
        Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2,
                                                   Type::HalfFloat, Type::HalfFloat);
        if constexpr (!with_nan) {
            return comparison;
        }
        return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(),
                            VisitOperand(operation, 0).AsHalfFloat(),
                            VisitOperand(operation, 1).AsHalfFloat()),
                Type::Bool2};
    }

    // Half-float comparison wrappers mapping onto the GLSL vector relational
    // built-ins.

    template <bool with_nan>
    Expression Logical2HLessThan(Operation operation) {
        return GenerateHalfComparison<with_nan>(operation, "lessThan");
    }

    template <bool with_nan>
    Expression Logical2HEqual(Operation operation) {
        return GenerateHalfComparison<with_nan>(operation, "equal");
    }

    template <bool with_nan>
    Expression Logical2HLessEqual(Operation operation) {
        return GenerateHalfComparison<with_nan>(operation, "lessThanEqual");
    }

    template <bool with_nan>
    Expression Logical2HGreaterThan(Operation operation) {
        return GenerateHalfComparison<with_nan>(operation, "greaterThan");
    }

    template <bool with_nan>
    Expression Logical2HNotEqual(Operation operation) {
        return GenerateHalfComparison<with_nan>(operation, "notEqual");
    }

    template <bool with_nan>
    Expression Logical2HGreaterEqual(Operation operation) {
        return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual");
    }
| 2023 | |||
    /// Emits a basic texture sample with optional offset and bias.
    ///
    /// Cube array shadow samplers pass the depth compare value as a separate
    /// argument. Shadow samples yield a scalar that is widened to vec4 so the
    /// trailing element swizzle stays valid.
    Expression Texture(Operation operation) {
        const auto meta = std::get<MetaTexture>(operation.GetMeta());
        const bool separate_dc = meta.sampler.type == TextureType::TextureCube &&
                                 meta.sampler.is_array && meta.sampler.is_shadow;
        // TODO: Replace this with an array and make GenerateTexture use C++20 std::span
        const std::vector<TextureIR> extras{
            TextureOffset{},
            TextureArgument{Type::Float, meta.bias},
        };
        std::string expr = GenerateTexture(operation, "", extras, separate_dc);
        if (meta.sampler.is_shadow) {
            expr = fmt::format("vec4({})", expr);
        }
        return {expr + GetSwizzle(meta.element), Type::Float};
    }

    /// Emits a textureLod sample.
    ///
    /// Some shadow sampler types need GL_EXT_texture_shadow_lod to support
    /// textureLod; without it, fall back to a sample with no LOD argument.
    Expression TextureLod(Operation operation) {
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);

        std::string expr{};

        if (!device.HasTextureShadowLod() && meta->sampler.is_shadow &&
            ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
             meta->sampler.type == TextureType::TextureCube)) {
            LOG_ERROR(Render_OpenGL,
                      "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround");
            expr = GenerateTexture(operation, "Lod", {});
        } else {
            expr = GenerateTexture(operation, "Lod",
                                   {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
        }

        if (meta->sampler.is_shadow) {
            // Widen the scalar shadow result so the element swizzle is valid.
            expr = "vec4(" + expr + ')';
        }
        return {expr + GetSwizzle(meta->element), Type::Float};
    }
| 2062 | |||
| 2063 | Expression TextureGather(Operation operation) { | ||
| 2064 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 2065 | |||
| 2066 | const auto type = meta.sampler.is_shadow ? Type::Float : Type::Int; | ||
| 2067 | const bool separate_dc = meta.sampler.is_shadow; | ||
| 2068 | |||
| 2069 | std::vector<TextureIR> ir_; | ||
| 2070 | if (meta.sampler.is_shadow) { | ||
| 2071 | ir_ = {TextureOffset{}}; | ||
| 2072 | } else { | ||
| 2073 | ir_ = {TextureOffset{}, TextureArgument{type, meta.component}}; | ||
| 2074 | } | ||
| 2075 | return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element), | ||
| 2076 | Type::Float}; | ||
| 2077 | } | ||
| 2078 | |||
    /// Queries texture dimensions (elements 0/1) or the mip level count
    /// (element 3). Element 2 is not handled and yields zero.
    Expression TextureQueryDimensions(Operation operation) {
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);

        const std::string sampler = GetSampler(meta->sampler);
        const std::string lod = VisitOperand(operation, 0).AsInt();

        switch (meta->element) {
        case 0:
        case 1:
            return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)),
                    Type::Int};
        case 3:
            return {fmt::format("textureQueryLevels({})", sampler), Type::Int};
        }
        UNREACHABLE();
        return {"0", Type::Int};
    }

    /// Queries the LOD a sample would use, scaled by 256 to match the
    /// hardware's fixed-point encoding. Only elements 0 and 1 are
    /// meaningful; other elements yield zero.
    Expression TextureQueryLod(Operation operation) {
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);

        if (meta->element < 2) {
            return {fmt::format("int(({} * vec2(256)){})",
                                GenerateTexture(operation, "QueryLod", {}),
                                GetSwizzle(meta->element)),
                    Type::Int};
        }
        return {"0", Type::Int};
    }
| 2110 | |||
    /// Emits a texelFetch: integer coordinates, optional array layer folded
    /// into the coordinate vector, and an optional LOD (buffer samplers take
    /// no LOD argument).
    Expression TexelFetch(Operation operation) {
        constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"};
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);
        UNIMPLEMENTED_IF(meta->sampler.is_array);
        const std::size_t count = operation.GetOperandsCount();

        std::string expr = "texelFetch(";
        expr += GetSampler(meta->sampler);
        expr += ", ";

        // The array layer (when present) widens the coordinate constructor.
        expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1);
        expr += '(';
        for (std::size_t i = 0; i < count; ++i) {
            if (i > 0) {
                expr += ", ";
            }
            expr += VisitOperand(operation, i).AsInt();
        }
        if (meta->array) {
            expr += ", ";
            expr += Visit(meta->array).AsInt();
        }
        expr += ')';

        if (meta->lod && !meta->sampler.is_buffer) {
            expr += ", ";
            expr += Visit(meta->lod).AsInt();
        }
        expr += ')';
        expr += GetSwizzle(meta->element);

        return {std::move(expr), Type::Float};
    }
| 2145 | |||
| 2146 | Expression TextureGradient(Operation operation) { | ||
| 2147 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 2148 | std::string expr = | ||
| 2149 | GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}}); | ||
| 2150 | return {std::move(expr) + GetSwizzle(meta.element), Type::Float}; | ||
| 2151 | } | ||
| 2152 | |||
    /// Loads a texel from an image. Needs GL_EXT_shader_image_load_formatted;
    /// stubbed to zero when the extension is unavailable.
    Expression ImageLoad(Operation operation) {
        if (!device.HasImageLoadFormatted()) {
            LOG_ERROR(Render_OpenGL,
                      "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load");
            return {"0", Type::Int};
        }

        const auto& meta{std::get<MetaImage>(operation.GetMeta())};
        return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image),
                            BuildIntegerCoordinates(operation), GetSwizzle(meta.element)),
                Type::Uint};
    }

    /// Stores the metadata values into an image at integer coordinates.
    Expression ImageStore(Operation operation) {
        const auto& meta{std::get<MetaImage>(operation.GetMeta())};
        code.AddLine("imageStore({}, {}, {});", GetImage(meta.image),
                     BuildIntegerCoordinates(operation), BuildImageValues(operation));
        return {};
    }

    /// Image atomic (imageAtomicAdd, imageAtomicMin, ...); evaluates to the
    /// value previously held at the coordinate.
    template <const std::string_view& opname>
    Expression AtomicImage(Operation operation) {
        const auto& meta{std::get<MetaImage>(operation.GetMeta())};
        ASSERT(meta.values.size() == 1);

        return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image),
                            BuildIntegerCoordinates(operation), Visit(meta.values[0]).AsUint()),
                Type::Uint};
    }

    /// Memory atomic (atomicAdd, atomicAnd, ...); operand 0 is the memory
    /// lvalue, operand 1 the value. Signed min/max are unimplemented.
    template <const std::string_view& opname, Type type>
    Expression Atomic(Operation operation) {
        if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) {
            UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations");
            return {};
        }
        return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
                            Visit(operation[1]).AsUint()),
                Type::Uint};
    }

    /// Atomic whose result is discarded; emitted as a standalone statement.
    template <const std::string_view& opname, Type type>
    Expression Reduce(Operation operation) {
        code.AddLine("{};", Atomic<opname, type>(operation).GetCode());
        return {};
    }
| 2199 | |||
    /// Unconditional branch to an immediate address: sets the jump target
    /// variable and breaks out of the dispatch loop.
    Expression Branch(Operation operation) {
        const auto target = std::get_if<ImmediateNode>(&*operation[0]);
        UNIMPLEMENTED_IF(!target);

        code.AddLine("jmp_to = 0x{:X}U;", target->GetValue());
        code.AddLine("break;");
        return {};
    }

    /// Branch to a runtime-computed address.
    Expression BranchIndirect(Operation operation) {
        const std::string op_a = VisitOperand(operation, 0).AsUint();

        code.AddLine("jmp_to = {};", op_a);
        code.AddLine("break;");
        return {};
    }

    /// Pushes an immediate return address onto the given flow stack (SSY/PBK).
    Expression PushFlowStack(Operation operation) {
        const auto stack = std::get<MetaStackClass>(operation.GetMeta());
        const auto target = std::get_if<ImmediateNode>(&*operation[0]);
        UNIMPLEMENTED_IF(!target);

        code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack),
                     target->GetValue());
        return {};
    }

    /// Pops an address from the given flow stack and jumps to it (SYNC/BRK).
    Expression PopFlowStack(Operation operation) {
        const auto stack = std::get<MetaStackClass>(operation.GetMeta());
        code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack));
        code.AddLine("break;");
        return {};
    }
| 2233 | |||
    /// Fragment-stage epilogue: copies the shader registers holding color
    /// (and optionally depth) outputs into the fragment outputs. A no-op for
    /// every other stage.
    void PreExit() {
        if (stage != ShaderType::Fragment) {
            return;
        }
        const auto& used_registers = ir.GetRegisters();
        // Registers never written by the shader read as zero.
        const auto SafeGetRegister = [&](u32 reg) -> Expression {
            // TODO(Rodrigo): Replace with contains once C++20 releases
            if (used_registers.find(reg) != used_registers.end()) {
                return {GetRegister(reg), Type::Float};
            }
            return {"0.0f", Type::Float};
        };

        UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented");

        // Write the color outputs using the data in the shader registers, disabled
        // rendertargets/components are skipped in the register assignment.
        u32 current_reg = 0;
        for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) {
            // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
            for (u32 component = 0; component < 4; ++component) {
                if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
                    code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component),
                                 SafeGetRegister(current_reg).AsFloat());
                    ++current_reg;
                }
            }
        }
        if (header.ps.omap.depth) {
            // The depth output is always 2 registers after the last color output, and current_reg
            // already contains one past the last color register.
            code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat());
        }
    }
| 2268 | |||
| 2269 | Expression Exit(Operation operation) { | ||
| 2270 | PreExit(); | ||
| 2271 | code.AddLine("return;"); | ||
| 2272 | return {}; | ||
| 2273 | } | ||
| 2274 | |||
    // Emits a fragment discard.
    Expression Discard(Operation operation) {
        // Enclose "discard" in a conditional, so that GLSL compilation does not complain
        // about unexecuted instructions that may follow this.
        code.AddLine("if (true) {{");
        ++code.scope;
        code.AddLine("discard;");
        --code.scope;
        code.AddLine("}}");
        return {};
    }
| 2285 | |||
    // Emits the current output vertex; only valid inside a geometry shader.
    Expression EmitVertex(Operation operation) {
        ASSERT_MSG(stage == ShaderType::Geometry,
                   "EmitVertex is expected to be used in a geometry shader.");
        code.AddLine("EmitVertex();");
        return {};
    }
| 2292 | |||
    // Finishes the current output primitive; only valid inside a geometry shader.
    Expression EndPrimitive(Operation operation) {
        ASSERT_MSG(stage == ShaderType::Geometry,
                   "EndPrimitive is expected to be used in a geometry shader.");
        code.AddLine("EndPrimitive();");
        return {};
    }
| 2299 | |||
    // Maps the invocation-id query to GLSL's gl_InvocationID builtin.
    Expression InvocationId(Operation operation) {
        return {"gl_InvocationID", Type::Int};
    }
| 2303 | |||
    Expression YNegate(Operation operation) {
        // Y_NEGATE is mapped to this uniform value (gl_FrontMaterial.ambient.a is
        // repurposed as a spare uniform slot to carry the Y-direction flip factor).
        return {"gl_FrontMaterial.ambient.a", Type::Float};
    }
| 2308 | |||
    // Returns one component (x/y/z selected by the template argument) of the
    // compute-shader local invocation id.
    template <u32 element>
    Expression LocalInvocationId(Operation) {
        return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint};
    }
| 2313 | |||
    // Returns one component (x/y/z selected by the template argument) of the
    // compute-shader workgroup id.
    template <u32 element>
    Expression WorkGroupId(Operation) {
        return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint};
    }
| 2318 | |||
| 2319 | Expression BallotThread(Operation operation) { | ||
| 2320 | const std::string value = VisitOperand(operation, 0).AsBool(); | ||
| 2321 | if (!device.HasWarpIntrinsics()) { | ||
| 2322 | LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); | ||
| 2323 | // Stub on non-Nvidia devices by simulating all threads voting the same as the active | ||
| 2324 | // one. | ||
| 2325 | return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; | ||
| 2326 | } | ||
| 2327 | return {fmt::format("ballotThreadNV({})", value), Type::Uint}; | ||
| 2328 | } | ||
| 2329 | |||
| 2330 | Expression Vote(Operation operation, const char* func) { | ||
| 2331 | const std::string value = VisitOperand(operation, 0).AsBool(); | ||
| 2332 | if (!device.HasWarpIntrinsics()) { | ||
| 2333 | LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); | ||
| 2334 | // Stub with a warp size of one. | ||
| 2335 | return {value, Type::Bool}; | ||
| 2336 | } | ||
| 2337 | return {fmt::format("{}({})", func, value), Type::Bool}; | ||
| 2338 | } | ||
| 2339 | |||
    // "All lanes true" vote, lowered through the shared Vote() helper.
    Expression VoteAll(Operation operation) {
        return Vote(operation, "allThreadsNV");
    }
| 2343 | |||
    // "Any lane true" vote, lowered through the shared Vote() helper.
    Expression VoteAny(Operation operation) {
        return Vote(operation, "anyThreadNV");
    }
| 2347 | |||
    // "All lanes agree" vote. Checked here (instead of relying on Vote's fallback)
    // because the correct single-lane stub is a constant true, not the predicate.
    Expression VoteEqual(Operation operation) {
        if (!device.HasWarpIntrinsics()) {
            LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
            // We must return true here since a stub for a theoretical warp size of 1.
            // This will always return an equal result across all votes.
            return {"true", Type::Bool};
        }
        return Vote(operation, "allThreadsEqualNV");
    }
| 2357 | |||
    // Lane index within the subgroup (ARB_shader_ballot); stubs to lane 0 when the
    // extension is unavailable.
    Expression ThreadId(Operation operation) {
        if (!device.HasShaderBallot()) {
            LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
            return {"0U", Type::Uint};
        }
        return {"gl_SubGroupInvocationARB", Type::Uint};
    }
| 2365 | |||
| 2366 | template <const std::string_view& comparison> | ||
| 2367 | Expression ThreadMask(Operation) { | ||
| 2368 | if (device.HasWarpIntrinsics()) { | ||
| 2369 | return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint}; | ||
| 2370 | } | ||
| 2371 | if (device.HasShaderBallot()) { | ||
| 2372 | return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint}; | ||
| 2373 | } | ||
| 2374 | LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader"); | ||
| 2375 | return {"0U", Type::Uint}; | ||
| 2376 | } | ||
| 2377 | |||
    // Reads a value from another lane (operand 1 selects the lane) using
    // readInvocationARB; falls back to the caller's own value when ballot is missing.
    Expression ShuffleIndexed(Operation operation) {
        std::string value = VisitOperand(operation, 0).AsFloat();

        if (!device.HasShaderBallot()) {
            LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
            return {std::move(value), Type::Float};
        }

        const std::string index = VisitOperand(operation, 1).AsUint();
        return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
    }
| 2389 | |||
    // Workgroup execution barrier. Only emitted for fully decompiled shaders:
    // in the bruteforce (non-decompiled) path control flow is not uniform, so a
    // barrier() call would be undefined behavior in GLSL.
    Expression Barrier(Operation) {
        if (!ir.IsDecompiled()) {
            LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled");
            return {};
        }
        code.AddLine("barrier();");
        return {};
    }
| 2398 | |||
    // Workgroup-scoped memory barrier.
    Expression MemoryBarrierGroup(Operation) {
        code.AddLine("groupMemoryBarrier();");
        return {};
    }
| 2403 | |||
    // Device-scoped memory barrier.
    Expression MemoryBarrierGlobal(Operation) {
        code.AddLine("memoryBarrier();");
        return {};
    }
| 2408 | |||
    // Non-instantiable bag of string constants used as template arguments for the
    // generic operation handlers below: comparison operators, thread-mask suffixes,
    // and atomic/reduction function-name fragments.
    struct Func final {
        Func() = delete;
        ~Func() = delete;

        static constexpr std::string_view LessThan = "<";
        static constexpr std::string_view Equal = "==";
        static constexpr std::string_view LessEqual = "<=";
        static constexpr std::string_view GreaterThan = ">";
        static constexpr std::string_view NotEqual = "!=";
        static constexpr std::string_view GreaterEqual = ">=";

        static constexpr std::string_view Eq = "Eq";
        static constexpr std::string_view Ge = "Ge";
        static constexpr std::string_view Gt = "Gt";
        static constexpr std::string_view Le = "Le";
        static constexpr std::string_view Lt = "Lt";

        static constexpr std::string_view Add = "Add";
        static constexpr std::string_view Min = "Min";
        static constexpr std::string_view Max = "Max";
        static constexpr std::string_view And = "And";
        static constexpr std::string_view Or = "Or";
        static constexpr std::string_view Xor = "Xor";
        static constexpr std::string_view Exchange = "Exchange";
    };
| 2434 | |||
    // Dispatch table: one member-function pointer per OperationCode, indexed by the
    // enum's numeric value. Entry order therefore MUST match the OperationCode
    // declaration order exactly; the static_assert below guards the entry count.
    static constexpr std::array operation_decompilers = {
        &GLSLDecompiler::Assign,

        &GLSLDecompiler::Select,

        &GLSLDecompiler::Add<Type::Float>,
        &GLSLDecompiler::Mul<Type::Float>,
        &GLSLDecompiler::Div<Type::Float>,
        &GLSLDecompiler::Fma<Type::Float>,
        &GLSLDecompiler::Negate<Type::Float>,
        &GLSLDecompiler::Absolute<Type::Float>,
        &GLSLDecompiler::FClamp,
        &GLSLDecompiler::FCastHalf0,
        &GLSLDecompiler::FCastHalf1,
        &GLSLDecompiler::Min<Type::Float>,
        &GLSLDecompiler::Max<Type::Float>,
        &GLSLDecompiler::FCos,
        &GLSLDecompiler::FSin,
        &GLSLDecompiler::FExp2,
        &GLSLDecompiler::FLog2,
        &GLSLDecompiler::FInverseSqrt,
        &GLSLDecompiler::FSqrt,
        &GLSLDecompiler::FRoundEven,
        &GLSLDecompiler::FFloor,
        &GLSLDecompiler::FCeil,
        &GLSLDecompiler::FTrunc,
        &GLSLDecompiler::FCastInteger<Type::Int>,
        &GLSLDecompiler::FCastInteger<Type::Uint>,
        &GLSLDecompiler::FSwizzleAdd,

        &GLSLDecompiler::Add<Type::Int>,
        &GLSLDecompiler::Mul<Type::Int>,
        &GLSLDecompiler::Div<Type::Int>,
        &GLSLDecompiler::Negate<Type::Int>,
        &GLSLDecompiler::Absolute<Type::Int>,
        &GLSLDecompiler::Min<Type::Int>,
        &GLSLDecompiler::Max<Type::Int>,

        &GLSLDecompiler::ICastFloat,
        &GLSLDecompiler::ICastUnsigned,
        &GLSLDecompiler::LogicalShiftLeft<Type::Int>,
        &GLSLDecompiler::ILogicalShiftRight,
        &GLSLDecompiler::IArithmeticShiftRight,
        &GLSLDecompiler::BitwiseAnd<Type::Int>,
        &GLSLDecompiler::BitwiseOr<Type::Int>,
        &GLSLDecompiler::BitwiseXor<Type::Int>,
        &GLSLDecompiler::BitwiseNot<Type::Int>,
        &GLSLDecompiler::BitfieldInsert<Type::Int>,
        &GLSLDecompiler::BitfieldExtract<Type::Int>,
        &GLSLDecompiler::BitCount<Type::Int>,
        &GLSLDecompiler::BitMSB<Type::Int>,

        &GLSLDecompiler::Add<Type::Uint>,
        &GLSLDecompiler::Mul<Type::Uint>,
        &GLSLDecompiler::Div<Type::Uint>,
        &GLSLDecompiler::Min<Type::Uint>,
        &GLSLDecompiler::Max<Type::Uint>,
        &GLSLDecompiler::UCastFloat,
        &GLSLDecompiler::UCastSigned,
        &GLSLDecompiler::LogicalShiftLeft<Type::Uint>,
        // Logical and arithmetic right shift coincide for unsigned operands, so the
        // same handler is deliberately registered twice.
        &GLSLDecompiler::UShiftRight,
        &GLSLDecompiler::UShiftRight,
        &GLSLDecompiler::BitwiseAnd<Type::Uint>,
        &GLSLDecompiler::BitwiseOr<Type::Uint>,
        &GLSLDecompiler::BitwiseXor<Type::Uint>,
        &GLSLDecompiler::BitwiseNot<Type::Uint>,
        &GLSLDecompiler::BitfieldInsert<Type::Uint>,
        &GLSLDecompiler::BitfieldExtract<Type::Uint>,
        &GLSLDecompiler::BitCount<Type::Uint>,
        &GLSLDecompiler::BitMSB<Type::Uint>,

        &GLSLDecompiler::Add<Type::HalfFloat>,
        &GLSLDecompiler::Mul<Type::HalfFloat>,
        &GLSLDecompiler::Fma<Type::HalfFloat>,
        &GLSLDecompiler::Absolute<Type::HalfFloat>,
        &GLSLDecompiler::HNegate,
        &GLSLDecompiler::HClamp,
        &GLSLDecompiler::HCastFloat,
        &GLSLDecompiler::HUnpack,
        &GLSLDecompiler::HMergeF32,
        &GLSLDecompiler::HMergeH0,
        &GLSLDecompiler::HMergeH1,
        &GLSLDecompiler::HPack2,

        &GLSLDecompiler::LogicalAssign,
        &GLSLDecompiler::LogicalAnd,
        &GLSLDecompiler::LogicalOr,
        &GLSLDecompiler::LogicalXor,
        &GLSLDecompiler::LogicalNegate,
        &GLSLDecompiler::LogicalPick2,
        &GLSLDecompiler::LogicalAnd2,

        &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, false>,
        &GLSLDecompiler::Comparison<Func::Equal, Type::Float, false>,
        &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, false>,
        &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, false>,
        &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, false>,
        &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, false>,
        &GLSLDecompiler::FOrdered,
        &GLSLDecompiler::FUnordered,
        &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, true>,
        &GLSLDecompiler::Comparison<Func::Equal, Type::Float, true>,
        &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, true>,
        &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, true>,
        &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, true>,
        &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, true>,

        &GLSLDecompiler::Comparison<Func::LessThan, Type::Int>,
        &GLSLDecompiler::Comparison<Func::Equal, Type::Int>,
        &GLSLDecompiler::Comparison<Func::LessEqual, Type::Int>,
        &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Int>,
        &GLSLDecompiler::Comparison<Func::NotEqual, Type::Int>,
        &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Int>,

        &GLSLDecompiler::Comparison<Func::LessThan, Type::Uint>,
        &GLSLDecompiler::Comparison<Func::Equal, Type::Uint>,
        &GLSLDecompiler::Comparison<Func::LessEqual, Type::Uint>,
        &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Uint>,
        &GLSLDecompiler::Comparison<Func::NotEqual, Type::Uint>,
        &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Uint>,

        &GLSLDecompiler::LogicalAddCarry,

        &GLSLDecompiler::Logical2HLessThan<false>,
        &GLSLDecompiler::Logical2HEqual<false>,
        &GLSLDecompiler::Logical2HLessEqual<false>,
        &GLSLDecompiler::Logical2HGreaterThan<false>,
        &GLSLDecompiler::Logical2HNotEqual<false>,
        &GLSLDecompiler::Logical2HGreaterEqual<false>,
        &GLSLDecompiler::Logical2HLessThan<true>,
        &GLSLDecompiler::Logical2HEqual<true>,
        &GLSLDecompiler::Logical2HLessEqual<true>,
        &GLSLDecompiler::Logical2HGreaterThan<true>,
        &GLSLDecompiler::Logical2HNotEqual<true>,
        &GLSLDecompiler::Logical2HGreaterEqual<true>,

        &GLSLDecompiler::Texture,
        &GLSLDecompiler::TextureLod,
        &GLSLDecompiler::TextureGather,
        &GLSLDecompiler::TextureQueryDimensions,
        &GLSLDecompiler::TextureQueryLod,
        &GLSLDecompiler::TexelFetch,
        &GLSLDecompiler::TextureGradient,

        &GLSLDecompiler::ImageLoad,
        &GLSLDecompiler::ImageStore,

        &GLSLDecompiler::AtomicImage<Func::Add>,
        &GLSLDecompiler::AtomicImage<Func::And>,
        &GLSLDecompiler::AtomicImage<Func::Or>,
        &GLSLDecompiler::AtomicImage<Func::Xor>,
        &GLSLDecompiler::AtomicImage<Func::Exchange>,

        &GLSLDecompiler::Atomic<Func::Exchange, Type::Uint>,
        &GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
        &GLSLDecompiler::Atomic<Func::Min, Type::Uint>,
        &GLSLDecompiler::Atomic<Func::Max, Type::Uint>,
        &GLSLDecompiler::Atomic<Func::And, Type::Uint>,
        &GLSLDecompiler::Atomic<Func::Or, Type::Uint>,
        &GLSLDecompiler::Atomic<Func::Xor, Type::Uint>,

        &GLSLDecompiler::Atomic<Func::Exchange, Type::Int>,
        &GLSLDecompiler::Atomic<Func::Add, Type::Int>,
        &GLSLDecompiler::Atomic<Func::Min, Type::Int>,
        &GLSLDecompiler::Atomic<Func::Max, Type::Int>,
        &GLSLDecompiler::Atomic<Func::And, Type::Int>,
        &GLSLDecompiler::Atomic<Func::Or, Type::Int>,
        &GLSLDecompiler::Atomic<Func::Xor, Type::Int>,

        &GLSLDecompiler::Reduce<Func::Add, Type::Uint>,
        &GLSLDecompiler::Reduce<Func::Min, Type::Uint>,
        &GLSLDecompiler::Reduce<Func::Max, Type::Uint>,
        &GLSLDecompiler::Reduce<Func::And, Type::Uint>,
        &GLSLDecompiler::Reduce<Func::Or, Type::Uint>,
        &GLSLDecompiler::Reduce<Func::Xor, Type::Uint>,

        &GLSLDecompiler::Reduce<Func::Add, Type::Int>,
        &GLSLDecompiler::Reduce<Func::Min, Type::Int>,
        &GLSLDecompiler::Reduce<Func::Max, Type::Int>,
        &GLSLDecompiler::Reduce<Func::And, Type::Int>,
        &GLSLDecompiler::Reduce<Func::Or, Type::Int>,
        &GLSLDecompiler::Reduce<Func::Xor, Type::Int>,

        &GLSLDecompiler::Branch,
        &GLSLDecompiler::BranchIndirect,
        &GLSLDecompiler::PushFlowStack,
        &GLSLDecompiler::PopFlowStack,
        &GLSLDecompiler::Exit,
        &GLSLDecompiler::Discard,

        &GLSLDecompiler::EmitVertex,
        &GLSLDecompiler::EndPrimitive,

        &GLSLDecompiler::InvocationId,
        &GLSLDecompiler::YNegate,
        &GLSLDecompiler::LocalInvocationId<0>,
        &GLSLDecompiler::LocalInvocationId<1>,
        &GLSLDecompiler::LocalInvocationId<2>,
        &GLSLDecompiler::WorkGroupId<0>,
        &GLSLDecompiler::WorkGroupId<1>,
        &GLSLDecompiler::WorkGroupId<2>,

        &GLSLDecompiler::BallotThread,
        &GLSLDecompiler::VoteAll,
        &GLSLDecompiler::VoteAny,
        &GLSLDecompiler::VoteEqual,

        &GLSLDecompiler::ThreadId,
        &GLSLDecompiler::ThreadMask<Func::Eq>,
        &GLSLDecompiler::ThreadMask<Func::Ge>,
        &GLSLDecompiler::ThreadMask<Func::Gt>,
        &GLSLDecompiler::ThreadMask<Func::Le>,
        &GLSLDecompiler::ThreadMask<Func::Lt>,
        &GLSLDecompiler::ShuffleIndexed,

        &GLSLDecompiler::Barrier,
        &GLSLDecompiler::MemoryBarrierGroup,
        &GLSLDecompiler::MemoryBarrierGlobal,
    };
    static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
| 2655 | |||
    // GLSL variable name for general-purpose register `index`.
    std::string GetRegister(u32 index) const {
        return AppendSuffix(index, "gpr");
    }
| 2659 | |||
    // GLSL variable name for IR custom variable `index`.
    std::string GetCustomVariable(u32 index) const {
        return AppendSuffix(index, "custom_var");
    }
| 2663 | |||
    // GLSL variable name for the given hardware predicate register.
    std::string GetPredicate(Tegra::Shader::Pred pred) const {
        return AppendSuffix(static_cast<u32>(pred), "pred");
    }
| 2667 | |||
    // GLSL variable name for a generic (non-builtin) input attribute.
    std::string GetGenericInputAttribute(Attribute::Index attribute) const {
        return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME);
    }
| 2671 | |||
| 2672 | std::unordered_map<u8, GenericVaryingDescription> varying_description; | ||
| 2673 | |||
    // GLSL lvalue for one component of a generic output attribute. Looks up the
    // varying description registered for the attribute's component offset; scalar
    // varyings are referenced by name, vector ones are indexed relative to their
    // first declared element.
    std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const {
        const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element);
        const auto& description = varying_description.at(offset);
        if (description.is_scalar) {
            return description.name;
        }
        return fmt::format("{}[{}]", description.name, element - description.first_element);
    }
| 2682 | |||
    // GLSL variable name for constant buffer `index`.
    std::string GetConstBuffer(u32 index) const {
        return AppendSuffix(index, "cbuf");
    }
| 2686 | |||
    // GLSL variable name for a global memory region, keyed by the const-buffer
    // index/offset that holds its address.
    std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
        return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix);
    }
| 2690 | |||
    // GLSL interface-block name for a global memory region (see GetGlobalMemory).
    std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const {
        return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset,
                           suffix);
    }
| 2695 | |||
    // GLSL uniform-block name for constant buffer `index`.
    std::string GetConstBufferBlock(u32 index) const {
        return AppendSuffix(index, "cbuf_block");
    }
| 2699 | |||
| 2700 | std::string GetLocalMemory() const { | ||
| 2701 | if (suffix.empty()) { | ||
| 2702 | return "lmem"; | ||
| 2703 | } else { | ||
| 2704 | return "lmem_" + std::string{suffix}; | ||
| 2705 | } | ||
| 2706 | } | ||
| 2707 | |||
    // GLSL variable name for one of the four internal ALU flags
    // (zero/sign/carry/overflow), suffixed like every other per-shader symbol.
    std::string GetInternalFlag(InternalFlag flag) const {
        constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
                                                  "overflow_flag"};
        const auto index = static_cast<u32>(flag);
        ASSERT(index < static_cast<u32>(InternalFlag::Amount));

        if (suffix.empty()) {
            return InternalFlagNames[index];
        } else {
            return fmt::format("{}_{}", InternalFlagNames[index], suffix);
        }
    }
| 2720 | |||
    // GLSL variable name for the given sampler entry.
    std::string GetSampler(const SamplerEntry& sampler) const {
        return AppendSuffix(sampler.index, "sampler");
    }
| 2724 | |||
    // GLSL variable name for the given image entry.
    std::string GetImage(const ImageEntry& image) const {
        return AppendSuffix(image.index, "image");
    }
| 2728 | |||
| 2729 | std::string AppendSuffix(u32 index, std::string_view name) const { | ||
| 2730 | if (suffix.empty()) { | ||
| 2731 | return fmt::format("{}{}", name, index); | ||
| 2732 | } else { | ||
| 2733 | return fmt::format("{}{}_{}", name, index, suffix); | ||
| 2734 | } | ||
| 2735 | } | ||
| 2736 | |||
    // Number of physical inputs for this stage: vertex shaders consume vertex
    // attributes, every other stage consumes varyings.
    u32 GetNumPhysicalInputAttributes() const {
        return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
    }
| 2740 | |||
    // Vertex attribute count, clamped to what the host device supports.
    u32 GetNumPhysicalAttributes() const {
        return std::min<u32>(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes);
    }
| 2744 | |||
    // Varying count, clamped to what the host device supports.
    u32 GetNumPhysicalVaryings() const {
        return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
    }
| 2748 | |||
| 2749 | const Device& device; | ||
| 2750 | const ShaderIR& ir; | ||
| 2751 | const Registry& registry; | ||
| 2752 | const ShaderType stage; | ||
| 2753 | const std::string_view identifier; | ||
| 2754 | const std::string_view suffix; | ||
| 2755 | const Header header; | ||
| 2756 | std::unordered_map<u8, VaryingTFB> transform_feedback; | ||
| 2757 | |||
| 2758 | ShaderWriter code; | ||
| 2759 | |||
| 2760 | std::optional<u32> max_input_vertices; | ||
| 2761 | }; | ||
| 2762 | |||
// Name of the boolean helper variable used by the structured-control-flow (AST)
// path to carry goto-elimination state.
std::string GetFlowVariable(u32 index) {
    return fmt::format("flow_var{}", index);
}
| 2766 | |||
// Visitor that lowers a control-flow condition expression tree into a GLSL
// boolean expression string, accumulated in `inner`. Apply with std::visit and
// read the result through GetResult().
class ExprDecompiler {
public:
    explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}

    // Logical AND of two sub-expressions.
    void operator()(const ExprAnd& expr) {
        inner += '(';
        std::visit(*this, *expr.operand1);
        inner += " && ";
        std::visit(*this, *expr.operand2);
        inner += ')';
    }

    // Logical OR of two sub-expressions.
    void operator()(const ExprOr& expr) {
        inner += '(';
        std::visit(*this, *expr.operand1);
        inner += " || ";
        std::visit(*this, *expr.operand2);
        inner += ')';
    }

    // Logical negation of a sub-expression.
    void operator()(const ExprNot& expr) {
        inner += '!';
        std::visit(*this, *expr.operand1);
    }

    // Reference to a hardware predicate register.
    void operator()(const ExprPredicate& expr) {
        const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
        inner += decomp.GetPredicate(pred);
    }

    // Condition-code test, lowered through the main decompiler.
    void operator()(const ExprCondCode& expr) {
        inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool();
    }

    // Reference to a goto-elimination flow variable.
    void operator()(const ExprVar& expr) {
        inner += GetFlowVariable(expr.var_index);
    }

    // Boolean literal.
    void operator()(const ExprBoolean& expr) {
        inner += expr.value ? "true" : "false";
    }

    // Equality test between a GPR (bit-cast to uint) and an immediate.
    void operator()(VideoCommon::Shader::ExprGprEqual& expr) {
        inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value);
    }

    // The accumulated GLSL expression text.
    const std::string& GetResult() const {
        return inner;
    }

private:
    GLSLDecompiler& decomp;
    std::string inner;
};
| 2821 | |||
// Visitor that walks the structured AST produced by goto elimination and emits
// GLSL control flow (if/else, do-while, break, return) through the main
// decompiler's code writer. Conditions are lowered with ExprDecompiler.
class ASTDecompiler {
public:
    explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}

    // Top-level program: visit every node in order.
    void operator()(const ASTProgram& ast) {
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
    }

    // if (<condition>) { <children> }
    void operator()(const ASTIfThen& ast) {
        ExprDecompiler expr_parser{decomp};
        std::visit(expr_parser, *ast.condition);
        decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
        decomp.code.scope++;
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
        decomp.code.scope--;
        decomp.code.AddLine("}}");
    }

    // else { <children> } -- always directly follows an ASTIfThen.
    void operator()(const ASTIfElse& ast) {
        decomp.code.AddLine("else {{");
        decomp.code.scope++;
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
        decomp.code.scope--;
        decomp.code.AddLine("}}");
    }

    // Encoded blocks must have been decoded before reaching this pass.
    void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
        UNREACHABLE();
    }

    // Straight-line decoded block: handled by the main decompiler.
    void operator()(const ASTBlockDecoded& ast) {
        decomp.VisitBlock(ast.nodes);
    }

    // Assignment to a goto-elimination flow variable.
    void operator()(const ASTVarSet& ast) {
        ExprDecompiler expr_parser{decomp};
        std::visit(expr_parser, *ast.condition);
        decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult());
    }

    // Labels survive only as comments; gotos were eliminated earlier.
    void operator()(const ASTLabel& ast) {
        decomp.code.AddLine("// Label_{}:", ast.index);
    }

    // Gotos must have been removed by the structurizer before this pass.
    void operator()([[maybe_unused]] const ASTGoto& ast) {
        UNREACHABLE();
    }

    // do { <children> } while (<condition>);
    void operator()(const ASTDoWhile& ast) {
        ExprDecompiler expr_parser{decomp};
        std::visit(expr_parser, *ast.condition);
        decomp.code.AddLine("do {{");
        decomp.code.scope++;
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
        decomp.code.scope--;
        decomp.code.AddLine("}} while({});", expr_parser.GetResult());
    }

    // Conditional return/discard; the guard is omitted when the condition is
    // statically true.
    void operator()(const ASTReturn& ast) {
        const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
        if (!is_true) {
            ExprDecompiler expr_parser{decomp};
            std::visit(expr_parser, *ast.condition);
            decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
            decomp.code.scope++;
        }
        if (ast.kills) {
            decomp.code.AddLine("discard;");
        } else {
            decomp.PreExit();
            decomp.code.AddLine("return;");
        }
        if (!is_true) {
            decomp.code.scope--;
            decomp.code.AddLine("}}");
        }
    }

    // Conditional loop break; same static-true shortcut as ASTReturn.
    void operator()(const ASTBreak& ast) {
        const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
        if (!is_true) {
            ExprDecompiler expr_parser{decomp};
            std::visit(expr_parser, *ast.condition);
            decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
            decomp.code.scope++;
        }
        decomp.code.AddLine("break;");
        if (!is_true) {
            decomp.code.scope--;
            decomp.code.AddLine("}}");
        }
    }

    // Dispatches on the node's concrete alternative.
    void Visit(const ASTNode& node) {
        std::visit(*this, *node->GetInnerData());
    }

private:
    GLSLDecompiler& decomp;
};
| 2938 | |||
| 2939 | void GLSLDecompiler::DecompileAST() { | ||
| 2940 | const u32 num_flow_variables = ir.GetASTNumVariables(); | ||
| 2941 | for (u32 i = 0; i < num_flow_variables; i++) { | ||
| 2942 | code.AddLine("bool {} = false;", GetFlowVariable(i)); | ||
| 2943 | } | ||
| 2944 | |||
| 2945 | ASTDecompiler decompiler{*this}; | ||
| 2946 | decompiler.Visit(ir.GetASTProgram()); | ||
| 2947 | } | ||
| 2948 | |||
| 2949 | } // Anonymous namespace | ||
| 2950 | |||
| 2951 | ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) { | ||
| 2952 | ShaderEntries entries; | ||
| 2953 | for (const auto& cbuf : ir.GetConstantBuffers()) { | ||
| 2954 | entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), | ||
| 2955 | cbuf.first); | ||
| 2956 | } | ||
| 2957 | for (const auto& [base, usage] : ir.GetGlobalMemory()) { | ||
| 2958 | entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, | ||
| 2959 | usage.is_written); | ||
| 2960 | } | ||
| 2961 | for (const auto& sampler : ir.GetSamplers()) { | ||
| 2962 | entries.samplers.emplace_back(sampler); | ||
| 2963 | } | ||
| 2964 | for (const auto& image : ir.GetImages()) { | ||
| 2965 | entries.images.emplace_back(image); | ||
| 2966 | } | ||
| 2967 | const auto clip_distances = ir.GetClipDistances(); | ||
| 2968 | for (std::size_t i = 0; i < std::size(clip_distances); ++i) { | ||
| 2969 | entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; | ||
| 2970 | } | ||
| 2971 | for (const auto& buffer : entries.const_buffers) { | ||
| 2972 | entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); | ||
| 2973 | } | ||
| 2974 | entries.shader_length = ir.GetLength(); | ||
| 2975 | return entries; | ||
| 2976 | } | ||
| 2977 | |||
// Public entry point: decompiles the given shader IR to GLSL source text.
// `identifier` is embedded for debugging; `suffix` disambiguates symbol names
// when several decompiled shaders are combined into one program.
std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry,
                            ShaderType stage, std::string_view identifier,
                            std::string_view suffix) {
    GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix);
    decompiler.Decompile();
    return decompiler.GetResult();
}
| 2985 | |||
| 2986 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h deleted file mode 100644 index 0397a000c..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ /dev/null | |||
| @@ -1,69 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <string> | ||
| 9 | #include <string_view> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "video_core/engines/maxwell_3d.h" | ||
| 14 | #include "video_core/engines/shader_type.h" | ||
| 15 | #include "video_core/shader/registry.h" | ||
| 16 | #include "video_core/shader/shader_ir.h" | ||
| 17 | |||
| 18 | namespace OpenGL { | ||
| 19 | |||
| 20 | class Device; | ||
| 21 | |||
| 22 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 23 | using SamplerEntry = VideoCommon::Shader::SamplerEntry; | ||
| 24 | using ImageEntry = VideoCommon::Shader::ImageEntry; | ||
| 25 | |||
// Constant-buffer usage record: extends the generic ConstBuffer description
// with the hardware const-buffer slot index it was bound to.
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
public:
    explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_)
        : ConstBuffer{max_offset_, is_indirect_}, index{index_} {}

    // Hardware const-buffer slot this entry refers to.
    u32 GetIndex() const {
        return index;
    }

private:
    u32 index = 0;
};
| 38 | |||
| 39 | struct GlobalMemoryEntry { | ||
| 40 | constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_, | ||
| 41 | bool is_written_) | ||
| 42 | : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{ | ||
| 43 | is_written_} {} | ||
| 44 | |||
| 45 | u32 cbuf_index = 0; | ||
| 46 | u32 cbuf_offset = 0; | ||
| 47 | bool is_read = false; | ||
| 48 | bool is_written = false; | ||
| 49 | }; | ||
| 50 | |||
| 51 | struct ShaderEntries { | ||
| 52 | std::vector<ConstBufferEntry> const_buffers; | ||
| 53 | std::vector<GlobalMemoryEntry> global_memory_entries; | ||
| 54 | std::vector<SamplerEntry> samplers; | ||
| 55 | std::vector<ImageEntry> images; | ||
| 56 | std::size_t shader_length{}; | ||
| 57 | u32 clip_distances{}; | ||
| 58 | u32 enabled_uniform_buffers{}; | ||
| 59 | }; | ||
| 60 | |||
| 61 | ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 62 | Tegra::Engines::ShaderType stage); | ||
| 63 | |||
| 64 | std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 65 | const VideoCommon::Shader::Registry& registry, | ||
| 66 | Tegra::Engines::ShaderType stage, std::string_view identifier, | ||
| 67 | std::string_view suffix = {}); | ||
| 68 | |||
| 69 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp deleted file mode 100644 index 0deb86517..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ /dev/null | |||
| @@ -1,482 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "common/fs/file.h" | ||
| 12 | #include "common/fs/fs.h" | ||
| 13 | #include "common/fs/path_util.h" | ||
| 14 | #include "common/logging/log.h" | ||
| 15 | #include "common/scm_rev.h" | ||
| 16 | #include "common/settings.h" | ||
| 17 | #include "common/zstd_compression.h" | ||
| 18 | #include "core/core.h" | ||
| 19 | #include "core/hle/kernel/k_process.h" | ||
| 20 | #include "video_core/engines/shader_type.h" | ||
| 21 | #include "video_core/renderer_opengl/gl_shader_cache.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | ||
| 23 | |||
| 24 | namespace OpenGL { | ||
| 25 | |||
| 26 | using Tegra::Engines::ShaderType; | ||
| 27 | using VideoCommon::Shader::BindlessSamplerMap; | ||
| 28 | using VideoCommon::Shader::BoundSamplerMap; | ||
| 29 | using VideoCommon::Shader::KeyMap; | ||
| 30 | using VideoCommon::Shader::SeparateSamplerKey; | ||
| 31 | using ShaderCacheVersionHash = std::array<u8, 64>; | ||
| 32 | |||
| 33 | struct ConstBufferKey { | ||
| 34 | u32 cbuf = 0; | ||
| 35 | u32 offset = 0; | ||
| 36 | u32 value = 0; | ||
| 37 | }; | ||
| 38 | |||
| 39 | struct BoundSamplerEntry { | ||
| 40 | u32 offset = 0; | ||
| 41 | Tegra::Engines::SamplerDescriptor sampler; | ||
| 42 | }; | ||
| 43 | |||
| 44 | struct SeparateSamplerEntry { | ||
| 45 | u32 cbuf1 = 0; | ||
| 46 | u32 cbuf2 = 0; | ||
| 47 | u32 offset1 = 0; | ||
| 48 | u32 offset2 = 0; | ||
| 49 | Tegra::Engines::SamplerDescriptor sampler; | ||
| 50 | }; | ||
| 51 | |||
| 52 | struct BindlessSamplerEntry { | ||
| 53 | u32 cbuf = 0; | ||
| 54 | u32 offset = 0; | ||
| 55 | Tegra::Engines::SamplerDescriptor sampler; | ||
| 56 | }; | ||
| 57 | |||
| 58 | namespace { | ||
| 59 | |||
| 60 | constexpr u32 NativeVersion = 21; | ||
| 61 | |||
| 62 | ShaderCacheVersionHash GetShaderCacheVersionHash() { | ||
| 63 | ShaderCacheVersionHash hash{}; | ||
| 64 | const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size()); | ||
| 65 | std::memcpy(hash.data(), Common::g_shader_cache_version, length); | ||
| 66 | return hash; | ||
| 67 | } | ||
| 68 | |||
| 69 | } // Anonymous namespace | ||
| 70 | |||
| 71 | ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default; | ||
| 72 | |||
| 73 | ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default; | ||
| 74 | |||
| 75 | bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) { | ||
| 76 | if (!file.ReadObject(type)) { | ||
| 77 | return false; | ||
| 78 | } | ||
| 79 | u32 code_size; | ||
| 80 | u32 code_size_b; | ||
| 81 | if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) { | ||
| 82 | return false; | ||
| 83 | } | ||
| 84 | code.resize(code_size); | ||
| 85 | code_b.resize(code_size_b); | ||
| 86 | if (file.Read(code) != code_size) { | ||
| 87 | return false; | ||
| 88 | } | ||
| 89 | if (HasProgramA() && file.Read(code_b) != code_size_b) { | ||
| 90 | return false; | ||
| 91 | } | ||
| 92 | |||
| 93 | u8 is_texture_handler_size_known; | ||
| 94 | u32 texture_handler_size_value; | ||
| 95 | u32 num_keys; | ||
| 96 | u32 num_bound_samplers; | ||
| 97 | u32 num_separate_samplers; | ||
| 98 | u32 num_bindless_samplers; | ||
| 99 | if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) || | ||
| 100 | !file.ReadObject(is_texture_handler_size_known) || | ||
| 101 | !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) || | ||
| 102 | !file.ReadObject(compute_info) || !file.ReadObject(num_keys) || | ||
| 103 | !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) || | ||
| 104 | !file.ReadObject(num_bindless_samplers)) { | ||
| 105 | return false; | ||
| 106 | } | ||
| 107 | if (is_texture_handler_size_known) { | ||
| 108 | texture_handler_size = texture_handler_size_value; | ||
| 109 | } | ||
| 110 | |||
| 111 | std::vector<ConstBufferKey> flat_keys(num_keys); | ||
| 112 | std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers); | ||
| 113 | std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers); | ||
| 114 | std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers); | ||
| 115 | if (file.Read(flat_keys) != flat_keys.size() || | ||
| 116 | file.Read(flat_bound_samplers) != flat_bound_samplers.size() || | ||
| 117 | file.Read(flat_separate_samplers) != flat_separate_samplers.size() || | ||
| 118 | file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) { | ||
| 119 | return false; | ||
| 120 | } | ||
| 121 | for (const auto& entry : flat_keys) { | ||
| 122 | keys.insert({{entry.cbuf, entry.offset}, entry.value}); | ||
| 123 | } | ||
| 124 | for (const auto& entry : flat_bound_samplers) { | ||
| 125 | bound_samplers.emplace(entry.offset, entry.sampler); | ||
| 126 | } | ||
| 127 | for (const auto& entry : flat_separate_samplers) { | ||
| 128 | SeparateSamplerKey key; | ||
| 129 | key.buffers = {entry.cbuf1, entry.cbuf2}; | ||
| 130 | key.offsets = {entry.offset1, entry.offset2}; | ||
| 131 | separate_samplers.emplace(key, entry.sampler); | ||
| 132 | } | ||
| 133 | for (const auto& entry : flat_bindless_samplers) { | ||
| 134 | bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler}); | ||
| 135 | } | ||
| 136 | |||
| 137 | return true; | ||
| 138 | } | ||
| 139 | |||
| 140 | bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const { | ||
| 141 | if (!file.WriteObject(static_cast<u32>(type)) || | ||
| 142 | !file.WriteObject(static_cast<u32>(code.size())) || | ||
| 143 | !file.WriteObject(static_cast<u32>(code_b.size()))) { | ||
| 144 | return false; | ||
| 145 | } | ||
| 146 | if (file.Write(code) != code.size()) { | ||
| 147 | return false; | ||
| 148 | } | ||
| 149 | if (HasProgramA() && file.Write(code_b) != code_b.size()) { | ||
| 150 | return false; | ||
| 151 | } | ||
| 152 | |||
| 153 | if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) || | ||
| 154 | !file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) || | ||
| 155 | !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) || | ||
| 156 | !file.WriteObject(compute_info) || !file.WriteObject(static_cast<u32>(keys.size())) || | ||
| 157 | !file.WriteObject(static_cast<u32>(bound_samplers.size())) || | ||
| 158 | !file.WriteObject(static_cast<u32>(separate_samplers.size())) || | ||
| 159 | !file.WriteObject(static_cast<u32>(bindless_samplers.size()))) { | ||
| 160 | return false; | ||
| 161 | } | ||
| 162 | |||
| 163 | std::vector<ConstBufferKey> flat_keys; | ||
| 164 | flat_keys.reserve(keys.size()); | ||
| 165 | for (const auto& [address, value] : keys) { | ||
| 166 | flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); | ||
| 167 | } | ||
| 168 | |||
| 169 | std::vector<BoundSamplerEntry> flat_bound_samplers; | ||
| 170 | flat_bound_samplers.reserve(bound_samplers.size()); | ||
| 171 | for (const auto& [address, sampler] : bound_samplers) { | ||
| 172 | flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler}); | ||
| 173 | } | ||
| 174 | |||
| 175 | std::vector<SeparateSamplerEntry> flat_separate_samplers; | ||
| 176 | flat_separate_samplers.reserve(separate_samplers.size()); | ||
| 177 | for (const auto& [key, sampler] : separate_samplers) { | ||
| 178 | SeparateSamplerEntry entry; | ||
| 179 | std::tie(entry.cbuf1, entry.cbuf2) = key.buffers; | ||
| 180 | std::tie(entry.offset1, entry.offset2) = key.offsets; | ||
| 181 | entry.sampler = sampler; | ||
| 182 | flat_separate_samplers.push_back(entry); | ||
| 183 | } | ||
| 184 | |||
| 185 | std::vector<BindlessSamplerEntry> flat_bindless_samplers; | ||
| 186 | flat_bindless_samplers.reserve(bindless_samplers.size()); | ||
| 187 | for (const auto& [address, sampler] : bindless_samplers) { | ||
| 188 | flat_bindless_samplers.push_back( | ||
| 189 | BindlessSamplerEntry{address.first, address.second, sampler}); | ||
| 190 | } | ||
| 191 | |||
| 192 | return file.Write(flat_keys) == flat_keys.size() && | ||
| 193 | file.Write(flat_bound_samplers) == flat_bound_samplers.size() && | ||
| 194 | file.Write(flat_separate_samplers) == flat_separate_samplers.size() && | ||
| 195 | file.Write(flat_bindless_samplers) == flat_bindless_samplers.size(); | ||
| 196 | } | ||
| 197 | |||
| 198 | ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default; | ||
| 199 | |||
| 200 | ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; | ||
| 201 | |||
| 202 | void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) { | ||
| 203 | title_id = title_id_; | ||
| 204 | } | ||
| 205 | |||
| 206 | std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() { | ||
| 207 | // Skip games without title id | ||
| 208 | const bool has_title_id = title_id != 0; | ||
| 209 | if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) { | ||
| 210 | return std::nullopt; | ||
| 211 | } | ||
| 212 | |||
| 213 | Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read, | ||
| 214 | Common::FS::FileType::BinaryFile}; | ||
| 215 | if (!file.IsOpen()) { | ||
| 216 | LOG_INFO(Render_OpenGL, "No transferable shader cache found"); | ||
| 217 | is_usable = true; | ||
| 218 | return std::nullopt; | ||
| 219 | } | ||
| 220 | |||
| 221 | u32 version{}; | ||
| 222 | if (!file.ReadObject(version)) { | ||
| 223 | LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it"); | ||
| 224 | return std::nullopt; | ||
| 225 | } | ||
| 226 | |||
| 227 | if (version < NativeVersion) { | ||
| 228 | LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing"); | ||
| 229 | file.Close(); | ||
| 230 | InvalidateTransferable(); | ||
| 231 | is_usable = true; | ||
| 232 | return std::nullopt; | ||
| 233 | } | ||
| 234 | if (version > NativeVersion) { | ||
| 235 | LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " | ||
| 236 | "of the emulator, skipping"); | ||
| 237 | return std::nullopt; | ||
| 238 | } | ||
| 239 | |||
| 240 | // Version is valid, load the shaders | ||
| 241 | std::vector<ShaderDiskCacheEntry> entries; | ||
| 242 | while (static_cast<u64>(file.Tell()) < file.GetSize()) { | ||
| 243 | ShaderDiskCacheEntry& entry = entries.emplace_back(); | ||
| 244 | if (!entry.Load(file)) { | ||
| 245 | LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping"); | ||
| 246 | return std::nullopt; | ||
| 247 | } | ||
| 248 | } | ||
| 249 | |||
| 250 | is_usable = true; | ||
| 251 | return {std::move(entries)}; | ||
| 252 | } | ||
| 253 | |||
| 254 | std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() { | ||
| 255 | if (!is_usable) { | ||
| 256 | return {}; | ||
| 257 | } | ||
| 258 | |||
| 259 | Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read, | ||
| 260 | Common::FS::FileType::BinaryFile}; | ||
| 261 | if (!file.IsOpen()) { | ||
| 262 | LOG_INFO(Render_OpenGL, "No precompiled shader cache found"); | ||
| 263 | return {}; | ||
| 264 | } | ||
| 265 | |||
| 266 | if (const auto result = LoadPrecompiledFile(file)) { | ||
| 267 | return *result; | ||
| 268 | } | ||
| 269 | |||
| 270 | LOG_INFO(Render_OpenGL, "Failed to load precompiled cache"); | ||
| 271 | file.Close(); | ||
| 272 | InvalidatePrecompiled(); | ||
| 273 | return {}; | ||
| 274 | } | ||
| 275 | |||
| 276 | std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile( | ||
| 277 | Common::FS::IOFile& file) { | ||
| 278 | // Read compressed file from disk and decompress to virtual precompiled cache file | ||
| 279 | std::vector<u8> compressed(file.GetSize()); | ||
| 280 | if (file.Read(compressed) != file.GetSize()) { | ||
| 281 | return std::nullopt; | ||
| 282 | } | ||
| 283 | const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed); | ||
| 284 | SaveArrayToPrecompiled(decompressed.data(), decompressed.size()); | ||
| 285 | precompiled_cache_virtual_file_offset = 0; | ||
| 286 | |||
| 287 | ShaderCacheVersionHash file_hash{}; | ||
| 288 | if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) { | ||
| 289 | precompiled_cache_virtual_file_offset = 0; | ||
| 290 | return std::nullopt; | ||
| 291 | } | ||
| 292 | if (GetShaderCacheVersionHash() != file_hash) { | ||
| 293 | LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator"); | ||
| 294 | precompiled_cache_virtual_file_offset = 0; | ||
| 295 | return std::nullopt; | ||
| 296 | } | ||
| 297 | |||
| 298 | std::vector<ShaderDiskCachePrecompiled> entries; | ||
| 299 | while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { | ||
| 300 | u32 binary_size; | ||
| 301 | auto& entry = entries.emplace_back(); | ||
| 302 | if (!LoadObjectFromPrecompiled(entry.unique_identifier) || | ||
| 303 | !LoadObjectFromPrecompiled(entry.binary_format) || | ||
| 304 | !LoadObjectFromPrecompiled(binary_size)) { | ||
| 305 | return std::nullopt; | ||
| 306 | } | ||
| 307 | |||
| 308 | entry.binary.resize(binary_size); | ||
| 309 | if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) { | ||
| 310 | return std::nullopt; | ||
| 311 | } | ||
| 312 | } | ||
| 313 | return entries; | ||
| 314 | } | ||
| 315 | |||
| 316 | void ShaderDiskCacheOpenGL::InvalidateTransferable() { | ||
| 317 | if (!Common::FS::RemoveFile(GetTransferablePath())) { | ||
| 318 | LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}", | ||
| 319 | Common::FS::PathToUTF8String(GetTransferablePath())); | ||
| 320 | } | ||
| 321 | InvalidatePrecompiled(); | ||
| 322 | } | ||
| 323 | |||
| 324 | void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { | ||
| 325 | // Clear virtaul precompiled cache file | ||
| 326 | precompiled_cache_virtual_file.Resize(0); | ||
| 327 | |||
| 328 | if (!Common::FS::RemoveFile(GetPrecompiledPath())) { | ||
| 329 | LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", | ||
| 330 | Common::FS::PathToUTF8String(GetPrecompiledPath())); | ||
| 331 | } | ||
| 332 | } | ||
| 333 | |||
| 334 | void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { | ||
| 335 | if (!is_usable) { | ||
| 336 | return; | ||
| 337 | } | ||
| 338 | |||
| 339 | const u64 id = entry.unique_identifier; | ||
| 340 | if (stored_transferable.contains(id)) { | ||
| 341 | // The shader already exists | ||
| 342 | return; | ||
| 343 | } | ||
| 344 | |||
| 345 | Common::FS::IOFile file = AppendTransferableFile(); | ||
| 346 | if (!file.IsOpen()) { | ||
| 347 | return; | ||
| 348 | } | ||
| 349 | if (!entry.Save(file)) { | ||
| 350 | LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); | ||
| 351 | file.Close(); | ||
| 352 | InvalidateTransferable(); | ||
| 353 | return; | ||
| 354 | } | ||
| 355 | |||
| 356 | stored_transferable.insert(id); | ||
| 357 | } | ||
| 358 | |||
| 359 | void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) { | ||
| 360 | if (!is_usable) { | ||
| 361 | return; | ||
| 362 | } | ||
| 363 | |||
| 364 | // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header | ||
| 365 | // when writing the dump. This should be done the moment I get access to write to the virtual | ||
| 366 | // file. | ||
| 367 | if (precompiled_cache_virtual_file.GetSize() == 0) { | ||
| 368 | SavePrecompiledHeaderToVirtualPrecompiledCache(); | ||
| 369 | } | ||
| 370 | |||
| 371 | GLint binary_length; | ||
| 372 | glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); | ||
| 373 | |||
| 374 | GLenum binary_format; | ||
| 375 | std::vector<u8> binary(binary_length); | ||
| 376 | glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); | ||
| 377 | |||
| 378 | if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) || | ||
| 379 | !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) || | ||
| 380 | !SaveArrayToPrecompiled(binary.data(), binary.size())) { | ||
| 381 | LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", | ||
| 382 | unique_identifier); | ||
| 383 | InvalidatePrecompiled(); | ||
| 384 | } | ||
| 385 | } | ||
| 386 | |||
| 387 | Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { | ||
| 388 | if (!EnsureDirectories()) { | ||
| 389 | return {}; | ||
| 390 | } | ||
| 391 | |||
| 392 | const auto transferable_path{GetTransferablePath()}; | ||
| 393 | const bool existed = Common::FS::Exists(transferable_path); | ||
| 394 | |||
| 395 | Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append, | ||
| 396 | Common::FS::FileType::BinaryFile}; | ||
| 397 | if (!file.IsOpen()) { | ||
| 398 | LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", | ||
| 399 | Common::FS::PathToUTF8String(transferable_path)); | ||
| 400 | return {}; | ||
| 401 | } | ||
| 402 | if (!existed || file.GetSize() == 0) { | ||
| 403 | // If the file didn't exist, write its version | ||
| 404 | if (!file.WriteObject(NativeVersion)) { | ||
| 405 | LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}", | ||
| 406 | Common::FS::PathToUTF8String(transferable_path)); | ||
| 407 | return {}; | ||
| 408 | } | ||
| 409 | } | ||
| 410 | return file; | ||
| 411 | } | ||
| 412 | |||
| 413 | void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() { | ||
| 414 | const auto hash{GetShaderCacheVersionHash()}; | ||
| 415 | if (!SaveArrayToPrecompiled(hash.data(), hash.size())) { | ||
| 416 | LOG_ERROR( | ||
| 417 | Render_OpenGL, | ||
| 418 | "Failed to write precompiled cache version hash to virtual precompiled cache file"); | ||
| 419 | } | ||
| 420 | } | ||
| 421 | |||
| 422 | void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { | ||
| 423 | precompiled_cache_virtual_file_offset = 0; | ||
| 424 | const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); | ||
| 425 | const std::vector<u8> compressed = | ||
| 426 | Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); | ||
| 427 | |||
| 428 | const auto precompiled_path = GetPrecompiledPath(); | ||
| 429 | Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write, | ||
| 430 | Common::FS::FileType::BinaryFile}; | ||
| 431 | |||
| 432 | if (!file.IsOpen()) { | ||
| 433 | LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", | ||
| 434 | Common::FS::PathToUTF8String(precompiled_path)); | ||
| 435 | return; | ||
| 436 | } | ||
| 437 | if (file.Write(compressed) != compressed.size()) { | ||
| 438 | LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", | ||
| 439 | Common::FS::PathToUTF8String(precompiled_path)); | ||
| 440 | } | ||
| 441 | } | ||
| 442 | |||
| 443 | bool ShaderDiskCacheOpenGL::EnsureDirectories() const { | ||
| 444 | const auto CreateDir = [](const std::filesystem::path& dir) { | ||
| 445 | if (!Common::FS::CreateDir(dir)) { | ||
| 446 | LOG_ERROR(Render_OpenGL, "Failed to create directory={}", | ||
| 447 | Common::FS::PathToUTF8String(dir)); | ||
| 448 | return false; | ||
| 449 | } | ||
| 450 | return true; | ||
| 451 | }; | ||
| 452 | |||
| 453 | return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) && | ||
| 454 | CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) && | ||
| 455 | CreateDir(GetPrecompiledDir()); | ||
| 456 | } | ||
| 457 | |||
| 458 | std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const { | ||
| 459 | return GetTransferableDir() / fmt::format("{}.bin", GetTitleID()); | ||
| 460 | } | ||
| 461 | |||
| 462 | std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const { | ||
| 463 | return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID()); | ||
| 464 | } | ||
| 465 | |||
| 466 | std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const { | ||
| 467 | return GetBaseDir() / "transferable"; | ||
| 468 | } | ||
| 469 | |||
| 470 | std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const { | ||
| 471 | return GetBaseDir() / "precompiled"; | ||
| 472 | } | ||
| 473 | |||
| 474 | std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const { | ||
| 475 | return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl"; | ||
| 476 | } | ||
| 477 | |||
| 478 | std::string ShaderDiskCacheOpenGL::GetTitleID() const { | ||
| 479 | return fmt::format("{:016X}", title_id); | ||
| 480 | } | ||
| 481 | |||
| 482 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h deleted file mode 100644 index f8bc23868..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ /dev/null | |||
| @@ -1,176 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <filesystem> | ||
| 8 | #include <optional> | ||
| 9 | #include <string> | ||
| 10 | #include <tuple> | ||
| 11 | #include <type_traits> | ||
| 12 | #include <unordered_map> | ||
| 13 | #include <unordered_set> | ||
| 14 | #include <utility> | ||
| 15 | #include <vector> | ||
| 16 | |||
| 17 | #include <glad/glad.h> | ||
| 18 | |||
| 19 | #include "common/assert.h" | ||
| 20 | #include "common/common_types.h" | ||
| 21 | #include "core/file_sys/vfs_vector.h" | ||
| 22 | #include "video_core/engines/shader_type.h" | ||
| 23 | #include "video_core/shader/registry.h" | ||
| 24 | |||
| 25 | namespace Common::FS { | ||
| 26 | class IOFile; | ||
| 27 | } | ||
| 28 | |||
| 29 | namespace OpenGL { | ||
| 30 | |||
| 31 | using ProgramCode = std::vector<u64>; | ||
| 32 | |||
| 33 | /// Describes a shader and how it's used by the guest GPU | ||
| 34 | struct ShaderDiskCacheEntry { | ||
| 35 | ShaderDiskCacheEntry(); | ||
| 36 | ~ShaderDiskCacheEntry(); | ||
| 37 | |||
| 38 | bool Load(Common::FS::IOFile& file); | ||
| 39 | |||
| 40 | bool Save(Common::FS::IOFile& file) const; | ||
| 41 | |||
| 42 | bool HasProgramA() const { | ||
| 43 | return !code.empty() && !code_b.empty(); | ||
| 44 | } | ||
| 45 | |||
| 46 | Tegra::Engines::ShaderType type{}; | ||
| 47 | ProgramCode code; | ||
| 48 | ProgramCode code_b; | ||
| 49 | |||
| 50 | u64 unique_identifier = 0; | ||
| 51 | std::optional<u32> texture_handler_size; | ||
| 52 | u32 bound_buffer = 0; | ||
| 53 | VideoCommon::Shader::GraphicsInfo graphics_info; | ||
| 54 | VideoCommon::Shader::ComputeInfo compute_info; | ||
| 55 | VideoCommon::Shader::KeyMap keys; | ||
| 56 | VideoCommon::Shader::BoundSamplerMap bound_samplers; | ||
| 57 | VideoCommon::Shader::SeparateSamplerMap separate_samplers; | ||
| 58 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; | ||
| 59 | }; | ||
| 60 | |||
| 61 | /// Contains an OpenGL dumped binary program | ||
| 62 | struct ShaderDiskCachePrecompiled { | ||
| 63 | u64 unique_identifier = 0; | ||
| 64 | GLenum binary_format = 0; | ||
| 65 | std::vector<u8> binary; | ||
| 66 | }; | ||
| 67 | |||
| 68 | class ShaderDiskCacheOpenGL { | ||
| 69 | public: | ||
| 70 | explicit ShaderDiskCacheOpenGL(); | ||
| 71 | ~ShaderDiskCacheOpenGL(); | ||
| 72 | |||
| 73 | /// Binds a title ID for all future operations. | ||
| 74 | void BindTitleID(u64 title_id); | ||
| 75 | |||
| 76 | /// Loads transferable cache. If file has a old version or on failure, it deletes the file. | ||
| 77 | std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable(); | ||
| 78 | |||
| 79 | /// Loads current game's precompiled cache. Invalidates on failure. | ||
| 80 | std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled(); | ||
| 81 | |||
| 82 | /// Removes the transferable (and precompiled) cache file. | ||
| 83 | void InvalidateTransferable(); | ||
| 84 | |||
| 85 | /// Removes the precompiled cache file and clears virtual precompiled cache file. | ||
| 86 | void InvalidatePrecompiled(); | ||
| 87 | |||
| 88 | /// Saves a raw dump to the transferable file. Checks for collisions. | ||
| 89 | void SaveEntry(const ShaderDiskCacheEntry& entry); | ||
| 90 | |||
| 91 | /// Saves a dump entry to the precompiled file. Does not check for collisions. | ||
| 92 | void SavePrecompiled(u64 unique_identifier, GLuint program); | ||
| 93 | |||
| 94 | /// Serializes virtual precompiled shader cache file to real file | ||
| 95 | void SaveVirtualPrecompiledFile(); | ||
| 96 | |||
| 97 | private: | ||
| 98 | /// Loads the transferable cache. Returns empty on failure. | ||
| 99 | std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile( | ||
| 100 | Common::FS::IOFile& file); | ||
| 101 | |||
| 102 | /// Opens current game's transferable file and write it's header if it doesn't exist | ||
| 103 | Common::FS::IOFile AppendTransferableFile() const; | ||
| 104 | |||
| 105 | /// Save precompiled header to precompiled_cache_in_memory | ||
| 106 | void SavePrecompiledHeaderToVirtualPrecompiledCache(); | ||
| 107 | |||
| 108 | /// Create shader disk cache directories. Returns true on success. | ||
| 109 | bool EnsureDirectories() const; | ||
| 110 | |||
| 111 | /// Gets current game's transferable file path | ||
| 112 | std::filesystem::path GetTransferablePath() const; | ||
| 113 | |||
| 114 | /// Gets current game's precompiled file path | ||
| 115 | std::filesystem::path GetPrecompiledPath() const; | ||
| 116 | |||
| 117 | /// Get user's transferable directory path | ||
| 118 | std::filesystem::path GetTransferableDir() const; | ||
| 119 | |||
| 120 | /// Get user's precompiled directory path | ||
| 121 | std::filesystem::path GetPrecompiledDir() const; | ||
| 122 | |||
| 123 | /// Get user's shader directory path | ||
| 124 | std::filesystem::path GetBaseDir() const; | ||
| 125 | |||
| 126 | /// Get current game's title id | ||
| 127 | std::string GetTitleID() const; | ||
| 128 | |||
| 129 | template <typename T> | ||
| 130 | bool SaveArrayToPrecompiled(const T* data, std::size_t length) { | ||
| 131 | const std::size_t write_length = precompiled_cache_virtual_file.WriteArray( | ||
| 132 | data, length, precompiled_cache_virtual_file_offset); | ||
| 133 | precompiled_cache_virtual_file_offset += write_length; | ||
| 134 | return write_length == sizeof(T) * length; | ||
| 135 | } | ||
| 136 | |||
| 137 | template <typename T> | ||
| 138 | bool LoadArrayFromPrecompiled(T* data, std::size_t length) { | ||
| 139 | const std::size_t read_length = precompiled_cache_virtual_file.ReadArray( | ||
| 140 | data, length, precompiled_cache_virtual_file_offset); | ||
| 141 | precompiled_cache_virtual_file_offset += read_length; | ||
| 142 | return read_length == sizeof(T) * length; | ||
| 143 | } | ||
| 144 | |||
| 145 | template <typename T> | ||
| 146 | bool SaveObjectToPrecompiled(const T& object) { | ||
| 147 | return SaveArrayToPrecompiled(&object, 1); | ||
| 148 | } | ||
| 149 | |||
| 150 | bool SaveObjectToPrecompiled(bool object) { | ||
| 151 | const auto value = static_cast<u8>(object); | ||
| 152 | return SaveArrayToPrecompiled(&value, 1); | ||
| 153 | } | ||
| 154 | |||
| 155 | template <typename T> | ||
| 156 | bool LoadObjectFromPrecompiled(T& object) { | ||
| 157 | return LoadArrayFromPrecompiled(&object, 1); | ||
| 158 | } | ||
| 159 | |||
| 160 | // Stores whole precompiled cache which will be read from or saved to the precompiled chache | ||
| 161 | // file | ||
| 162 | FileSys::VectorVfsFile precompiled_cache_virtual_file; | ||
| 163 | // Stores the current offset of the precompiled cache file for IO purposes | ||
| 164 | std::size_t precompiled_cache_virtual_file_offset = 0; | ||
| 165 | |||
| 166 | // Stored transferable shaders | ||
| 167 | std::unordered_set<u64> stored_transferable; | ||
| 168 | |||
| 169 | /// Title ID to operate on | ||
| 170 | u64 title_id = 0; | ||
| 171 | |||
| 172 | // The cache has been loaded at boot | ||
| 173 | bool is_usable = false; | ||
| 174 | }; | ||
| 175 | |||
| 176 | } // namespace OpenGL | ||