diff options
| author | 2019-08-16 16:25:02 -0400 | |
|---|---|---|
| committer | 2019-10-04 18:52:50 -0400 | |
| commit | 47e4f6a52c5eb34916e2c1f4c876e6e8624e3840 (patch) | |
| tree | 60ca95508197ceb868b004791caf81a042b22842 /src/video_core | |
| parent | gl_shader_decompiler: Implement AST decompiling (diff) | |
| download | yuzu-47e4f6a52c5eb34916e2c1f4c876e6e8624e3840.tar.gz yuzu-47e4f6a52c5eb34916e2c1f4c876e6e8624e3840.tar.xz yuzu-47e4f6a52c5eb34916e2c1f4c876e6e8624e3840.zip | |
Shader_Ir: Refactor Decompilation process and allow multiple decompilation modes.
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 25 | ||||
| -rw-r--r-- | src/video_core/shader/ast.cpp | 98 | ||||
| -rw-r--r-- | src/video_core/shader/ast.h | 20 | ||||
| -rw-r--r-- | src/video_core/shader/compiler_settings.cpp | 26 | ||||
| -rw-r--r-- | src/video_core/shader/compiler_settings.h | 25 | ||||
| -rw-r--r-- | src/video_core/shader/control_flow.cpp | 92 | ||||
| -rw-r--r-- | src/video_core/shader/control_flow.h | 10 | ||||
| -rw-r--r-- | src/video_core/shader/decode.cpp | 86 | ||||
| -rw-r--r-- | src/video_core/shader/decode/other.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 10 |
13 files changed, 334 insertions, 82 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 33fa88762..eaa694ff8 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -109,6 +109,8 @@ add_library(video_core STATIC | |||
| 109 | shader/ast.h | 109 | shader/ast.h |
| 110 | shader/control_flow.cpp | 110 | shader/control_flow.cpp |
| 111 | shader/control_flow.h | 111 | shader/control_flow.h |
| 112 | shader/compiler_settings.cpp | ||
| 113 | shader/compiler_settings.h | ||
| 112 | shader/decode.cpp | 114 | shader/decode.cpp |
| 113 | shader/expr.cpp | 115 | shader/expr.cpp |
| 114 | shader/expr.h | 116 | shader/expr.h |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 2955c6abf..b8c3442bc 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -352,9 +352,11 @@ public: | |||
| 352 | // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems | 352 | // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems |
| 353 | // unlikely that shaders will use 20 nested SSYs and PBKs. | 353 | // unlikely that shaders will use 20 nested SSYs and PBKs. |
| 354 | constexpr u32 FLOW_STACK_SIZE = 20; | 354 | constexpr u32 FLOW_STACK_SIZE = 20; |
| 355 | for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { | 355 | if (!ir.IsFlowStackDisabled()) { |
| 356 | code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); | 356 | for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { |
| 357 | code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); | 357 | code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); |
| 358 | code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); | ||
| 359 | } | ||
| 358 | } | 360 | } |
| 359 | 361 | ||
| 360 | code.AddLine("while (true) {{"); | 362 | code.AddLine("while (true) {{"); |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 3a8d9e1da..72a49ebdc 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -11,6 +11,8 @@ | |||
| 11 | namespace OpenGL::GLShader { | 11 | namespace OpenGL::GLShader { |
| 12 | 12 | ||
| 13 | using Tegra::Engines::Maxwell3D; | 13 | using Tegra::Engines::Maxwell3D; |
| 14 | using VideoCommon::Shader::CompileDepth; | ||
| 15 | using VideoCommon::Shader::CompilerSettings; | ||
| 14 | using VideoCommon::Shader::ProgramCode; | 16 | using VideoCommon::Shader::ProgramCode; |
| 15 | using VideoCommon::Shader::ShaderIR; | 17 | using VideoCommon::Shader::ShaderIR; |
| 16 | 18 | ||
| @@ -31,13 +33,17 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | |||
| 31 | 33 | ||
| 32 | )"; | 34 | )"; |
| 33 | 35 | ||
| 34 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | 36 | CompilerSettings settings; |
| 37 | settings.depth = CompileDepth::NoFlowStack; | ||
| 38 | |||
| 39 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); | ||
| 35 | const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; | 40 | const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; |
| 36 | ProgramResult program = Decompile(device, program_ir, stage, "vertex"); | 41 | ProgramResult program = Decompile(device, program_ir, stage, "vertex"); |
| 37 | out += program.first; | 42 | out += program.first; |
| 38 | 43 | ||
| 39 | if (setup.IsDualProgram()) { | 44 | if (setup.IsDualProgram()) { |
| 40 | const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b); | 45 | const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b, |
| 46 | settings); | ||
| 41 | ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b"); | 47 | ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b"); |
| 42 | out += program_b.first; | 48 | out += program_b.first; |
| 43 | } | 49 | } |
| @@ -80,7 +86,10 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | |||
| 80 | 86 | ||
| 81 | )"; | 87 | )"; |
| 82 | 88 | ||
| 83 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | 89 | CompilerSettings settings; |
| 90 | settings.depth = CompileDepth::NoFlowStack; | ||
| 91 | |||
| 92 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); | ||
| 84 | ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry"); | 93 | ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry"); |
| 85 | out += program.first; | 94 | out += program.first; |
| 86 | 95 | ||
| @@ -114,7 +123,10 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { | |||
| 114 | }; | 123 | }; |
| 115 | 124 | ||
| 116 | )"; | 125 | )"; |
| 117 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | 126 | CompilerSettings settings; |
| 127 | settings.depth = CompileDepth::NoFlowStack; | ||
| 128 | |||
| 129 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); | ||
| 118 | ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment"); | 130 | ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment"); |
| 119 | out += program.first; | 131 | out += program.first; |
| 120 | 132 | ||
| @@ -133,7 +145,10 @@ ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& set | |||
| 133 | std::string out = "// Shader Unique Id: CS" + id + "\n\n"; | 145 | std::string out = "// Shader Unique Id: CS" + id + "\n\n"; |
| 134 | out += GetCommonDeclarations(); | 146 | out += GetCommonDeclarations(); |
| 135 | 147 | ||
| 136 | const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a); | 148 | CompilerSettings settings; |
| 149 | settings.depth = CompileDepth::NoFlowStack; | ||
| 150 | |||
| 151 | const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings); | ||
| 137 | ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute"); | 152 | ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute"); |
| 138 | out += program.first; | 153 | out += program.first; |
| 139 | 154 | ||
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp index 68a96cc79..14c50e1c6 100644 --- a/src/video_core/shader/ast.cpp +++ b/src/video_core/shader/ast.cpp | |||
| @@ -363,7 +363,7 @@ std::string ASTManager::Print() { | |||
| 363 | return printer.GetResult(); | 363 | return printer.GetResult(); |
| 364 | } | 364 | } |
| 365 | 365 | ||
| 366 | ASTManager::ASTManager() = default; | 366 | ASTManager::ASTManager(bool full_decompile) : full_decompile{full_decompile} {}; |
| 367 | 367 | ||
| 368 | ASTManager::~ASTManager() { | 368 | ASTManager::~ASTManager() { |
| 369 | Clear(); | 369 | Clear(); |
| @@ -383,6 +383,7 @@ ASTManager::ASTManager(ASTManager&& other) | |||
| 383 | } | 383 | } |
| 384 | 384 | ||
| 385 | ASTManager& ASTManager::operator=(ASTManager&& other) { | 385 | ASTManager& ASTManager::operator=(ASTManager&& other) { |
| 386 | full_decompile = other.full_decompile; | ||
| 386 | labels_map = std::move(other.labels_map); | 387 | labels_map = std::move(other.labels_map); |
| 387 | labels_count = other.labels_count; | 388 | labels_count = other.labels_count; |
| 388 | gotos = std::move(other.gotos); | 389 | gotos = std::move(other.gotos); |
| @@ -434,6 +435,13 @@ void ASTManager::Decompile() { | |||
| 434 | ASTNode goto_node = *it; | 435 | ASTNode goto_node = *it; |
| 435 | u32 label_index = goto_node->GetGotoLabel(); | 436 | u32 label_index = goto_node->GetGotoLabel(); |
| 436 | ASTNode label = labels[label_index]; | 437 | ASTNode label = labels[label_index]; |
| 438 | if (!full_decompile) { | ||
| 439 | // We only decompile backward jumps | ||
| 440 | if (!IsBackwardsJump(goto_node, label)) { | ||
| 441 | it++; | ||
| 442 | continue; | ||
| 443 | } | ||
| 444 | } | ||
| 437 | if (IndirectlyRelated(goto_node, label)) { | 445 | if (IndirectlyRelated(goto_node, label)) { |
| 438 | while (!DirectlyRelated(goto_node, label)) { | 446 | while (!DirectlyRelated(goto_node, label)) { |
| 439 | MoveOutward(goto_node); | 447 | MoveOutward(goto_node); |
| @@ -469,11 +477,91 @@ void ASTManager::Decompile() { | |||
| 469 | } | 477 | } |
| 470 | it++; | 478 | it++; |
| 471 | } | 479 | } |
| 472 | for (ASTNode label : labels) { | 480 | if (full_decompile) { |
| 473 | auto& manager = label->GetManager(); | 481 | for (ASTNode label : labels) { |
| 474 | manager.Remove(label); | 482 | auto& manager = label->GetManager(); |
| 483 | manager.Remove(label); | ||
| 484 | } | ||
| 485 | labels.clear(); | ||
| 486 | } else { | ||
| 487 | auto it = labels.begin(); | ||
| 488 | while (it != labels.end()) { | ||
| 489 | bool can_remove = true; | ||
| 490 | ASTNode label = *it; | ||
| 491 | for (ASTNode goto_node : gotos) { | ||
| 492 | u32 label_index = goto_node->GetGotoLabel(); | ||
| 493 | ASTNode glabel = labels[label_index]; | ||
| 494 | if (glabel == label) { | ||
| 495 | can_remove = false; | ||
| 496 | break; | ||
| 497 | } | ||
| 498 | } | ||
| 499 | if (can_remove) { | ||
| 500 | auto& manager = label->GetManager(); | ||
| 501 | manager.Remove(label); | ||
| 502 | labels.erase(it); | ||
| 503 | } | ||
| 504 | } | ||
| 475 | } | 505 | } |
| 476 | labels.clear(); | 506 | } |
| 507 | |||
| 508 | bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const { | ||
| 509 | u32 goto_level = goto_node->GetLevel(); | ||
| 510 | u32 label_level = label_node->GetLevel(); | ||
| 511 | while (goto_level > label_level) { | ||
| 512 | goto_level--; | ||
| 513 | goto_node = goto_node->GetParent(); | ||
| 514 | } | ||
| 515 | while (label_level > goto_level) { | ||
| 516 | label_level--; | ||
| 517 | label_node = label_node->GetParent(); | ||
| 518 | } | ||
| 519 | while (goto_node->GetParent() != label_node->GetParent()) { | ||
| 520 | goto_node = goto_node->GetParent(); | ||
| 521 | label_node = label_node->GetParent(); | ||
| 522 | } | ||
| 523 | ASTNode current = goto_node->GetPrevious(); | ||
| 524 | while (current) { | ||
| 525 | if (current == label_node) { | ||
| 526 | return true; | ||
| 527 | } | ||
| 528 | current = current->GetPrevious(); | ||
| 529 | } | ||
| 530 | return false; | ||
| 531 | } | ||
| 532 | |||
| 533 | ASTNode CommonParent(ASTNode first, ASTNode second) { | ||
| 534 | if (first->GetParent() == second->GetParent()) { | ||
| 535 | return first->GetParent(); | ||
| 536 | } | ||
| 537 | u32 first_level = first->GetLevel(); | ||
| 538 | u32 second_level = second->GetLevel(); | ||
| 539 | u32 min_level; | ||
| 540 | u32 max_level; | ||
| 541 | ASTNode max; | ||
| 542 | ASTNode min; | ||
| 543 | if (first_level > second_level) { | ||
| 544 | min_level = second_level; | ||
| 545 | min = second; | ||
| 546 | max_level = first_level; | ||
| 547 | max = first; | ||
| 548 | } else { | ||
| 549 | min_level = first_level; | ||
| 550 | min = first; | ||
| 551 | max_level = second_level; | ||
| 552 | max = second; | ||
| 553 | } | ||
| 554 | |||
| 555 | while (max_level > min_level) { | ||
| 556 | max_level--; | ||
| 557 | max = max->GetParent(); | ||
| 558 | } | ||
| 559 | |||
| 560 | while (min->GetParent() != max->GetParent()) { | ||
| 561 | min = min->GetParent(); | ||
| 562 | max = max->GetParent(); | ||
| 563 | } | ||
| 564 | return min->GetParent(); | ||
| 477 | } | 565 | } |
| 478 | 566 | ||
| 479 | bool ASTManager::IndirectlyRelated(ASTNode first, ASTNode second) { | 567 | bool ASTManager::IndirectlyRelated(ASTNode first, ASTNode second) { |
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h index 06ab20cc5..849d0612c 100644 --- a/src/video_core/shader/ast.h +++ b/src/video_core/shader/ast.h | |||
| @@ -274,7 +274,7 @@ private: | |||
| 274 | 274 | ||
| 275 | class ASTManager final { | 275 | class ASTManager final { |
| 276 | public: | 276 | public: |
| 277 | ASTManager(); | 277 | ASTManager(bool full_decompile); |
| 278 | ~ASTManager(); | 278 | ~ASTManager(); |
| 279 | 279 | ||
| 280 | ASTManager(const ASTManager& o) = delete; | 280 | ASTManager(const ASTManager& o) = delete; |
| @@ -304,7 +304,18 @@ public: | |||
| 304 | void SanityCheck(); | 304 | void SanityCheck(); |
| 305 | 305 | ||
| 306 | bool IsFullyDecompiled() const { | 306 | bool IsFullyDecompiled() const { |
| 307 | return gotos.size() == 0; | 307 | if (full_decompile) { |
| 308 | return gotos.size() == 0; | ||
| 309 | } else { | ||
| 310 | for (ASTNode goto_node : gotos) { | ||
| 311 | u32 label_index = goto_node->GetGotoLabel(); | ||
| 312 | ASTNode glabel = labels[label_index]; | ||
| 313 | if (IsBackwardsJump(goto_node, glabel)) { | ||
| 314 | return false; | ||
| 315 | } | ||
| 316 | } | ||
| 317 | return true; | ||
| 318 | } | ||
| 308 | } | 319 | } |
| 309 | 320 | ||
| 310 | ASTNode GetProgram() const { | 321 | ASTNode GetProgram() const { |
| @@ -318,6 +329,10 @@ public: | |||
| 318 | } | 329 | } |
| 319 | 330 | ||
| 320 | private: | 331 | private: |
| 332 | bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const; | ||
| 333 | |||
| 334 | ASTNode CommonParent(ASTNode first, ASTNode second); | ||
| 335 | |||
| 321 | bool IndirectlyRelated(ASTNode first, ASTNode second); | 336 | bool IndirectlyRelated(ASTNode first, ASTNode second); |
| 322 | 337 | ||
| 323 | bool DirectlyRelated(ASTNode first, ASTNode second); | 338 | bool DirectlyRelated(ASTNode first, ASTNode second); |
| @@ -334,6 +349,7 @@ private: | |||
| 334 | return new_var; | 349 | return new_var; |
| 335 | } | 350 | } |
| 336 | 351 | ||
| 352 | bool full_decompile{}; | ||
| 337 | std::unordered_map<u32, u32> labels_map{}; | 353 | std::unordered_map<u32, u32> labels_map{}; |
| 338 | u32 labels_count{}; | 354 | u32 labels_count{}; |
| 339 | std::vector<ASTNode> labels{}; | 355 | std::vector<ASTNode> labels{}; |
diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp new file mode 100644 index 000000000..cddcbd4f0 --- /dev/null +++ b/src/video_core/shader/compiler_settings.cpp | |||
| @@ -0,0 +1,26 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "video_core/shader/compiler_settings.h" | ||
| 6 | |||
| 7 | namespace VideoCommon::Shader { | ||
| 8 | |||
| 9 | std::string CompileDepthAsString(const CompileDepth cd) { | ||
| 10 | switch (cd) { | ||
| 11 | case CompileDepth::BruteForce: | ||
| 12 | return "Brute Force Compile"; | ||
| 13 | case CompileDepth::FlowStack: | ||
| 14 | return "Simple Flow Stack Mode"; | ||
| 15 | case CompileDepth::NoFlowStack: | ||
| 16 | return "Remove Flow Stack"; | ||
| 17 | case CompileDepth::DecompileBackwards: | ||
| 18 | return "Decompile Backward Jumps"; | ||
| 19 | case CompileDepth::FullDecompile: | ||
| 20 | return "Full Decompilation"; | ||
| 21 | default: | ||
| 22 | return "Unknown Compiler Process"; | ||
| 23 | } | ||
| 24 | } | ||
| 25 | |||
| 26 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h new file mode 100644 index 000000000..e1fb5bc3a --- /dev/null +++ b/src/video_core/shader/compiler_settings.h | |||
| @@ -0,0 +1,25 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | |||
| 9 | namespace VideoCommon::Shader { | ||
| 10 | |||
| 11 | enum class CompileDepth : u32 { | ||
| 12 | BruteForce = 0, | ||
| 13 | FlowStack = 1, | ||
| 14 | NoFlowStack = 2, | ||
| 15 | DecompileBackwards = 3, | ||
| 16 | FullDecompile = 4, | ||
| 17 | }; | ||
| 18 | |||
| 19 | std::string CompileDepthAsString(CompileDepth cd); | ||
| 20 | |||
| 21 | struct CompilerSettings { | ||
| 22 | CompileDepth depth; | ||
| 23 | }; | ||
| 24 | |||
| 25 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index a29922815..c4351969b 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp | |||
| @@ -57,8 +57,8 @@ struct BlockInfo { | |||
| 57 | 57 | ||
| 58 | struct CFGRebuildState { | 58 | struct CFGRebuildState { |
| 59 | explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, | 59 | explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, |
| 60 | const u32 start, ASTManager& manager) | 60 | const u32 start) |
| 61 | : program_code{program_code}, program_size{program_size}, start{start}, manager{manager} {} | 61 | : program_code{program_code}, program_size{program_size}, start{start} {} |
| 62 | 62 | ||
| 63 | u32 start{}; | 63 | u32 start{}; |
| 64 | std::vector<BlockInfo> block_info{}; | 64 | std::vector<BlockInfo> block_info{}; |
| @@ -71,7 +71,7 @@ struct CFGRebuildState { | |||
| 71 | std::unordered_map<u32, BlockStack> stacks{}; | 71 | std::unordered_map<u32, BlockStack> stacks{}; |
| 72 | const ProgramCode& program_code; | 72 | const ProgramCode& program_code; |
| 73 | const std::size_t program_size; | 73 | const std::size_t program_size; |
| 74 | ASTManager& manager; | 74 | ASTManager* manager; |
| 75 | }; | 75 | }; |
| 76 | 76 | ||
| 77 | enum class BlockCollision : u32 { None, Found, Inside }; | 77 | enum class BlockCollision : u32 { None, Found, Inside }; |
| @@ -456,67 +456,91 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) { | |||
| 456 | } | 456 | } |
| 457 | 457 | ||
| 458 | void DecompileShader(CFGRebuildState& state) { | 458 | void DecompileShader(CFGRebuildState& state) { |
| 459 | state.manager.Init(); | 459 | state.manager->Init(); |
| 460 | for (auto label : state.labels) { | 460 | for (auto label : state.labels) { |
| 461 | state.manager.DeclareLabel(label); | 461 | state.manager->DeclareLabel(label); |
| 462 | } | 462 | } |
| 463 | for (auto& block : state.block_info) { | 463 | for (auto& block : state.block_info) { |
| 464 | if (state.labels.count(block.start) != 0) { | 464 | if (state.labels.count(block.start) != 0) { |
| 465 | state.manager.InsertLabel(block.start); | 465 | state.manager->InsertLabel(block.start); |
| 466 | } | 466 | } |
| 467 | u32 end = block.branch.ignore ? block.end + 1 : block.end; | 467 | u32 end = block.branch.ignore ? block.end + 1 : block.end; |
| 468 | state.manager.InsertBlock(block.start, end); | 468 | state.manager->InsertBlock(block.start, end); |
| 469 | if (!block.branch.ignore) { | 469 | if (!block.branch.ignore) { |
| 470 | InsertBranch(state.manager, block.branch); | 470 | InsertBranch(*state.manager, block.branch); |
| 471 | } | 471 | } |
| 472 | } | 472 | } |
| 473 | // state.manager.ShowCurrentState("Before Decompiling"); | 473 | state.manager->Decompile(); |
| 474 | state.manager.Decompile(); | ||
| 475 | // state.manager.ShowCurrentState("After Decompiling"); | ||
| 476 | } | 474 | } |
| 477 | 475 | ||
| 478 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, | 476 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, |
| 479 | u32 start_address, ASTManager& manager) { | 477 | u32 start_address, |
| 480 | CFGRebuildState state{program_code, program_size, start_address, manager}; | 478 | const CompilerSettings& settings) { |
| 479 | auto result_out = std::make_unique<ShaderCharacteristics>(); | ||
| 480 | if (settings.depth == CompileDepth::BruteForce) { | ||
| 481 | result_out->settings.depth = CompileDepth::BruteForce; | ||
| 482 | return std::move(result_out); | ||
| 483 | } | ||
| 484 | |||
| 485 | CFGRebuildState state{program_code, program_size, start_address}; | ||
| 481 | // Inspect Code and generate blocks | 486 | // Inspect Code and generate blocks |
| 482 | state.labels.clear(); | 487 | state.labels.clear(); |
| 483 | state.labels.emplace(start_address); | 488 | state.labels.emplace(start_address); |
| 484 | state.inspect_queries.push_back(state.start); | 489 | state.inspect_queries.push_back(state.start); |
| 485 | while (!state.inspect_queries.empty()) { | 490 | while (!state.inspect_queries.empty()) { |
| 486 | if (!TryInspectAddress(state)) { | 491 | if (!TryInspectAddress(state)) { |
| 487 | return {}; | 492 | result_out->settings.depth = CompileDepth::BruteForce; |
| 493 | return std::move(result_out); | ||
| 488 | } | 494 | } |
| 489 | } | 495 | } |
| 490 | 496 | ||
| 491 | // Decompile Stacks | 497 | bool use_flow_stack = true; |
| 492 | state.queries.push_back(Query{state.start, {}, {}}); | 498 | |
| 493 | bool decompiled = true; | 499 | bool decompiled = false; |
| 494 | while (!state.queries.empty()) { | 500 | |
| 495 | if (!TryQuery(state)) { | 501 | if (settings.depth != CompileDepth::FlowStack) { |
| 496 | decompiled = false; | 502 | // Decompile Stacks |
| 497 | break; | 503 | state.queries.push_back(Query{state.start, {}, {}}); |
| 504 | decompiled = true; | ||
| 505 | while (!state.queries.empty()) { | ||
| 506 | if (!TryQuery(state)) { | ||
| 507 | decompiled = false; | ||
| 508 | break; | ||
| 509 | } | ||
| 498 | } | 510 | } |
| 499 | } | 511 | } |
| 500 | 512 | ||
| 513 | use_flow_stack = !decompiled; | ||
| 514 | |||
| 501 | // Sort and organize results | 515 | // Sort and organize results |
| 502 | std::sort(state.block_info.begin(), state.block_info.end(), | 516 | std::sort(state.block_info.begin(), state.block_info.end(), |
| 503 | [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); | 517 | [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); |
| 504 | if (decompiled) { | 518 | if (decompiled && settings.depth != CompileDepth::NoFlowStack) { |
| 519 | ASTManager manager{settings.depth != CompileDepth::DecompileBackwards}; | ||
| 520 | state.manager = &manager; | ||
| 505 | DecompileShader(state); | 521 | DecompileShader(state); |
| 506 | decompiled = state.manager.IsFullyDecompiled(); | 522 | decompiled = state.manager->IsFullyDecompiled(); |
| 507 | if (!decompiled) { | 523 | if (!decompiled) { |
| 508 | LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:"); | 524 | if (settings.depth == CompileDepth::FullDecompile) { |
| 509 | state.manager.ShowCurrentState("Of Shader"); | 525 | LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:"); |
| 510 | state.manager.Clear(); | 526 | } else { |
| 527 | LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:"); | ||
| 528 | } | ||
| 529 | state.manager->ShowCurrentState("Of Shader"); | ||
| 530 | state.manager->Clear(); | ||
| 531 | } else { | ||
| 532 | auto result_out = std::make_unique<ShaderCharacteristics>(); | ||
| 533 | result_out->start = start_address; | ||
| 534 | result_out->settings.depth = settings.depth; | ||
| 535 | result_out->manager = std::move(manager); | ||
| 536 | result_out->end = state.block_info.back().end + 1; | ||
| 537 | return std::move(result_out); | ||
| 511 | } | 538 | } |
| 512 | } | 539 | } |
| 513 | auto result_out = std::make_unique<ShaderCharacteristics>(); | ||
| 514 | result_out->decompiled = decompiled; | ||
| 515 | result_out->start = start_address; | 540 | result_out->start = start_address; |
| 516 | if (decompiled) { | 541 | result_out->settings.depth = |
| 517 | result_out->end = state.block_info.back().end + 1; | 542 | use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack; |
| 518 | return std::move(result_out); | 543 | result_out->blocks.clear(); |
| 519 | } | ||
| 520 | for (auto& block : state.block_info) { | 544 | for (auto& block : state.block_info) { |
| 521 | ShaderBlock new_block{}; | 545 | ShaderBlock new_block{}; |
| 522 | new_block.start = block.start; | 546 | new_block.start = block.start; |
| @@ -530,6 +554,10 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, | |||
| 530 | result_out->end = std::max(result_out->end, block.end); | 554 | result_out->end = std::max(result_out->end, block.end); |
| 531 | result_out->blocks.push_back(new_block); | 555 | result_out->blocks.push_back(new_block); |
| 532 | } | 556 | } |
| 557 | if (!use_flow_stack) { | ||
| 558 | result_out->labels = std::move(state.labels); | ||
| 559 | return std::move(result_out); | ||
| 560 | } | ||
| 533 | auto back = result_out->blocks.begin(); | 561 | auto back = result_out->blocks.begin(); |
| 534 | auto next = std::next(back); | 562 | auto next = std::next(back); |
| 535 | while (next != result_out->blocks.end()) { | 563 | while (next != result_out->blocks.end()) { |
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 347a35dcf..8d0d08422 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h | |||
| @@ -9,8 +9,9 @@ | |||
| 9 | #include <set> | 9 | #include <set> |
| 10 | 10 | ||
| 11 | #include "video_core/engines/shader_bytecode.h" | 11 | #include "video_core/engines/shader_bytecode.h" |
| 12 | #include "video_core/shader/shader_ir.h" | ||
| 13 | #include "video_core/shader/ast.h" | 12 | #include "video_core/shader/ast.h" |
| 13 | #include "video_core/shader/compiler_settings.h" | ||
| 14 | #include "video_core/shader/shader_ir.h" | ||
| 14 | 15 | ||
| 15 | namespace VideoCommon::Shader { | 16 | namespace VideoCommon::Shader { |
| 16 | 17 | ||
| @@ -68,12 +69,15 @@ struct ShaderBlock { | |||
| 68 | 69 | ||
| 69 | struct ShaderCharacteristics { | 70 | struct ShaderCharacteristics { |
| 70 | std::list<ShaderBlock> blocks{}; | 71 | std::list<ShaderBlock> blocks{}; |
| 71 | bool decompiled{}; | 72 | std::set<u32> labels{}; |
| 72 | u32 start{}; | 73 | u32 start{}; |
| 73 | u32 end{}; | 74 | u32 end{}; |
| 75 | ASTManager manager{true}; | ||
| 76 | CompilerSettings settings{}; | ||
| 74 | }; | 77 | }; |
| 75 | 78 | ||
| 76 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, | 79 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, |
| 77 | u32 start_address, ASTManager& manager); | 80 | u32 start_address, |
| 81 | const CompilerSettings& settings); | ||
| 78 | 82 | ||
| 79 | } // namespace VideoCommon::Shader | 83 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index e7e0903f6..6d4359295 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -102,35 +102,71 @@ void ShaderIR::Decode() { | |||
| 102 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | 102 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); |
| 103 | 103 | ||
| 104 | decompiled = false; | 104 | decompiled = false; |
| 105 | const auto info = | 105 | auto info = ScanFlow(program_code, program_size, main_offset, settings); |
| 106 | ScanFlow(program_code, program_size, main_offset, program_manager); | 106 | auto& shader_info = *info; |
| 107 | if (info) { | 107 | coverage_begin = shader_info.start; |
| 108 | const auto& shader_info = *info; | 108 | coverage_end = shader_info.end; |
| 109 | coverage_begin = shader_info.start; | 109 | switch (shader_info.settings.depth) { |
| 110 | coverage_end = shader_info.end; | 110 | case CompileDepth::FlowStack: { |
| 111 | if (shader_info.decompiled) { | ||
| 112 | decompiled = true; | ||
| 113 | ASTDecoder decoder{*this}; | ||
| 114 | ASTNode program = GetASTProgram(); | ||
| 115 | decoder.Visit(program); | ||
| 116 | return; | ||
| 117 | } | ||
| 118 | LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method"); | ||
| 119 | // we can't decompile it, fallback to standard method | ||
| 120 | for (const auto& block : shader_info.blocks) { | 111 | for (const auto& block : shader_info.blocks) { |
| 121 | basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); | 112 | basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); |
| 122 | } | 113 | } |
| 123 | return; | 114 | break; |
| 115 | } | ||
| 116 | case CompileDepth::NoFlowStack: { | ||
| 117 | disable_flow_stack = true; | ||
| 118 | const auto insert_block = [this](NodeBlock& nodes, u32 label) { | ||
| 119 | if (label == static_cast<u32>(exit_branch)) { | ||
| 120 | return; | ||
| 121 | } | ||
| 122 | basic_blocks.insert({label, nodes}); | ||
| 123 | }; | ||
| 124 | const auto& blocks = shader_info.blocks; | ||
| 125 | NodeBlock current_block; | ||
| 126 | u32 current_label = static_cast<u32>(exit_branch); | ||
| 127 | for (auto& block : blocks) { | ||
| 128 | if (shader_info.labels.count(block.start) != 0) { | ||
| 129 | insert_block(current_block, current_label); | ||
| 130 | current_block.clear(); | ||
| 131 | current_label = block.start; | ||
| 132 | } | ||
| 133 | if (!block.ignore_branch) { | ||
| 134 | DecodeRangeInner(current_block, block.start, block.end); | ||
| 135 | InsertControlFlow(current_block, block); | ||
| 136 | } else { | ||
| 137 | DecodeRangeInner(current_block, block.start, block.end + 1); | ||
| 138 | } | ||
| 139 | } | ||
| 140 | insert_block(current_block, current_label); | ||
| 141 | break; | ||
| 142 | } | ||
| 143 | case CompileDepth::DecompileBackwards: | ||
| 144 | case CompileDepth::FullDecompile: { | ||
| 145 | program_manager = std::move(shader_info.manager); | ||
| 146 | disable_flow_stack = true; | ||
| 147 | decompiled = true; | ||
| 148 | ASTDecoder decoder{*this}; | ||
| 149 | ASTNode program = GetASTProgram(); | ||
| 150 | decoder.Visit(program); | ||
| 151 | break; | ||
| 152 | } | ||
| 153 | default: | ||
| 154 | LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); | ||
| 155 | [[fallthrough]]; | ||
| 156 | case CompileDepth::BruteForce: { | ||
| 157 | coverage_begin = main_offset; | ||
| 158 | const u32 shader_end = static_cast<u32>(program_size / sizeof(u64)); | ||
| 159 | coverage_end = shader_end; | ||
| 160 | for (u32 label = main_offset; label < shader_end; label++) { | ||
| 161 | basic_blocks.insert({label, DecodeRange(label, label + 1)}); | ||
| 162 | } | ||
| 163 | break; | ||
| 164 | } | ||
| 124 | } | 165 | } |
| 125 | LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling"); | 166 | if (settings.depth != shader_info.settings.depth) { |
| 126 | 167 | LOG_WARNING( | |
| 127 | // Now we need to deal with an undecompilable shader. We need to brute force | 168 | HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"", |
| 128 | // a shader that captures every position. | 169 | CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth)); |
| 129 | coverage_begin = main_offset; | ||
| 130 | const u32 shader_end = static_cast<u32>(program_size / sizeof(u64)); | ||
| 131 | coverage_end = shader_end; | ||
| 132 | for (u32 label = main_offset; label < shader_end; label++) { | ||
| 133 | basic_blocks.insert({label, DecodeRange(label, label + 1)}); | ||
| 134 | } | 170 | } |
| 135 | } | 171 | } |
| 136 | 172 | ||
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 6f678003c..d46e0f823 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp | |||
| @@ -157,7 +157,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 157 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | 157 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, |
| 158 | "Constant buffer flow is not supported"); | 158 | "Constant buffer flow is not supported"); |
| 159 | 159 | ||
| 160 | if (decompiled) { | 160 | if (disable_flow_stack) { |
| 161 | break; | 161 | break; |
| 162 | } | 162 | } |
| 163 | 163 | ||
| @@ -171,7 +171,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 171 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | 171 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, |
| 172 | "Constant buffer PBK is not supported"); | 172 | "Constant buffer PBK is not supported"); |
| 173 | 173 | ||
| 174 | if (decompiled) { | 174 | if (disable_flow_stack) { |
| 175 | break; | 175 | break; |
| 176 | } | 176 | } |
| 177 | 177 | ||
| @@ -186,7 +186,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 186 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", | 186 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", |
| 187 | static_cast<u32>(cc)); | 187 | static_cast<u32>(cc)); |
| 188 | 188 | ||
| 189 | if (decompiled) { | 189 | if (disable_flow_stack) { |
| 190 | break; | 190 | break; |
| 191 | } | 191 | } |
| 192 | 192 | ||
| @@ -198,7 +198,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 198 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | 198 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |
| 199 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", | 199 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", |
| 200 | static_cast<u32>(cc)); | 200 | static_cast<u32>(cc)); |
| 201 | if (decompiled) { | 201 | if (disable_flow_stack) { |
| 202 | break; | 202 | break; |
| 203 | } | 203 | } |
| 204 | 204 | ||
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 004b1e16f..04e364634 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -22,8 +22,10 @@ using Tegra::Shader::PredCondition; | |||
| 22 | using Tegra::Shader::PredOperation; | 22 | using Tegra::Shader::PredOperation; |
| 23 | using Tegra::Shader::Register; | 23 | using Tegra::Shader::Register; |
| 24 | 24 | ||
| 25 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size) | 25 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size, |
| 26 | : program_code{program_code}, main_offset{main_offset}, program_size{size}, program_manager{} { | 26 | CompilerSettings settings) |
| 27 | : program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{}, | ||
| 28 | program_manager{true}, settings{settings} { | ||
| 27 | Decode(); | 29 | Decode(); |
| 28 | } | 30 | } |
| 29 | 31 | ||
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 48c7b722e..7a91c9bb6 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include "video_core/engines/shader_bytecode.h" | 16 | #include "video_core/engines/shader_bytecode.h" |
| 17 | #include "video_core/engines/shader_header.h" | 17 | #include "video_core/engines/shader_header.h" |
| 18 | #include "video_core/shader/ast.h" | 18 | #include "video_core/shader/ast.h" |
| 19 | #include "video_core/shader/compiler_settings.h" | ||
| 19 | #include "video_core/shader/node.h" | 20 | #include "video_core/shader/node.h" |
| 20 | 21 | ||
| 21 | namespace VideoCommon::Shader { | 22 | namespace VideoCommon::Shader { |
| @@ -65,7 +66,8 @@ struct GlobalMemoryUsage { | |||
| 65 | 66 | ||
| 66 | class ShaderIR final { | 67 | class ShaderIR final { |
| 67 | public: | 68 | public: |
| 68 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size); | 69 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size, |
| 70 | CompilerSettings settings); | ||
| 69 | ~ShaderIR(); | 71 | ~ShaderIR(); |
| 70 | 72 | ||
| 71 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { | 73 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { |
| @@ -141,6 +143,10 @@ public: | |||
| 141 | return header; | 143 | return header; |
| 142 | } | 144 | } |
| 143 | 145 | ||
| 146 | bool IsFlowStackDisabled() const { | ||
| 147 | return disable_flow_stack; | ||
| 148 | } | ||
| 149 | |||
| 144 | bool IsDecompiled() const { | 150 | bool IsDecompiled() const { |
| 145 | return decompiled; | 151 | return decompiled; |
| 146 | } | 152 | } |
| @@ -368,6 +374,7 @@ private: | |||
| 368 | const u32 main_offset; | 374 | const u32 main_offset; |
| 369 | const std::size_t program_size; | 375 | const std::size_t program_size; |
| 370 | bool decompiled{}; | 376 | bool decompiled{}; |
| 377 | bool disable_flow_stack{}; | ||
| 371 | 378 | ||
| 372 | u32 coverage_begin{}; | 379 | u32 coverage_begin{}; |
| 373 | u32 coverage_end{}; | 380 | u32 coverage_end{}; |
| @@ -375,6 +382,7 @@ private: | |||
| 375 | std::map<u32, NodeBlock> basic_blocks; | 382 | std::map<u32, NodeBlock> basic_blocks; |
| 376 | NodeBlock global_code; | 383 | NodeBlock global_code; |
| 377 | ASTManager program_manager; | 384 | ASTManager program_manager; |
| 385 | CompilerSettings settings{}; | ||
| 378 | 386 | ||
| 379 | std::set<u32> used_registers; | 387 | std::set<u32> used_registers; |
| 380 | std::set<Tegra::Shader::Pred> used_predicates; | 388 | std::set<Tegra::Shader::Pred> used_predicates; |