diff options
Diffstat (limited to 'src/video_core/shader/decode.cpp')
| -rw-r--r-- | src/video_core/shader/decode.cpp | 177 |
1 files changed, 93 insertions, 84 deletions
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 2c9ff28f2..29c8895c5 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/shader_bytecode.h" | 12 | #include "video_core/engines/shader_bytecode.h" |
| 13 | #include "video_core/engines/shader_header.h" | 13 | #include "video_core/engines/shader_header.h" |
| 14 | #include "video_core/shader/control_flow.h" | ||
| 14 | #include "video_core/shader/node_helper.h" | 15 | #include "video_core/shader/node_helper.h" |
| 15 | #include "video_core/shader/shader_ir.h" | 16 | #include "video_core/shader/shader_ir.h" |
| 16 | 17 | ||
| @@ -21,20 +22,6 @@ using Tegra::Shader::OpCode; | |||
| 21 | 22 | ||
| 22 | namespace { | 23 | namespace { |
| 23 | 24 | ||
| 24 | /// Merges exit method of two parallel branches. | ||
| 25 | constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) { | ||
| 26 | if (a == ExitMethod::Undetermined) { | ||
| 27 | return b; | ||
| 28 | } | ||
| 29 | if (b == ExitMethod::Undetermined) { | ||
| 30 | return a; | ||
| 31 | } | ||
| 32 | if (a == b) { | ||
| 33 | return a; | ||
| 34 | } | ||
| 35 | return ExitMethod::Conditional; | ||
| 36 | } | ||
| 37 | |||
| 38 | /** | 25 | /** |
| 39 | * Returns whether the instruction at the specified offset is a 'sched' instruction. | 26 | * Returns whether the instruction at the specified offset is a 'sched' instruction. |
| 40 | * Sched instructions always appear before a sequence of 3 instructions. | 27 | * Sched instructions always appear before a sequence of 3 instructions. |
| @@ -51,85 +38,104 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | |||
| 51 | void ShaderIR::Decode() { | 38 | void ShaderIR::Decode() { |
| 52 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | 39 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); |
| 53 | 40 | ||
| 54 | std::set<u32> labels; | 41 | disable_flow_stack = false; |
| 55 | const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels); | 42 | const auto info = ScanFlow(program_code, program_size, main_offset); |
| 56 | if (exit_method != ExitMethod::AlwaysEnd) { | 43 | if (info) { |
| 57 | UNREACHABLE_MSG("Program does not always end"); | 44 | const auto& shader_info = *info; |
| 58 | } | 45 | coverage_begin = shader_info.start; |
| 59 | 46 | coverage_end = shader_info.end; | |
| 60 | if (labels.empty()) { | 47 | if (shader_info.decompilable) { |
| 61 | basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)}); | 48 | disable_flow_stack = true; |
| 49 | const auto insert_block = ([this](NodeBlock& nodes, u32 label) { | ||
| 50 | if (label == exit_branch) { | ||
| 51 | return; | ||
| 52 | } | ||
| 53 | basic_blocks.insert({label, nodes}); | ||
| 54 | }); | ||
| 55 | const auto& blocks = shader_info.blocks; | ||
| 56 | NodeBlock current_block; | ||
| 57 | u32 current_label = exit_branch; | ||
| 58 | for (auto& block : blocks) { | ||
| 59 | if (shader_info.labels.count(block.start) != 0) { | ||
| 60 | insert_block(current_block, current_label); | ||
| 61 | current_block.clear(); | ||
| 62 | current_label = block.start; | ||
| 63 | } | ||
| 64 | if (!block.ignore_branch) { | ||
| 65 | DecodeRangeInner(current_block, block.start, block.end); | ||
| 66 | InsertControlFlow(current_block, block); | ||
| 67 | } else { | ||
| 68 | DecodeRangeInner(current_block, block.start, block.end + 1); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | insert_block(current_block, current_label); | ||
| 72 | return; | ||
| 73 | } | ||
| 74 | LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method"); | ||
| 75 | // we can't decompile it, fallback to standard method | ||
| 76 | for (const auto& block : shader_info.blocks) { | ||
| 77 | basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); | ||
| 78 | } | ||
| 62 | return; | 79 | return; |
| 63 | } | 80 | } |
| 81 | LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling"); | ||
| 82 | |||
| 83 | // Now we need to deal with an undecompilable shader. We need to brute force | ||
| 84 | // a shader that captures every position. | ||
| 85 | coverage_begin = main_offset; | ||
| 86 | const u32 shader_end = static_cast<u32>(program_size / sizeof(u64)); | ||
| 87 | coverage_end = shader_end; | ||
| 88 | for (u32 label = main_offset; label < shader_end; label++) { | ||
| 89 | basic_blocks.insert({label, DecodeRange(label, label + 1)}); | ||
| 90 | } | ||
| 91 | } | ||
| 64 | 92 | ||
| 65 | labels.insert(main_offset); | 93 | NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { |
| 66 | 94 | NodeBlock basic_block; | |
| 67 | for (const u32 label : labels) { | 95 | DecodeRangeInner(basic_block, begin, end); |
| 68 | const auto next_it = labels.lower_bound(label + 1); | 96 | return basic_block; |
| 69 | const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it; | 97 | } |
| 70 | 98 | ||
| 71 | basic_blocks.insert({label, DecodeRange(label, next_label)}); | 99 | void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { |
| 100 | for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||
| 101 | pc = DecodeInstr(bb, pc); | ||
| 72 | } | 102 | } |
| 73 | } | 103 | } |
| 74 | 104 | ||
| 75 | ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) { | 105 | void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { |
| 76 | const auto [iter, inserted] = | 106 | const auto apply_conditions = ([&](const Condition& cond, Node n) -> Node { |
| 77 | exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); | 107 | Node result = n; |
| 78 | ExitMethod& exit_method = iter->second; | 108 | if (cond.cc != ConditionCode::T) { |
| 79 | if (!inserted) | 109 | result = Conditional(GetConditionCode(cond.cc), {result}); |
| 80 | return exit_method; | ||
| 81 | |||
| 82 | for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) { | ||
| 83 | coverage_begin = std::min(coverage_begin, offset); | ||
| 84 | coverage_end = std::max(coverage_end, offset + 1); | ||
| 85 | |||
| 86 | const Instruction instr = {program_code[offset]}; | ||
| 87 | const auto opcode = OpCode::Decode(instr); | ||
| 88 | if (!opcode) | ||
| 89 | continue; | ||
| 90 | switch (opcode->get().GetId()) { | ||
| 91 | case OpCode::Id::EXIT: { | ||
| 92 | // The EXIT instruction can be predicated, which means that the shader can conditionally | ||
| 93 | // end on this instruction. We have to consider the case where the condition is not met | ||
| 94 | // and check the exit method of that other basic block. | ||
| 95 | using Tegra::Shader::Pred; | ||
| 96 | if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 97 | return exit_method = ExitMethod::AlwaysEnd; | ||
| 98 | } else { | ||
| 99 | const ExitMethod not_met = Scan(offset + 1, end, labels); | ||
| 100 | return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met); | ||
| 101 | } | ||
| 102 | } | 110 | } |
| 103 | case OpCode::Id::BRA: { | 111 | if (cond.predicate != Pred::UnusedIndex) { |
| 104 | const u32 target = offset + instr.bra.GetBranchTarget(); | 112 | u32 pred = static_cast<u32>(cond.predicate); |
| 105 | labels.insert(target); | 113 | const bool is_neg = pred > 7; |
| 106 | const ExitMethod no_jmp = Scan(offset + 1, end, labels); | 114 | if (is_neg) { |
| 107 | const ExitMethod jmp = Scan(target, end, labels); | 115 | pred -= 8; |
| 108 | return exit_method = ParallelExit(no_jmp, jmp); | 116 | } |
| 109 | } | 117 | result = Conditional(GetPredicate(pred, is_neg), {result}); |
| 110 | case OpCode::Id::SSY: | ||
| 111 | case OpCode::Id::PBK: { | ||
| 112 | // The SSY and PBK use a similar encoding as the BRA instruction. | ||
| 113 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 114 | "Constant buffer branching is not supported"); | ||
| 115 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 116 | labels.insert(target); | ||
| 117 | // Continue scanning for an exit method. | ||
| 118 | break; | ||
| 119 | } | 118 | } |
| 120 | default: | 119 | return result; |
| 121 | break; | 120 | }); |
| 121 | if (block.branch.address < 0) { | ||
| 122 | if (block.branch.kills) { | ||
| 123 | Node n = Operation(OperationCode::Discard); | ||
| 124 | n = apply_conditions(block.branch.cond, n); | ||
| 125 | bb.push_back(n); | ||
| 126 | global_code.push_back(n); | ||
| 127 | return; | ||
| 122 | } | 128 | } |
| 129 | Node n = Operation(OperationCode::Exit); | ||
| 130 | n = apply_conditions(block.branch.cond, n); | ||
| 131 | bb.push_back(n); | ||
| 132 | global_code.push_back(n); | ||
| 133 | return; | ||
| 123 | } | 134 | } |
| 124 | return exit_method = ExitMethod::AlwaysReturn; | 135 | Node n = Operation(OperationCode::Branch, Immediate(block.branch.address)); |
| 125 | } | 136 | n = apply_conditions(block.branch.cond, n); |
| 126 | 137 | bb.push_back(n); | |
| 127 | NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { | 138 | global_code.push_back(n); |
| 128 | NodeBlock basic_block; | ||
| 129 | for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||
| 130 | pc = DecodeInstr(basic_block, pc); | ||
| 131 | } | ||
| 132 | return basic_block; | ||
| 133 | } | 139 | } |
| 134 | 140 | ||
| 135 | u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | 141 | u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { |
| @@ -140,15 +146,18 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | |||
| 140 | 146 | ||
| 141 | const Instruction instr = {program_code[pc]}; | 147 | const Instruction instr = {program_code[pc]}; |
| 142 | const auto opcode = OpCode::Decode(instr); | 148 | const auto opcode = OpCode::Decode(instr); |
| 149 | const u32 nv_address = ConvertAddressToNvidiaSpace(pc); | ||
| 143 | 150 | ||
| 144 | // Decoding failure | 151 | // Decoding failure |
| 145 | if (!opcode) { | 152 | if (!opcode) { |
| 146 | UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); | 153 | UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); |
| 154 | bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})", | ||
| 155 | nv_address, instr.value))); | ||
| 147 | return pc + 1; | 156 | return pc + 1; |
| 148 | } | 157 | } |
| 149 | 158 | ||
| 150 | bb.push_back( | 159 | bb.push_back(Comment( |
| 151 | Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value))); | 160 | fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value))); |
| 152 | 161 | ||
| 153 | using Tegra::Shader::Pred; | 162 | using Tegra::Shader::Pred; |
| 154 | UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, | 163 | UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, |