diff options
Diffstat (limited to 'src/video_core/shader/decode.cpp')
| -rw-r--r-- | src/video_core/shader/decode.cpp | 368 |
1 files changed, 0 insertions, 368 deletions
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp deleted file mode 100644 index 6576d1208..000000000 --- a/src/video_core/shader/decode.cpp +++ /dev/null | |||
| @@ -1,368 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
#include <algorithm>
#include <cstring>
#include <limits>
#include <list>
#include <map>
#include <optional>
#include <set>
#include <utility>
#include <variant>
#include <vector>

#include <fmt/format.h>

#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_header.h"
#include "video_core/shader/control_flow.h"
#include "video_core/shader/memory_util.h"
#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
| 19 | |||
| 20 | namespace VideoCommon::Shader { | ||
| 21 | |||
| 22 | using Tegra::Shader::Instruction; | ||
| 23 | using Tegra::Shader::OpCode; | ||
| 24 | |||
| 25 | namespace { | ||
| 26 | |||
| 27 | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, | ||
| 28 | const std::list<SamplerEntry>& used_samplers) { | ||
| 29 | if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { | ||
| 30 | return; | ||
| 31 | } | ||
| 32 | u32 count{}; | ||
| 33 | std::vector<u32> bound_offsets; | ||
| 34 | for (const auto& sampler : used_samplers) { | ||
| 35 | if (sampler.is_bindless) { | ||
| 36 | continue; | ||
| 37 | } | ||
| 38 | ++count; | ||
| 39 | bound_offsets.emplace_back(sampler.offset); | ||
| 40 | } | ||
| 41 | if (count > 1) { | ||
| 42 | gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets)); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce, | ||
| 47 | VideoCore::GuestDriverProfile& gpu_driver, | ||
| 48 | const std::list<SamplerEntry>& used_samplers) { | ||
| 49 | const u32 base_offset = sampler_to_deduce.offset; | ||
| 50 | u32 max_offset{std::numeric_limits<u32>::max()}; | ||
| 51 | for (const auto& sampler : used_samplers) { | ||
| 52 | if (sampler.is_bindless) { | ||
| 53 | continue; | ||
| 54 | } | ||
| 55 | if (sampler.offset > base_offset) { | ||
| 56 | max_offset = std::min(sampler.offset, max_offset); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | if (max_offset == std::numeric_limits<u32>::max()) { | ||
| 60 | return std::nullopt; | ||
| 61 | } | ||
| 62 | return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize(); | ||
| 63 | } | ||
| 64 | |||
| 65 | } // Anonymous namespace | ||
| 66 | |||
| 67 | class ASTDecoder { | ||
| 68 | public: | ||
| 69 | explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {} | ||
| 70 | |||
| 71 | void operator()(ASTProgram& ast) { | ||
| 72 | ASTNode current = ast.nodes.GetFirst(); | ||
| 73 | while (current) { | ||
| 74 | Visit(current); | ||
| 75 | current = current->GetNext(); | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 79 | void operator()(ASTIfThen& ast) { | ||
| 80 | ASTNode current = ast.nodes.GetFirst(); | ||
| 81 | while (current) { | ||
| 82 | Visit(current); | ||
| 83 | current = current->GetNext(); | ||
| 84 | } | ||
| 85 | } | ||
| 86 | |||
| 87 | void operator()(ASTIfElse& ast) { | ||
| 88 | ASTNode current = ast.nodes.GetFirst(); | ||
| 89 | while (current) { | ||
| 90 | Visit(current); | ||
| 91 | current = current->GetNext(); | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | void operator()(ASTBlockEncoded& ast) {} | ||
| 96 | |||
| 97 | void operator()(ASTBlockDecoded& ast) {} | ||
| 98 | |||
| 99 | void operator()(ASTVarSet& ast) {} | ||
| 100 | |||
| 101 | void operator()(ASTLabel& ast) {} | ||
| 102 | |||
| 103 | void operator()(ASTGoto& ast) {} | ||
| 104 | |||
| 105 | void operator()(ASTDoWhile& ast) { | ||
| 106 | ASTNode current = ast.nodes.GetFirst(); | ||
| 107 | while (current) { | ||
| 108 | Visit(current); | ||
| 109 | current = current->GetNext(); | ||
| 110 | } | ||
| 111 | } | ||
| 112 | |||
| 113 | void operator()(ASTReturn& ast) {} | ||
| 114 | |||
| 115 | void operator()(ASTBreak& ast) {} | ||
| 116 | |||
| 117 | void Visit(ASTNode& node) { | ||
| 118 | std::visit(*this, *node->GetInnerData()); | ||
| 119 | if (node->IsBlockEncoded()) { | ||
| 120 | auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData()); | ||
| 121 | NodeBlock bb = ir.DecodeRange(block->start, block->end); | ||
| 122 | node->TransformBlockEncoded(std::move(bb)); | ||
| 123 | } | ||
| 124 | } | ||
| 125 | |||
| 126 | private: | ||
| 127 | ShaderIR& ir; | ||
| 128 | }; | ||
| 129 | |||
void ShaderIR::Decode() {
    // The shader header occupies the first bytes of the program code.
    std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));

    decompiled = false;
    // Run control-flow analysis first; the scanner reports the compile depth it
    // actually achieved, which may be lower than the one requested in `settings`.
    auto info = ScanFlow(program_code, main_offset, settings, registry);
    auto& shader_info = *info;
    coverage_begin = shader_info.start;
    coverage_end = shader_info.end;
    switch (shader_info.settings.depth) {
    case CompileDepth::FlowStack: {
        // One IR basic block per discovered flow block; the hardware flow stack
        // handles control transfers between them.
        for (const auto& block : shader_info.blocks) {
            basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
        }
        break;
    }
    case CompileDepth::NoFlowStack: {
        disable_flow_stack = true;
        // Stores the accumulated nodes under `label`, except for the synthetic exit
        // label, which never becomes a real basic block.
        const auto insert_block = [this](NodeBlock& nodes, u32 label) {
            if (label == static_cast<u32>(exit_branch)) {
                return;
            }
            basic_blocks.insert({label, nodes});
        };
        const auto& blocks = shader_info.blocks;
        NodeBlock current_block;
        u32 current_label = static_cast<u32>(exit_branch);
        for (const auto& block : blocks) {
            if (shader_info.labels.contains(block.start)) {
                // A labeled block starts here: flush everything built so far and
                // begin accumulating under the new label.
                insert_block(current_block, current_label);
                current_block.clear();
                current_label = block.start;
            }
            if (!block.ignore_branch) {
                DecodeRangeInner(current_block, block.start, block.end);
                InsertControlFlow(current_block, block);
            } else {
                // Branch is ignored: decode the terminating instruction too.
                DecodeRangeInner(current_block, block.start, block.end + 1);
            }
        }
        insert_block(current_block, current_label);
        break;
    }
    case CompileDepth::DecompileBackwards:
    case CompileDepth::FullDecompile: {
        // Fully decompiled path: walk the AST and decode each encoded block in place.
        program_manager = std::move(shader_info.manager);
        disable_flow_stack = true;
        decompiled = true;
        ASTDecoder decoder{*this};
        ASTNode program = GetASTProgram();
        decoder.Visit(program);
        break;
    }
    default:
        LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
        [[fallthrough]];
    case CompileDepth::BruteForce: {
        // Last resort: treat every single instruction as its own basic block.
        const auto shader_end = static_cast<u32>(program_code.size());
        coverage_begin = main_offset;
        coverage_end = shader_end;
        for (u32 label = main_offset; label < shader_end; ++label) {
            basic_blocks.insert({label, DecodeRange(label, label + 1)});
        }
        break;
    }
    }
    // Surface any downgrade the flow scanner performed relative to the request.
    if (settings.depth != shader_info.settings.depth) {
        LOG_WARNING(
            HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"",
            CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth));
    }
}
| 201 | |||
| 202 | NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { | ||
| 203 | NodeBlock basic_block; | ||
| 204 | DecodeRangeInner(basic_block, begin, end); | ||
| 205 | return basic_block; | ||
| 206 | } | ||
| 207 | |||
| 208 | void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { | ||
| 209 | for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||
| 210 | pc = DecodeInstr(bb, pc); | ||
| 211 | } | ||
| 212 | } | ||
| 213 | |||
| 214 | void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { | ||
| 215 | const auto apply_conditions = [&](const Condition& cond, Node n) -> Node { | ||
| 216 | Node result = n; | ||
| 217 | if (cond.cc != ConditionCode::T) { | ||
| 218 | result = Conditional(GetConditionCode(cond.cc), {result}); | ||
| 219 | } | ||
| 220 | if (cond.predicate != Pred::UnusedIndex) { | ||
| 221 | u32 pred = static_cast<u32>(cond.predicate); | ||
| 222 | const bool is_neg = pred > 7; | ||
| 223 | if (is_neg) { | ||
| 224 | pred -= 8; | ||
| 225 | } | ||
| 226 | result = Conditional(GetPredicate(pred, is_neg), {result}); | ||
| 227 | } | ||
| 228 | return result; | ||
| 229 | }; | ||
| 230 | if (std::holds_alternative<SingleBranch>(*block.branch)) { | ||
| 231 | auto branch = std::get_if<SingleBranch>(block.branch.get()); | ||
| 232 | if (branch->address < 0) { | ||
| 233 | if (branch->kill) { | ||
| 234 | Node n = Operation(OperationCode::Discard); | ||
| 235 | n = apply_conditions(branch->condition, n); | ||
| 236 | bb.push_back(n); | ||
| 237 | global_code.push_back(n); | ||
| 238 | return; | ||
| 239 | } | ||
| 240 | Node n = Operation(OperationCode::Exit); | ||
| 241 | n = apply_conditions(branch->condition, n); | ||
| 242 | bb.push_back(n); | ||
| 243 | global_code.push_back(n); | ||
| 244 | return; | ||
| 245 | } | ||
| 246 | Node n = Operation(OperationCode::Branch, Immediate(branch->address)); | ||
| 247 | n = apply_conditions(branch->condition, n); | ||
| 248 | bb.push_back(n); | ||
| 249 | global_code.push_back(n); | ||
| 250 | return; | ||
| 251 | } | ||
| 252 | auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); | ||
| 253 | Node op_a = GetRegister(multi_branch->gpr); | ||
| 254 | for (auto& branch_case : multi_branch->branches) { | ||
| 255 | Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); | ||
| 256 | Node op_b = Immediate(branch_case.cmp_value); | ||
| 257 | Node condition = | ||
| 258 | GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b); | ||
| 259 | auto result = Conditional(condition, {n}); | ||
| 260 | bb.push_back(result); | ||
| 261 | global_code.push_back(result); | ||
| 262 | } | ||
| 263 | } | ||
| 264 | |||
// Decodes the single instruction at `pc` into `bb`, wrapping the produced nodes in
// a predicate conditional when the instruction is predicated. Returns the next pc.
u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
    // Ignore sched instructions when generating code.
    if (IsSchedInstruction(pc, main_offset)) {
        return pc + 1;
    }

    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const u32 nv_address = ConvertAddressToNvidiaSpace(pc);

    // Decoding failure
    if (!opcode) {
        UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
        bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})",
                                         nv_address, instr.value)));
        return pc + 1;
    }

    // Emit a disassembly-style comment so generated code can be matched back to the
    // original instruction stream.
    bb.push_back(Comment(
        fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value)));

    using Tegra::Shader::Pred;
    UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
                         "NeverExecute predicate not implemented");

    // Dispatch table mapping each opcode category to its dedicated decoder method.
    static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = {
        {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
        {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
        {OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
        {OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
        {OpCode::Type::Shift, &ShaderIR::DecodeShift},
        {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
        {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
        {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
        {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
        {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
        {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
        {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
        {OpCode::Type::Warp, &ShaderIR::DecodeWarp},
        {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
        {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
        {OpCode::Type::Image, &ShaderIR::DecodeImage},
        {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
        {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
        {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
        {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
        {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
        {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
        {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
        {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
        {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
        {OpCode::Type::Video, &ShaderIR::DecodeVideo},
        {OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
    };

    // Decode into a temporary block first so the result can be wrapped in a single
    // predicate conditional as a unit below.
    std::vector<Node> tmp_block;
    if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) {
        pc = (this->*decoder->second)(tmp_block, pc);
    } else {
        pc = DecodeOther(tmp_block, pc);
    }

    // Some instructions (like SSY) don't have a predicate field, they are always unconditionally
    // executed.
    const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId());
    const auto pred_index = static_cast<u32>(instr.pred.pred_index);

    if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) {
        const Node conditional =
            Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block));
        global_code.push_back(conditional);
        bb.push_back(conditional);
    } else {
        for (auto& node : tmp_block) {
            global_code.push_back(node);
            bb.push_back(node);
        }
    }

    return pc + 1;
}
| 346 | |||
| 347 | void ShaderIR::PostDecode() { | ||
| 348 | // Deduce texture handler size if needed | ||
| 349 | auto gpu_driver = registry.AccessGuestDriverProfile(); | ||
| 350 | DeduceTextureHandlerSize(gpu_driver, used_samplers); | ||
| 351 | // Deduce Indexed Samplers | ||
| 352 | if (!uses_indexed_samplers) { | ||
| 353 | return; | ||
| 354 | } | ||
| 355 | for (auto& sampler : used_samplers) { | ||
| 356 | if (!sampler.is_indexed) { | ||
| 357 | continue; | ||
| 358 | } | ||
| 359 | if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { | ||
| 360 | sampler.size = *size; | ||
| 361 | } else { | ||
| 362 | LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); | ||
| 363 | sampler.size = 1; | ||
| 364 | } | ||
| 365 | } | ||
| 366 | } | ||
| 367 | |||
| 368 | } // namespace VideoCommon::Shader | ||