diff options
| author | 2019-06-24 19:46:49 -0400 | |
|---|---|---|
| committer | 2019-07-09 08:14:36 -0400 | |
| commit | 8af6e6a05207b1c9736bd80a89ec3aed1f96dfea (patch) | |
| tree | 963d5d4d7e0f2ca7762e410f7c400ddd9d8ec3ba /src | |
| parent | Merge pull request #2661 from ogniK5377/audren-loop (diff) | |
| download | yuzu-8af6e6a05207b1c9736bd80a89ec3aed1f96dfea.tar.gz yuzu-8af6e6a05207b1c9736bd80a89ec3aed1f96dfea.tar.xz yuzu-8af6e6a05207b1c9736bd80a89ec3aed1f96dfea.zip | |
shader_ir: Implement a new shader scanner
Diffstat (limited to 'src')
| -rw-r--r-- | src/common/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/shader/control_flow.cpp | 393 | ||||
| -rw-r--r-- | src/video_core/shader/control_flow.h | 55 | ||||
| -rw-r--r-- | src/video_core/shader/decode.cpp | 39 |
5 files changed, 475 insertions, 16 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 2554add28..2b4266f29 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -56,6 +56,8 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 56 | "${VIDEO_CORE}/shader/decode/shift.cpp" | 56 | "${VIDEO_CORE}/shader/decode/shift.cpp" |
| 57 | "${VIDEO_CORE}/shader/decode/video.cpp" | 57 | "${VIDEO_CORE}/shader/decode/video.cpp" |
| 58 | "${VIDEO_CORE}/shader/decode/xmad.cpp" | 58 | "${VIDEO_CORE}/shader/decode/xmad.cpp" |
| 59 | "${VIDEO_CORE}/shader/control_flow.cpp" | ||
| 60 | "${VIDEO_CORE}/shader/control_flow.h" | ||
| 59 | "${VIDEO_CORE}/shader/decode.cpp" | 61 | "${VIDEO_CORE}/shader/decode.cpp" |
| 60 | "${VIDEO_CORE}/shader/node.h" | 62 | "${VIDEO_CORE}/shader/node.h" |
| 61 | "${VIDEO_CORE}/shader/node_helper.cpp" | 63 | "${VIDEO_CORE}/shader/node_helper.cpp" |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6839abe71..cd32c65d3 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -103,6 +103,8 @@ add_library(video_core STATIC | |||
| 103 | shader/decode/video.cpp | 103 | shader/decode/video.cpp |
| 104 | shader/decode/xmad.cpp | 104 | shader/decode/xmad.cpp |
| 105 | shader/decode/other.cpp | 105 | shader/decode/other.cpp |
| 106 | shader/control_flow.cpp | ||
| 107 | shader/control_flow.h | ||
| 106 | shader/decode.cpp | 108 | shader/decode.cpp |
| 107 | shader/node_helper.cpp | 109 | shader/node_helper.cpp |
| 108 | shader/node_helper.h | 110 | shader/node_helper.h |
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp new file mode 100644 index 000000000..fcf22c7f2 --- /dev/null +++ b/src/video_core/shader/control_flow.cpp | |||
| @@ -0,0 +1,393 @@ | |||
| 1 | |||
#include <algorithm>
#include <iterator>
#include <list>
#include <map>
#include <set>
#include <tuple>
#include <unordered_set>
#include <utility>
#include <vector>

#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/shader/control_flow.h"
#include "video_core/shader/shader_ir.h"
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | |||
| 17 | constexpr s32 unassigned_branch = -2; | ||
| 18 | |||
| 19 | struct BlockBranchInfo { | ||
| 20 | Condition condition{}; | ||
| 21 | s32 address{exit_branch}; | ||
| 22 | bool kill{}; | ||
| 23 | bool is_sync{}; | ||
| 24 | bool is_brk{}; | ||
| 25 | }; | ||
| 26 | |||
| 27 | struct BlockInfo { | ||
| 28 | BlockInfo() {} | ||
| 29 | u32 start{}; | ||
| 30 | u32 end{}; | ||
| 31 | bool visited{}; | ||
| 32 | BlockBranchInfo branch{}; | ||
| 33 | |||
| 34 | bool IsInside(const u32 address) const { | ||
| 35 | return start <= address && address <= end; | ||
| 36 | } | ||
| 37 | }; | ||
| 38 | |||
| 39 | struct Stamp { | ||
| 40 | Stamp() = default; | ||
| 41 | Stamp(u32 address, u32 target) : address{address}, target{target} {} | ||
| 42 | u32 address{}; | ||
| 43 | u32 target{}; | ||
| 44 | bool operator==(const Stamp& sb) const { | ||
| 45 | return std::tie(address, target) == std::tie(sb.address, sb.target); | ||
| 46 | } | ||
| 47 | bool operator<(const Stamp& sb) const { | ||
| 48 | return address < sb.address; | ||
| 49 | } | ||
| 50 | bool operator>(const Stamp& sb) const { | ||
| 51 | return address > sb.address; | ||
| 52 | } | ||
| 53 | bool operator<=(const Stamp& sb) const { | ||
| 54 | return address <= sb.address; | ||
| 55 | } | ||
| 56 | bool operator>=(const Stamp& sb) const { | ||
| 57 | return address >= sb.address; | ||
| 58 | } | ||
| 59 | }; | ||
| 60 | |||
| 61 | struct CFGRebuildState { | ||
| 62 | explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size) | ||
| 63 | : program_code{program_code}, program_size{program_size} { | ||
| 64 | // queries.clear(); | ||
| 65 | block_info.clear(); | ||
| 66 | labels.clear(); | ||
| 67 | visited_address.clear(); | ||
| 68 | ssy_labels.clear(); | ||
| 69 | pbk_labels.clear(); | ||
| 70 | inspect_queries.clear(); | ||
| 71 | } | ||
| 72 | |||
| 73 | std::vector<BlockInfo> block_info{}; | ||
| 74 | std::list<u32> inspect_queries{}; | ||
| 75 | // std::list<Query> queries{}; | ||
| 76 | std::unordered_set<u32> visited_address{}; | ||
| 77 | std::unordered_set<u32> labels{}; | ||
| 78 | std::set<Stamp> ssy_labels; | ||
| 79 | std::set<Stamp> pbk_labels; | ||
| 80 | const ProgramCode& program_code; | ||
| 81 | const std::size_t program_size; | ||
| 82 | }; | ||
| 83 | |||
| 84 | enum class BlockCollision : u32 { None = 0, Found = 1, Inside = 2 }; | ||
| 85 | |||
| 86 | std::pair<BlockCollision, std::vector<BlockInfo>::iterator> TryGetBlock(CFGRebuildState& state, | ||
| 87 | u32 address) { | ||
| 88 | auto it = state.block_info.begin(); | ||
| 89 | while (it != state.block_info.end()) { | ||
| 90 | if (it->start == address) { | ||
| 91 | return {BlockCollision::Found, it}; | ||
| 92 | } | ||
| 93 | if (it->IsInside(address)) { | ||
| 94 | return {BlockCollision::Inside, it}; | ||
| 95 | } | ||
| 96 | it++; | ||
| 97 | } | ||
| 98 | return {BlockCollision::None, it}; | ||
| 99 | } | ||
| 100 | |||
| 101 | struct ParseInfo { | ||
| 102 | BlockBranchInfo branch_info{}; | ||
| 103 | u32 end_address{}; | ||
| 104 | }; | ||
| 105 | |||
| 106 | BlockInfo* CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { | ||
| 107 | auto& it = state.block_info.emplace_back(); | ||
| 108 | it.start = start; | ||
| 109 | it.end = end; | ||
| 110 | state.visited_address.insert(start); | ||
| 111 | return ⁢ | ||
| 112 | } | ||
| 113 | |||
| 114 | Pred GetPredicate(u32 index, bool negated) { | ||
| 115 | return static_cast<Pred>(index + (negated ? 8 : 0)); | ||
| 116 | } | ||
| 117 | |||
| 118 | enum class ParseResult : u32 { | ||
| 119 | ControlCaught = 0, | ||
| 120 | BlockEnd = 1, | ||
| 121 | AbnormalFlow = 2, | ||
| 122 | }; | ||
| 123 | |||
| 124 | ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info) { | ||
| 125 | |||
| 126 | u32 offset = static_cast<u32>(address); | ||
| 127 | u32 end_address = static_cast<u32>(state.program_size - 10U) * 8U; | ||
| 128 | |||
| 129 | auto insert_label = ([](CFGRebuildState& state, u32 address) { | ||
| 130 | auto pair = state.labels.emplace(address); | ||
| 131 | if (pair.second) { | ||
| 132 | state.inspect_queries.push_back(address); | ||
| 133 | } | ||
| 134 | }); | ||
| 135 | |||
| 136 | while (true) { | ||
| 137 | if (offset >= end_address) { | ||
| 138 | parse_info.branch_info.address = exit_branch; | ||
| 139 | break; | ||
| 140 | } | ||
| 141 | if (state.visited_address.count(offset) != 0) { | ||
| 142 | parse_info.branch_info.address = offset; | ||
| 143 | break; | ||
| 144 | } | ||
| 145 | const Instruction instr = {state.program_code[offset]}; | ||
| 146 | const auto opcode = OpCode::Decode(instr); | ||
| 147 | if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { | ||
| 148 | offset++; | ||
| 149 | continue; | ||
| 150 | } | ||
| 151 | |||
| 152 | switch (opcode->get().GetId()) { | ||
| 153 | case OpCode::Id::EXIT: { | ||
| 154 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 155 | parse_info.branch_info.condition.predicate = | ||
| 156 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 157 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 158 | offset++; | ||
| 159 | continue; | ||
| 160 | } | ||
| 161 | const ConditionCode cc = instr.flow_condition_code; | ||
| 162 | parse_info.branch_info.condition.cc = cc; | ||
| 163 | if (cc == ConditionCode::F) { | ||
| 164 | offset++; | ||
| 165 | continue; | ||
| 166 | } | ||
| 167 | parse_info.branch_info.address = exit_branch; | ||
| 168 | parse_info.branch_info.kill = false; | ||
| 169 | parse_info.branch_info.is_sync = false; | ||
| 170 | parse_info.branch_info.is_brk = false; | ||
| 171 | parse_info.end_address = offset; | ||
| 172 | |||
| 173 | return ParseResult::ControlCaught; | ||
| 174 | } | ||
| 175 | case OpCode::Id::BRA: { | ||
| 176 | if (instr.bra.constant_buffer != 0) { | ||
| 177 | return ParseResult::AbnormalFlow; | ||
| 178 | } | ||
| 179 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 180 | parse_info.branch_info.condition.predicate = | ||
| 181 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 182 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 183 | offset++; | ||
| 184 | continue; | ||
| 185 | } | ||
| 186 | const ConditionCode cc = instr.flow_condition_code; | ||
| 187 | parse_info.branch_info.condition.cc = cc; | ||
| 188 | if (cc == ConditionCode::F) { | ||
| 189 | offset++; | ||
| 190 | continue; | ||
| 191 | } | ||
| 192 | u32 branch_offset = offset + instr.bra.GetBranchTarget(); | ||
| 193 | if (branch_offset == 0) { | ||
| 194 | parse_info.branch_info.address = exit_branch; | ||
| 195 | } else { | ||
| 196 | parse_info.branch_info.address = branch_offset; | ||
| 197 | } | ||
| 198 | insert_label(state, branch_offset); | ||
| 199 | parse_info.branch_info.kill = false; | ||
| 200 | parse_info.branch_info.is_sync = false; | ||
| 201 | parse_info.branch_info.is_brk = false; | ||
| 202 | parse_info.end_address = offset; | ||
| 203 | |||
| 204 | return ParseResult::ControlCaught; | ||
| 205 | } | ||
| 206 | case OpCode::Id::SYNC: { | ||
| 207 | parse_info.branch_info.condition; | ||
| 208 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 209 | parse_info.branch_info.condition.predicate = | ||
| 210 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 211 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 212 | offset++; | ||
| 213 | continue; | ||
| 214 | } | ||
| 215 | const ConditionCode cc = instr.flow_condition_code; | ||
| 216 | parse_info.branch_info.condition.cc = cc; | ||
| 217 | if (cc == ConditionCode::F) { | ||
| 218 | offset++; | ||
| 219 | continue; | ||
| 220 | } | ||
| 221 | parse_info.branch_info.address = unassigned_branch; | ||
| 222 | parse_info.branch_info.kill = false; | ||
| 223 | parse_info.branch_info.is_sync = true; | ||
| 224 | parse_info.branch_info.is_brk = false; | ||
| 225 | parse_info.end_address = offset; | ||
| 226 | |||
| 227 | return ParseResult::ControlCaught; | ||
| 228 | } | ||
| 229 | case OpCode::Id::BRK: { | ||
| 230 | parse_info.branch_info.condition; | ||
| 231 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 232 | parse_info.branch_info.condition.predicate = | ||
| 233 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 234 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 235 | offset++; | ||
| 236 | continue; | ||
| 237 | } | ||
| 238 | const ConditionCode cc = instr.flow_condition_code; | ||
| 239 | parse_info.branch_info.condition.cc = cc; | ||
| 240 | if (cc == ConditionCode::F) { | ||
| 241 | offset++; | ||
| 242 | continue; | ||
| 243 | } | ||
| 244 | parse_info.branch_info.address = unassigned_branch; | ||
| 245 | parse_info.branch_info.kill = false; | ||
| 246 | parse_info.branch_info.is_sync = false; | ||
| 247 | parse_info.branch_info.is_brk = true; | ||
| 248 | parse_info.end_address = offset; | ||
| 249 | |||
| 250 | return ParseResult::ControlCaught; | ||
| 251 | } | ||
| 252 | case OpCode::Id::KIL: { | ||
| 253 | parse_info.branch_info.condition; | ||
| 254 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 255 | parse_info.branch_info.condition.predicate = | ||
| 256 | GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 257 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 258 | offset++; | ||
| 259 | continue; | ||
| 260 | } | ||
| 261 | const ConditionCode cc = instr.flow_condition_code; | ||
| 262 | parse_info.branch_info.condition.cc = cc; | ||
| 263 | if (cc == ConditionCode::F) { | ||
| 264 | offset++; | ||
| 265 | continue; | ||
| 266 | } | ||
| 267 | parse_info.branch_info.address = exit_branch; | ||
| 268 | parse_info.branch_info.kill = true; | ||
| 269 | parse_info.branch_info.is_sync = false; | ||
| 270 | parse_info.branch_info.is_brk = false; | ||
| 271 | parse_info.end_address = offset; | ||
| 272 | |||
| 273 | return ParseResult::ControlCaught; | ||
| 274 | } | ||
| 275 | case OpCode::Id::SSY: { | ||
| 276 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 277 | insert_label(state, target); | ||
| 278 | state.ssy_labels.emplace(offset, target); | ||
| 279 | break; | ||
| 280 | } | ||
| 281 | case OpCode::Id::PBK: { | ||
| 282 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 283 | insert_label(state, target); | ||
| 284 | state.pbk_labels.emplace(offset, target); | ||
| 285 | break; | ||
| 286 | } | ||
| 287 | default: | ||
| 288 | break; | ||
| 289 | } | ||
| 290 | |||
| 291 | offset++; | ||
| 292 | } | ||
| 293 | parse_info.branch_info.kill = false; | ||
| 294 | parse_info.branch_info.is_sync = false; | ||
| 295 | parse_info.branch_info.is_brk = false; | ||
| 296 | parse_info.end_address = offset - 1; | ||
| 297 | return ParseResult::BlockEnd; | ||
| 298 | } | ||
| 299 | |||
| 300 | bool TryInspectAddress(CFGRebuildState& state) { | ||
| 301 | if (state.inspect_queries.empty()) { | ||
| 302 | return false; | ||
| 303 | } | ||
| 304 | u32 address = state.inspect_queries.front(); | ||
| 305 | state.inspect_queries.pop_front(); | ||
| 306 | auto search_result = TryGetBlock(state, address); | ||
| 307 | BlockInfo* block_info; | ||
| 308 | switch (search_result.first) { | ||
| 309 | case BlockCollision::Found: { | ||
| 310 | return true; | ||
| 311 | break; | ||
| 312 | } | ||
| 313 | case BlockCollision::Inside: { | ||
| 314 | // This case is the tricky one: | ||
| 315 | // We need to Split the block in 2 sepprate blocks | ||
| 316 | auto it = search_result.second; | ||
| 317 | block_info = CreateBlockInfo(state, address, it->end); | ||
| 318 | it->end = address - 1; | ||
| 319 | block_info->branch = it->branch; | ||
| 320 | BlockBranchInfo forward_branch{}; | ||
| 321 | forward_branch.address = address; | ||
| 322 | it->branch = forward_branch; | ||
| 323 | return true; | ||
| 324 | break; | ||
| 325 | } | ||
| 326 | default: | ||
| 327 | break; | ||
| 328 | } | ||
| 329 | ParseInfo parse_info; | ||
| 330 | ParseResult parse_result = ParseCode(state, address, parse_info); | ||
| 331 | if (parse_result == ParseResult::AbnormalFlow) { | ||
| 332 | // if it's the end of the program, end it safely | ||
| 333 | // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction | ||
| 334 | return false; | ||
| 335 | } | ||
| 336 | |||
| 337 | block_info = CreateBlockInfo(state, address, parse_info.end_address); | ||
| 338 | block_info->branch = parse_info.branch_info; | ||
| 339 | if (parse_info.branch_info.condition.IsUnconditional()) { | ||
| 340 | return true; | ||
| 341 | } | ||
| 342 | |||
| 343 | u32 fallthrough_address = parse_info.end_address + 1; | ||
| 344 | state.inspect_queries.push_front(fallthrough_address); | ||
| 345 | return true; | ||
| 346 | } | ||
| 347 | |||
| 348 | bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address, | ||
| 349 | ShaderCharacteristics& result_out) { | ||
| 350 | CFGRebuildState state{program_code, program_size}; | ||
| 351 | // Inspect Code and generate blocks | ||
| 352 | state.labels.clear(); | ||
| 353 | state.labels.emplace(start_address); | ||
| 354 | state.inspect_queries.push_back(start_address); | ||
| 355 | while (!state.inspect_queries.empty()) { | ||
| 356 | if (!TryInspectAddress(state)) { | ||
| 357 | return false; | ||
| 358 | } | ||
| 359 | } | ||
| 360 | std::sort(state.block_info.begin(), state.block_info.end(), | ||
| 361 | [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); | ||
| 362 | // Remove unvisited blocks | ||
| 363 | result_out.blocks.clear(); | ||
| 364 | result_out.decompilable = false; | ||
| 365 | result_out.start = start_address; | ||
| 366 | result_out.end = start_address; | ||
| 367 | for (auto& block : state.block_info) { | ||
| 368 | ShaderBlock new_block{}; | ||
| 369 | new_block.start = block.start; | ||
| 370 | new_block.end = block.end; | ||
| 371 | new_block.branch.cond = block.branch.condition; | ||
| 372 | new_block.branch.kills = block.branch.kill; | ||
| 373 | new_block.branch.address = block.branch.address; | ||
| 374 | result_out.end = std::max(result_out.end, block.end); | ||
| 375 | result_out.blocks.push_back(new_block); | ||
| 376 | } | ||
| 377 | if (result_out.decompilable) { | ||
| 378 | return true; | ||
| 379 | } | ||
| 380 | auto back = result_out.blocks.begin(); | ||
| 381 | auto next = std::next(back); | ||
| 382 | while (next != result_out.blocks.end()) { | ||
| 383 | if (state.labels.count(next->start) == 0 && next->start == back->end + 1) { | ||
| 384 | back->end = next->end; | ||
| 385 | next = result_out.blocks.erase(next); | ||
| 386 | continue; | ||
| 387 | } | ||
| 388 | back = next; | ||
| 389 | next++; | ||
| 390 | } | ||
| 391 | return true; | ||
| 392 | } | ||
| 393 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h new file mode 100644 index 000000000..16736d57a --- /dev/null +++ b/src/video_core/shader/control_flow.h | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | #pragma once | ||
| 2 | |||
| 3 | #include <cstring> | ||
| 4 | #include <list> | ||
| 5 | #include <optional> | ||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::ConditionCode; | ||
| 14 | using Tegra::Shader::Pred; | ||
| 15 | |||
| 16 | constexpr s32 exit_branch = -1; | ||
| 17 | |||
| 18 | struct Condition { | ||
| 19 | Pred predicate{Pred::UnusedIndex}; | ||
| 20 | ConditionCode cc{ConditionCode::T}; | ||
| 21 | |||
| 22 | bool IsUnconditional() const { | ||
| 23 | return (predicate == Pred::UnusedIndex) && (cc == ConditionCode::T); | ||
| 24 | } | ||
| 25 | }; | ||
| 26 | |||
| 27 | struct ShaderBlock { | ||
| 28 | ShaderBlock() {} | ||
| 29 | ShaderBlock(const ShaderBlock& sb) = default; | ||
| 30 | u32 start{}; | ||
| 31 | u32 end{}; | ||
| 32 | struct Branch { | ||
| 33 | Condition cond{}; | ||
| 34 | bool kills{}; | ||
| 35 | s32 address{}; | ||
| 36 | bool operator==(const Branch& b) const { | ||
| 37 | return std::memcmp(this, &b, sizeof(Branch)) == 0; | ||
| 38 | } | ||
| 39 | } branch; | ||
| 40 | bool operator==(const ShaderBlock& sb) const { | ||
| 41 | return std::memcmp(this, &sb, sizeof(ShaderBlock)) == 0; | ||
| 42 | } | ||
| 43 | }; | ||
| 44 | |||
| 45 | struct ShaderCharacteristics { | ||
| 46 | std::list<ShaderBlock> blocks; | ||
| 47 | bool decompilable{}; | ||
| 48 | u32 start; | ||
| 49 | u32 end; | ||
| 50 | }; | ||
| 51 | |||
| 52 | bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address, | ||
| 53 | ShaderCharacteristics& result_out); | ||
| 54 | |||
| 55 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 2c9ff28f2..7f433c56b 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/shader_bytecode.h" | 12 | #include "video_core/engines/shader_bytecode.h" |
| 13 | #include "video_core/engines/shader_header.h" | 13 | #include "video_core/engines/shader_header.h" |
| 14 | #include "video_core/shader/control_flow.h" | ||
| 14 | #include "video_core/shader/node_helper.h" | 15 | #include "video_core/shader/node_helper.h" |
| 15 | #include "video_core/shader/shader_ir.h" | 16 | #include "video_core/shader/shader_ir.h" |
| 16 | 17 | ||
| @@ -51,25 +52,31 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | |||
| 51 | void ShaderIR::Decode() { | 52 | void ShaderIR::Decode() { |
| 52 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | 53 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); |
| 53 | 54 | ||
| 54 | std::set<u32> labels; | 55 | ShaderCharacteristics shader_info{}; |
| 55 | const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels); | 56 | bool can_proceed = ScanFlow(program_code, MAX_PROGRAM_LENGTH, main_offset, shader_info); |
| 56 | if (exit_method != ExitMethod::AlwaysEnd) { | 57 | if (can_proceed) { |
| 57 | UNREACHABLE_MSG("Program does not always end"); | 58 | coverage_begin = shader_info.start; |
| 58 | } | 59 | coverage_end = shader_info.end; |
| 59 | 60 | if (shader_info.decompilable) { | |
| 60 | if (labels.empty()) { | 61 | return; |
| 61 | basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)}); | 62 | } |
| 63 | // we can't decompile it, fallback to standard method | ||
| 64 | for (const auto& block : shader_info.blocks) { | ||
| 65 | basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); | ||
| 66 | } | ||
| 62 | return; | 67 | return; |
| 63 | } | 68 | } |
| 64 | 69 | LOG_CRITICAL(HW_GPU, "Flow Analysis failed, falling back to brute force compiling"); | |
| 65 | labels.insert(main_offset); | 70 | |
| 66 | 71 | // Now we need to deal with an undecompilable shader. We need to brute force | |
| 67 | for (const u32 label : labels) { | 72 | // a shader that captures every position. |
| 68 | const auto next_it = labels.lower_bound(label + 1); | 73 | coverage_begin = shader_info.start; |
| 69 | const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it; | 74 | const u32 shader_end = static_cast<u32>(MAX_PROGRAM_LENGTH); |
| 70 | 75 | coverage_end = shader_end; | |
| 71 | basic_blocks.insert({label, DecodeRange(label, next_label)}); | 76 | for (u32 label = main_offset; label < shader_end; label++) { |
| 77 | basic_blocks.insert({label, DecodeRange(label, label + 1)}); | ||
| 72 | } | 78 | } |
| 79 | return; | ||
| 73 | } | 80 | } |
| 74 | 81 | ||
| 75 | ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) { | 82 | ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) { |