diff options
| author | 2019-06-24 19:46:49 -0400 | |
|---|---|---|
| committer | 2019-07-09 08:14:36 -0400 | |
| commit | 8af6e6a05207b1c9736bd80a89ec3aed1f96dfea (patch) | |
| tree | 963d5d4d7e0f2ca7762e410f7c400ddd9d8ec3ba /src/video_core/shader/decode.cpp | |
| parent | Merge pull request #2661 from ogniK5377/audren-loop (diff) | |
| download | yuzu-8af6e6a05207b1c9736bd80a89ec3aed1f96dfea.tar.gz yuzu-8af6e6a05207b1c9736bd80a89ec3aed1f96dfea.tar.xz yuzu-8af6e6a05207b1c9736bd80a89ec3aed1f96dfea.zip | |
shader_ir: Implement a new shader scanner
Diffstat (limited to 'src/video_core/shader/decode.cpp')
| -rw-r--r-- | src/video_core/shader/decode.cpp | 39 |
1 file changed, 23 insertions, 16 deletions
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 2c9ff28f2..7f433c56b 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/shader_bytecode.h" | 12 | #include "video_core/engines/shader_bytecode.h" |
| 13 | #include "video_core/engines/shader_header.h" | 13 | #include "video_core/engines/shader_header.h" |
| 14 | #include "video_core/shader/control_flow.h" | ||
| 14 | #include "video_core/shader/node_helper.h" | 15 | #include "video_core/shader/node_helper.h" |
| 15 | #include "video_core/shader/shader_ir.h" | 16 | #include "video_core/shader/shader_ir.h" |
| 16 | 17 | ||
| @@ -51,25 +52,31 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | |||
| 51 | void ShaderIR::Decode() { | 52 | void ShaderIR::Decode() { |
| 52 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | 53 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); |
| 53 | 54 | ||
| 54 | std::set<u32> labels; | 55 | ShaderCharacteristics shader_info{}; |
| 55 | const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels); | 56 | bool can_proceed = ScanFlow(program_code, MAX_PROGRAM_LENGTH, main_offset, shader_info); |
| 56 | if (exit_method != ExitMethod::AlwaysEnd) { | 57 | if (can_proceed) { |
| 57 | UNREACHABLE_MSG("Program does not always end"); | 58 | coverage_begin = shader_info.start; |
| 58 | } | 59 | coverage_end = shader_info.end; |
| 59 | 60 | if (shader_info.decompilable) { | |
| 60 | if (labels.empty()) { | 61 | return; |
| 61 | basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)}); | 62 | } |
| 63 | // we can't decompile it, fallback to standard method | ||
| 64 | for (const auto& block : shader_info.blocks) { | ||
| 65 | basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); | ||
| 66 | } | ||
| 62 | return; | 67 | return; |
| 63 | } | 68 | } |
| 64 | 69 | LOG_CRITICAL(HW_GPU, "Flow Analysis failed, falling back to brute force compiling"); | |
| 65 | labels.insert(main_offset); | 70 | |
| 66 | 71 | // Now we need to deal with an undecompilable shader. We need to brute force | |
| 67 | for (const u32 label : labels) { | 72 | // a shader that captures every position. |
| 68 | const auto next_it = labels.lower_bound(label + 1); | 73 | coverage_begin = shader_info.start; |
| 69 | const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it; | 74 | const u32 shader_end = static_cast<u32>(MAX_PROGRAM_LENGTH); |
| 70 | 75 | coverage_end = shader_end; | |
| 71 | basic_blocks.insert({label, DecodeRange(label, next_label)}); | 76 | for (u32 label = main_offset; label < shader_end; label++) { |
| 77 | basic_blocks.insert({label, DecodeRange(label, label + 1)}); | ||
| 72 | } | 78 | } |
| 79 | return; | ||
| 73 | } | 80 | } |
| 74 | 81 | ||
| 75 | ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) { | 82 | ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) { |