summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Fernando Sahmkow2019-06-24 19:46:49 -0400
committerGravatar FernandoS272019-07-09 08:14:36 -0400
commit8af6e6a05207b1c9736bd80a89ec3aed1f96dfea (patch)
tree963d5d4d7e0f2ca7762e410f7c400ddd9d8ec3ba /src
parentMerge pull request #2661 from ogniK5377/audren-loop (diff)
downloadyuzu-8af6e6a05207b1c9736bd80a89ec3aed1f96dfea.tar.gz
yuzu-8af6e6a05207b1c9736bd80a89ec3aed1f96dfea.tar.xz
yuzu-8af6e6a05207b1c9736bd80a89ec3aed1f96dfea.zip
shader_ir: Implement a new shader scanner
Diffstat (limited to 'src')
-rw-r--r--src/common/CMakeLists.txt2
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/shader/control_flow.cpp393
-rw-r--r--src/video_core/shader/control_flow.h55
-rw-r--r--src/video_core/shader/decode.cpp39
5 files changed, 475 insertions, 16 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 2554add28..2b4266f29 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -56,6 +56,8 @@ add_custom_command(OUTPUT scm_rev.cpp
56 "${VIDEO_CORE}/shader/decode/shift.cpp" 56 "${VIDEO_CORE}/shader/decode/shift.cpp"
57 "${VIDEO_CORE}/shader/decode/video.cpp" 57 "${VIDEO_CORE}/shader/decode/video.cpp"
58 "${VIDEO_CORE}/shader/decode/xmad.cpp" 58 "${VIDEO_CORE}/shader/decode/xmad.cpp"
59 "${VIDEO_CORE}/shader/control_flow.cpp"
60 "${VIDEO_CORE}/shader/control_flow.h"
59 "${VIDEO_CORE}/shader/decode.cpp" 61 "${VIDEO_CORE}/shader/decode.cpp"
60 "${VIDEO_CORE}/shader/node.h" 62 "${VIDEO_CORE}/shader/node.h"
61 "${VIDEO_CORE}/shader/node_helper.cpp" 63 "${VIDEO_CORE}/shader/node_helper.cpp"
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6839abe71..cd32c65d3 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -103,6 +103,8 @@ add_library(video_core STATIC
103 shader/decode/video.cpp 103 shader/decode/video.cpp
104 shader/decode/xmad.cpp 104 shader/decode/xmad.cpp
105 shader/decode/other.cpp 105 shader/decode/other.cpp
106 shader/control_flow.cpp
107 shader/control_flow.h
106 shader/decode.cpp 108 shader/decode.cpp
107 shader/node_helper.cpp 109 shader/node_helper.cpp
108 shader/node_helper.h 110 shader/node_helper.h
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
new file mode 100644
index 000000000..fcf22c7f2
--- /dev/null
+++ b/src/video_core/shader/control_flow.cpp
@@ -0,0 +1,393 @@
1
2#include <list>
3#include <map>
4#include <unordered_set>
5#include <vector>
6
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "video_core/shader/control_flow.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16
// Placeholder branch address that ParseCode stores for blocks ending in SYNC or BRK.
// ParseCode itself does not resolve where those instructions actually jump to.
17constexpr s32 unassigned_branch = -2;
18
19struct BlockBranchInfo {
20 Condition condition{};
21 s32 address{exit_branch};
22 bool kill{};
23 bool is_sync{};
24 bool is_brk{};
25};
26
27struct BlockInfo {
28 BlockInfo() {}
29 u32 start{};
30 u32 end{};
31 bool visited{};
32 BlockBranchInfo branch{};
33
34 bool IsInside(const u32 address) const {
35 return start <= address && address <= end;
36 }
37};
38
39struct Stamp {
40 Stamp() = default;
41 Stamp(u32 address, u32 target) : address{address}, target{target} {}
42 u32 address{};
43 u32 target{};
44 bool operator==(const Stamp& sb) const {
45 return std::tie(address, target) == std::tie(sb.address, sb.target);
46 }
47 bool operator<(const Stamp& sb) const {
48 return address < sb.address;
49 }
50 bool operator>(const Stamp& sb) const {
51 return address > sb.address;
52 }
53 bool operator<=(const Stamp& sb) const {
54 return address <= sb.address;
55 }
56 bool operator>=(const Stamp& sb) const {
57 return address >= sb.address;
58 }
59};
60
61struct CFGRebuildState {
62 explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size)
63 : program_code{program_code}, program_size{program_size} {
64 // queries.clear();
65 block_info.clear();
66 labels.clear();
67 visited_address.clear();
68 ssy_labels.clear();
69 pbk_labels.clear();
70 inspect_queries.clear();
71 }
72
73 std::vector<BlockInfo> block_info{};
74 std::list<u32> inspect_queries{};
75 // std::list<Query> queries{};
76 std::unordered_set<u32> visited_address{};
77 std::unordered_set<u32> labels{};
78 std::set<Stamp> ssy_labels;
79 std::set<Stamp> pbk_labels;
80 const ProgramCode& program_code;
81 const std::size_t program_size;
82};
83
84enum class BlockCollision : u32 { None = 0, Found = 1, Inside = 2 };
85
86std::pair<BlockCollision, std::vector<BlockInfo>::iterator> TryGetBlock(CFGRebuildState& state,
87 u32 address) {
88 auto it = state.block_info.begin();
89 while (it != state.block_info.end()) {
90 if (it->start == address) {
91 return {BlockCollision::Found, it};
92 }
93 if (it->IsInside(address)) {
94 return {BlockCollision::Inside, it};
95 }
96 it++;
97 }
98 return {BlockCollision::None, it};
99}
100
101struct ParseInfo {
102 BlockBranchInfo branch_info{};
103 u32 end_address{};
104};
105
106BlockInfo* CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
107 auto& it = state.block_info.emplace_back();
108 it.start = start;
109 it.end = end;
110 state.visited_address.insert(start);
111 return &it;
112}
113
114Pred GetPredicate(u32 index, bool negated) {
115 return static_cast<Pred>(index + (negated ? 8 : 0));
116}
117
118enum class ParseResult : u32 {
119 ControlCaught = 0,
120 BlockEnd = 1,
121 AbnormalFlow = 2,
122};
123
124ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info) {
125
126 u32 offset = static_cast<u32>(address);
127 u32 end_address = static_cast<u32>(state.program_size - 10U) * 8U;
128
129 auto insert_label = ([](CFGRebuildState& state, u32 address) {
130 auto pair = state.labels.emplace(address);
131 if (pair.second) {
132 state.inspect_queries.push_back(address);
133 }
134 });
135
136 while (true) {
137 if (offset >= end_address) {
138 parse_info.branch_info.address = exit_branch;
139 break;
140 }
141 if (state.visited_address.count(offset) != 0) {
142 parse_info.branch_info.address = offset;
143 break;
144 }
145 const Instruction instr = {state.program_code[offset]};
146 const auto opcode = OpCode::Decode(instr);
147 if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) {
148 offset++;
149 continue;
150 }
151
152 switch (opcode->get().GetId()) {
153 case OpCode::Id::EXIT: {
154 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
155 parse_info.branch_info.condition.predicate =
156 GetPredicate(pred_index, instr.negate_pred != 0);
157 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
158 offset++;
159 continue;
160 }
161 const ConditionCode cc = instr.flow_condition_code;
162 parse_info.branch_info.condition.cc = cc;
163 if (cc == ConditionCode::F) {
164 offset++;
165 continue;
166 }
167 parse_info.branch_info.address = exit_branch;
168 parse_info.branch_info.kill = false;
169 parse_info.branch_info.is_sync = false;
170 parse_info.branch_info.is_brk = false;
171 parse_info.end_address = offset;
172
173 return ParseResult::ControlCaught;
174 }
175 case OpCode::Id::BRA: {
176 if (instr.bra.constant_buffer != 0) {
177 return ParseResult::AbnormalFlow;
178 }
179 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
180 parse_info.branch_info.condition.predicate =
181 GetPredicate(pred_index, instr.negate_pred != 0);
182 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
183 offset++;
184 continue;
185 }
186 const ConditionCode cc = instr.flow_condition_code;
187 parse_info.branch_info.condition.cc = cc;
188 if (cc == ConditionCode::F) {
189 offset++;
190 continue;
191 }
192 u32 branch_offset = offset + instr.bra.GetBranchTarget();
193 if (branch_offset == 0) {
194 parse_info.branch_info.address = exit_branch;
195 } else {
196 parse_info.branch_info.address = branch_offset;
197 }
198 insert_label(state, branch_offset);
199 parse_info.branch_info.kill = false;
200 parse_info.branch_info.is_sync = false;
201 parse_info.branch_info.is_brk = false;
202 parse_info.end_address = offset;
203
204 return ParseResult::ControlCaught;
205 }
206 case OpCode::Id::SYNC: {
207 parse_info.branch_info.condition;
208 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
209 parse_info.branch_info.condition.predicate =
210 GetPredicate(pred_index, instr.negate_pred != 0);
211 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
212 offset++;
213 continue;
214 }
215 const ConditionCode cc = instr.flow_condition_code;
216 parse_info.branch_info.condition.cc = cc;
217 if (cc == ConditionCode::F) {
218 offset++;
219 continue;
220 }
221 parse_info.branch_info.address = unassigned_branch;
222 parse_info.branch_info.kill = false;
223 parse_info.branch_info.is_sync = true;
224 parse_info.branch_info.is_brk = false;
225 parse_info.end_address = offset;
226
227 return ParseResult::ControlCaught;
228 }
229 case OpCode::Id::BRK: {
230 parse_info.branch_info.condition;
231 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
232 parse_info.branch_info.condition.predicate =
233 GetPredicate(pred_index, instr.negate_pred != 0);
234 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
235 offset++;
236 continue;
237 }
238 const ConditionCode cc = instr.flow_condition_code;
239 parse_info.branch_info.condition.cc = cc;
240 if (cc == ConditionCode::F) {
241 offset++;
242 continue;
243 }
244 parse_info.branch_info.address = unassigned_branch;
245 parse_info.branch_info.kill = false;
246 parse_info.branch_info.is_sync = false;
247 parse_info.branch_info.is_brk = true;
248 parse_info.end_address = offset;
249
250 return ParseResult::ControlCaught;
251 }
252 case OpCode::Id::KIL: {
253 parse_info.branch_info.condition;
254 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
255 parse_info.branch_info.condition.predicate =
256 GetPredicate(pred_index, instr.negate_pred != 0);
257 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
258 offset++;
259 continue;
260 }
261 const ConditionCode cc = instr.flow_condition_code;
262 parse_info.branch_info.condition.cc = cc;
263 if (cc == ConditionCode::F) {
264 offset++;
265 continue;
266 }
267 parse_info.branch_info.address = exit_branch;
268 parse_info.branch_info.kill = true;
269 parse_info.branch_info.is_sync = false;
270 parse_info.branch_info.is_brk = false;
271 parse_info.end_address = offset;
272
273 return ParseResult::ControlCaught;
274 }
275 case OpCode::Id::SSY: {
276 const u32 target = offset + instr.bra.GetBranchTarget();
277 insert_label(state, target);
278 state.ssy_labels.emplace(offset, target);
279 break;
280 }
281 case OpCode::Id::PBK: {
282 const u32 target = offset + instr.bra.GetBranchTarget();
283 insert_label(state, target);
284 state.pbk_labels.emplace(offset, target);
285 break;
286 }
287 default:
288 break;
289 }
290
291 offset++;
292 }
293 parse_info.branch_info.kill = false;
294 parse_info.branch_info.is_sync = false;
295 parse_info.branch_info.is_brk = false;
296 parse_info.end_address = offset - 1;
297 return ParseResult::BlockEnd;
298}
299
300bool TryInspectAddress(CFGRebuildState& state) {
301 if (state.inspect_queries.empty()) {
302 return false;
303 }
304 u32 address = state.inspect_queries.front();
305 state.inspect_queries.pop_front();
306 auto search_result = TryGetBlock(state, address);
307 BlockInfo* block_info;
308 switch (search_result.first) {
309 case BlockCollision::Found: {
310 return true;
311 break;
312 }
313 case BlockCollision::Inside: {
314 // This case is the tricky one:
315 // We need to Split the block in 2 sepprate blocks
316 auto it = search_result.second;
317 block_info = CreateBlockInfo(state, address, it->end);
318 it->end = address - 1;
319 block_info->branch = it->branch;
320 BlockBranchInfo forward_branch{};
321 forward_branch.address = address;
322 it->branch = forward_branch;
323 return true;
324 break;
325 }
326 default:
327 break;
328 }
329 ParseInfo parse_info;
330 ParseResult parse_result = ParseCode(state, address, parse_info);
331 if (parse_result == ParseResult::AbnormalFlow) {
332 // if it's the end of the program, end it safely
333 // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction
334 return false;
335 }
336
337 block_info = CreateBlockInfo(state, address, parse_info.end_address);
338 block_info->branch = parse_info.branch_info;
339 if (parse_info.branch_info.condition.IsUnconditional()) {
340 return true;
341 }
342
343 u32 fallthrough_address = parse_info.end_address + 1;
344 state.inspect_queries.push_front(fallthrough_address);
345 return true;
346}
347
348bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address,
349 ShaderCharacteristics& result_out) {
350 CFGRebuildState state{program_code, program_size};
351 // Inspect Code and generate blocks
352 state.labels.clear();
353 state.labels.emplace(start_address);
354 state.inspect_queries.push_back(start_address);
355 while (!state.inspect_queries.empty()) {
356 if (!TryInspectAddress(state)) {
357 return false;
358 }
359 }
360 std::sort(state.block_info.begin(), state.block_info.end(),
361 [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; });
362 // Remove unvisited blocks
363 result_out.blocks.clear();
364 result_out.decompilable = false;
365 result_out.start = start_address;
366 result_out.end = start_address;
367 for (auto& block : state.block_info) {
368 ShaderBlock new_block{};
369 new_block.start = block.start;
370 new_block.end = block.end;
371 new_block.branch.cond = block.branch.condition;
372 new_block.branch.kills = block.branch.kill;
373 new_block.branch.address = block.branch.address;
374 result_out.end = std::max(result_out.end, block.end);
375 result_out.blocks.push_back(new_block);
376 }
377 if (result_out.decompilable) {
378 return true;
379 }
380 auto back = result_out.blocks.begin();
381 auto next = std::next(back);
382 while (next != result_out.blocks.end()) {
383 if (state.labels.count(next->start) == 0 && next->start == back->end + 1) {
384 back->end = next->end;
385 next = result_out.blocks.erase(next);
386 continue;
387 }
388 back = next;
389 next++;
390 }
391 return true;
392}
393} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
new file mode 100644
index 000000000..16736d57a
--- /dev/null
+++ b/src/video_core/shader/control_flow.h
@@ -0,0 +1,55 @@
1#pragma once
2
3#include <cstring>
4#include <list>
5#include <optional>
6#include <vector>
7
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::ConditionCode;
14using Tegra::Shader::Pred;
15
// Sentinel branch address. It is the default target of a block's branch and is what the
// scanner in control_flow.cpp stores for blocks that end in EXIT or KIL, i.e. blocks that
// do not continue at another instruction.
16constexpr s32 exit_branch = -1;
17
18struct Condition {
19 Pred predicate{Pred::UnusedIndex};
20 ConditionCode cc{ConditionCode::T};
21
22 bool IsUnconditional() const {
23 return (predicate == Pred::UnusedIndex) && (cc == ConditionCode::T);
24 }
25};
26
27struct ShaderBlock {
28 ShaderBlock() {}
29 ShaderBlock(const ShaderBlock& sb) = default;
30 u32 start{};
31 u32 end{};
32 struct Branch {
33 Condition cond{};
34 bool kills{};
35 s32 address{};
36 bool operator==(const Branch& b) const {
37 return std::memcmp(this, &b, sizeof(Branch)) == 0;
38 }
39 } branch;
40 bool operator==(const ShaderBlock& sb) const {
41 return std::memcmp(this, &sb, sizeof(ShaderBlock)) == 0;
42 }
43};
44
45struct ShaderCharacteristics {
46 std::list<ShaderBlock> blocks;
47 bool decompilable{};
48 u32 start;
49 u32 end;
50};
51
// Scans the control flow of `program_code` starting at `start_address` and, on success,
// stores the discovered basic blocks and the covered range in `result_out`.
// Returns false when the flow could not be analysed, true otherwise.
// NOTE(review): the unit expected for `program_size` is not evident from this header;
// the visible caller passes MAX_PROGRAM_LENGTH. Confirm against the implementation.
52bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address,
53 ShaderCharacteristics& result_out);
54
55} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 2c9ff28f2..7f433c56b 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -11,6 +11,7 @@
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/engines/shader_bytecode.h" 12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/engines/shader_header.h" 13#include "video_core/engines/shader_header.h"
14#include "video_core/shader/control_flow.h"
14#include "video_core/shader/node_helper.h" 15#include "video_core/shader/node_helper.h"
15#include "video_core/shader/shader_ir.h" 16#include "video_core/shader/shader_ir.h"
16 17
@@ -51,25 +52,31 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
51void ShaderIR::Decode() { 52void ShaderIR::Decode() {
52 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); 53 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
53 54
54 std::set<u32> labels; 55 ShaderCharacteristics shader_info{};
55 const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels); 56 bool can_proceed = ScanFlow(program_code, MAX_PROGRAM_LENGTH, main_offset, shader_info);
56 if (exit_method != ExitMethod::AlwaysEnd) { 57 if (can_proceed) {
57 UNREACHABLE_MSG("Program does not always end"); 58 coverage_begin = shader_info.start;
58 } 59 coverage_end = shader_info.end;
59 60 if (shader_info.decompilable) {
60 if (labels.empty()) { 61 return;
61 basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)}); 62 }
63 // we can't decompile it, fallback to standard method
64 for (const auto& block : shader_info.blocks) {
65 basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
66 }
62 return; 67 return;
63 } 68 }
64 69 LOG_CRITICAL(HW_GPU, "Flow Analysis failed, falling back to brute force compiling");
65 labels.insert(main_offset); 70
66 71 // Now we need to deal with an undecompilable shader. We need to brute force
67 for (const u32 label : labels) { 72 // a shader that captures every position.
68 const auto next_it = labels.lower_bound(label + 1); 73 coverage_begin = shader_info.start;
69 const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it; 74 const u32 shader_end = static_cast<u32>(MAX_PROGRAM_LENGTH);
70 75 coverage_end = shader_end;
71 basic_blocks.insert({label, DecodeRange(label, next_label)}); 76 for (u32 label = main_offset; label < shader_end; label++) {
77 basic_blocks.insert({label, DecodeRange(label, label + 1)});
72 } 78 }
79 return;
73} 80}
74 81
75ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) { 82ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {