summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 10
-rw-r--r-- src/video_core/shader/control_flow.cpp 47
-rw-r--r-- src/video_core/shader/control_flow.h 3
-rw-r--r-- src/video_core/shader/decode.cpp 35
-rw-r--r-- src/video_core/shader/decode/other.cpp 30
-rw-r--r-- src/video_core/shader/node.h 12
-rw-r--r-- src/video_core/shader/shader_ir.h 6
7 files changed, 85 insertions, 58 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index cedfe30b1..bfc975a04 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -191,10 +191,12 @@ public:
191 191
192 // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems 192 // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
193 // unlikely that shaders will use 20 nested SSYs and PBKs. 193 // unlikely that shaders will use 20 nested SSYs and PBKs.
194 constexpr u32 FLOW_STACK_SIZE = 20; 194 if (!ir.IsFlowStackDisabled()) {
195 for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { 195 constexpr u32 FLOW_STACK_SIZE = 20;
196 code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); 196 for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
197 code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); 197 code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
198 code.AddLine("uint {} = 0u;", FlowStackTopName(stack));
199 }
198 } 200 }
199 201
200 code.AddLine("while (true) {{"); 202 code.AddLine("while (true) {{");
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 3af4c6190..c99d95b57 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -1,5 +1,6 @@
1 1
2#include <list> 2#include <list>
3#include <map>
3#include <unordered_map> 4#include <unordered_map>
4#include <unordered_set> 5#include <unordered_set>
5#include <vector> 6#include <vector>
@@ -104,28 +105,6 @@ struct BlockInfo {
104 } 105 }
105}; 106};
106 107
107struct Stamp {
108 Stamp() = default;
109 Stamp(u32 address, u32 target) : address{address}, target{target} {}
110 u32 address{};
111 u32 target{};
112 bool operator==(const Stamp& sb) const {
113 return std::tie(address, target) == std::tie(sb.address, sb.target);
114 }
115 bool operator<(const Stamp& sb) const {
116 return address < sb.address;
117 }
118 bool operator>(const Stamp& sb) const {
119 return address > sb.address;
120 }
121 bool operator<=(const Stamp& sb) const {
122 return address <= sb.address;
123 }
124 bool operator>=(const Stamp& sb) const {
125 return address >= sb.address;
126 }
127};
128
129struct CFGRebuildState { 108struct CFGRebuildState {
130 explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size) 109 explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size)
131 : program_code{program_code}, program_size{program_size} { 110 : program_code{program_code}, program_size{program_size} {
@@ -144,8 +123,8 @@ struct CFGRebuildState {
144 std::list<Query> queries{}; 123 std::list<Query> queries{};
145 std::unordered_map<u32, u32> registered{}; 124 std::unordered_map<u32, u32> registered{};
146 std::unordered_set<u32> labels{}; 125 std::unordered_set<u32> labels{};
147 std::set<Stamp> ssy_labels; 126 std::map<u32, u32> ssy_labels;
148 std::set<Stamp> pbk_labels; 127 std::map<u32, u32> pbk_labels;
149 std::unordered_map<u32, BlockStack> stacks{}; 128 std::unordered_map<u32, BlockStack> stacks{};
150 const ProgramCode& program_code; 129 const ProgramCode& program_code;
151 const std::size_t program_size; 130 const std::size_t program_size;
@@ -393,7 +372,7 @@ bool TryInspectAddress(CFGRebuildState& state) {
393 } 372 }
394 case BlockCollision::Inside: { 373 case BlockCollision::Inside: {
395 // This case is the tricky one: 374 // This case is the tricky one:
396 // We need to Split the block in 2 sepprate blocks 375 // We need to Split the block in 2 separate blocks
397 auto it = search_result.second; 376 auto it = search_result.second;
398 block_info = CreateBlockInfo(state, address, it->end); 377 block_info = CreateBlockInfo(state, address, it->end);
399 it->end = address - 1; 378 it->end = address - 1;
@@ -428,13 +407,11 @@ bool TryInspectAddress(CFGRebuildState& state) {
428} 407}
429 408
430bool TryQuery(CFGRebuildState& state) { 409bool TryQuery(CFGRebuildState& state) {
431 auto gather_labels = ([](ControlStack& cc, std::set<Stamp> labels, BlockInfo& block) { 410 auto gather_labels = ([](ControlStack& cc, std::map<u32, u32>& labels, BlockInfo& block) {
432 Stamp start{block.start, 0}; 411 auto gather_start = labels.lower_bound(block.start);
433 Stamp end{block.end, 0}; 412 auto gather_end = labels.upper_bound(block.end);
434 auto gather_start = labels.lower_bound(start);
435 auto gather_end = labels.upper_bound(end);
436 while (gather_start != gather_end) { 413 while (gather_start != gather_end) {
437 cc.Push(gather_start->target); 414 cc.Push(gather_start->second);
438 gather_start++; 415 gather_start++;
439 } 416 }
440 }); 417 });
@@ -444,9 +421,13 @@ bool TryQuery(CFGRebuildState& state) {
444 Query& q = state.queries.front(); 421 Query& q = state.queries.front();
445 u32 block_index = state.registered[q.address]; 422 u32 block_index = state.registered[q.address];
446 BlockInfo& block = state.block_info[block_index]; 423 BlockInfo& block = state.block_info[block_index];
424 // If the block is visited, check if the stacks match, else gather the ssy/pbk
425 // labels into the current stack and look if the branch at the end of the block
426 // consumes a label. Schedule new queries accordingly.
447 if (block.visited) { 427 if (block.visited) {
448 BlockStack& stack = state.stacks[q.address]; 428 BlockStack& stack = state.stacks[q.address];
449 bool all_okay = q.ssy_stack.Compare(stack.ssy_stack) && q.pbk_stack.Compare(stack.pbk_stack); 429 bool all_okay = (stack.ssy_stack.Size() == 0 || q.ssy_stack.Compare(stack.ssy_stack)) &&
430 (stack.pbk_stack.Size() == 0 || q.pbk_stack.Compare(stack.pbk_stack));
450 state.queries.pop_front(); 431 state.queries.pop_front();
451 return all_okay; 432 return all_okay;
452 } 433 }
@@ -523,8 +504,10 @@ bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_addre
523 result_out.blocks.push_back(new_block); 504 result_out.blocks.push_back(new_block);
524 } 505 }
525 if (result_out.decompilable) { 506 if (result_out.decompilable) {
507 result_out.labels = std::move(state.labels);
526 return true; 508 return true;
527 } 509 }
510 // If it's not decompilable, merge the unlabelled blocks together
528 auto back = result_out.blocks.begin(); 511 auto back = result_out.blocks.begin();
529 auto next = std::next(back); 512 auto next = std::next(back);
530 while (next != result_out.blocks.end()) { 513 while (next != result_out.blocks.end()) {
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index f5d37a231..4a2cd622c 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -3,7 +3,7 @@
3#include <cstring> 3#include <cstring>
4#include <list> 4#include <list>
5#include <optional> 5#include <optional>
6#include <vector> 6#include <unordered_set>
7 7
8#include "video_core/engines/shader_bytecode.h" 8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/shader_ir.h" 9#include "video_core/shader/shader_ir.h"
@@ -48,6 +48,7 @@ struct ShaderCharacteristics {
48 bool decompilable{}; 48 bool decompilable{};
49 u32 start; 49 u32 start;
50 u32 end; 50 u32 end;
51 std::unordered_set<u32> labels{};
51}; 52};
52 53
53bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address, 54bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address,
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 1a74b70cb..f9b1960da 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -38,32 +38,47 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
38void ShaderIR::Decode() { 38void ShaderIR::Decode() {
39 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); 39 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
40 40
41 disable_flow_stack = false;
41 ShaderCharacteristics shader_info{}; 42 ShaderCharacteristics shader_info{};
42 bool can_proceed = ScanFlow(program_code, program_code.size(), main_offset, shader_info); 43 bool can_proceed = ScanFlow(program_code, program_code.size(), main_offset, shader_info);
43 if (can_proceed) { 44 if (can_proceed) {
44 coverage_begin = shader_info.start; 45 coverage_begin = shader_info.start;
45 coverage_end = shader_info.end; 46 coverage_end = shader_info.end;
46 if (shader_info.decompilable) { 47 if (shader_info.decompilable) {
48 disable_flow_stack = true;
49 auto insert_block = ([this](NodeBlock& nodes, u32 label) {
50 if (label == exit_branch) {
51 return;
52 }
53 basic_blocks.insert({label, nodes});
54 });
47 std::list<ShaderBlock>& blocks = shader_info.blocks; 55 std::list<ShaderBlock>& blocks = shader_info.blocks;
56 NodeBlock current_block;
57 u32 current_label = exit_branch;
48 for (auto& block : blocks) { 58 for (auto& block : blocks) {
49 NodeBlock nodes; 59 if (shader_info.labels.count(block.start) != 0) {
60 insert_block(current_block, current_label);
61 current_block.clear();
62 current_label = block.start;
63 }
50 if (!block.ignore_branch) { 64 if (!block.ignore_branch) {
51 nodes = DecodeRange(block.start, block.end); 65 DecodeRangeInner(current_block, block.start, block.end);
52 InsertControlFlow(nodes, block); 66 InsertControlFlow(current_block, block);
53 } else { 67 } else {
54 nodes = DecodeRange(block.start, block.end + 1); 68 DecodeRangeInner(current_block, block.start, block.end + 1);
55 } 69 }
56 basic_blocks.insert({block.start, nodes});
57 } 70 }
71 insert_block(current_block, current_label);
58 return; 72 return;
59 } 73 }
74 LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method");
60 // we can't decompile it, fallback to standard method 75 // we can't decompile it, fallback to standard method
61 for (const auto& block : shader_info.blocks) { 76 for (const auto& block : shader_info.blocks) {
62 basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); 77 basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
63 } 78 }
64 return; 79 return;
65 } 80 }
66 LOG_WARNING(HW_GPU, "Flow Analysis failed, falling back to brute force compiling"); 81 LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling");
67 82
68 // Now we need to deal with an undecompilable shader. We need to brute force 83 // Now we need to deal with an undecompilable shader. We need to brute force
69 // a shader that captures every position. 84 // a shader that captures every position.
@@ -78,10 +93,14 @@ void ShaderIR::Decode() {
78 93
79NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { 94NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
80 NodeBlock basic_block; 95 NodeBlock basic_block;
96 DecodeRangeInner(basic_block, begin, end);
97 return basic_block;
98}
99
100void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
81 for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { 101 for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
82 pc = DecodeInstr(basic_block, pc); 102 pc = DecodeInstr(bb, pc);
83 } 103 }
84 return basic_block;
85} 104}
86 105
87void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { 106void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index ed3c63781..42e3de02f 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -98,9 +98,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
98 } else { 98 } else {
99 const u32 target = pc + 1; 99 const u32 target = pc + 1;
100 const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); 100 const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset());
101 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, 101 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
102 true, PRECISE, op_a, Immediate(3)); 102 PRECISE, op_a, Immediate(3));
103 const Node operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); 103 const Node operand =
104 Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
104 branch = Operation(OperationCode::BranchIndirect, convert); 105 branch = Operation(OperationCode::BranchIndirect, convert);
105 } 106 }
106 107
@@ -119,14 +120,14 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
119 const Node index = GetRegister(instr.gpr8); 120 const Node index = GetRegister(instr.gpr8);
120 const Node op_a = 121 const Node op_a =
121 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); 122 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
122 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, 123 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
123 true, PRECISE, op_a, Immediate(3)); 124 PRECISE, op_a, Immediate(3));
124 operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); 125 operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
125 } else { 126 } else {
126 const s32 target = pc + instr.brx.GetBranchExtend(); 127 const s32 target = pc + instr.brx.GetBranchExtend();
127 const Node op_a = GetRegister(instr.gpr8); 128 const Node op_a = GetRegister(instr.gpr8);
128 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, 129 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
129 true, PRECISE, op_a, Immediate(3)); 130 PRECISE, op_a, Immediate(3));
130 operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); 131 operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
131 } 132 }
132 const Node branch = Operation(OperationCode::BranchIndirect, operand); 133 const Node branch = Operation(OperationCode::BranchIndirect, operand);
@@ -143,6 +144,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
143 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, 144 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
144 "Constant buffer flow is not supported"); 145 "Constant buffer flow is not supported");
145 146
147 if (disable_flow_stack) {
148 break;
149 }
150
146 // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. 151 // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
147 const u32 target = pc + instr.bra.GetBranchTarget(); 152 const u32 target = pc + instr.bra.GetBranchTarget();
148 bb.push_back( 153 bb.push_back(
@@ -153,6 +158,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
153 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, 158 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
154 "Constant buffer PBK is not supported"); 159 "Constant buffer PBK is not supported");
155 160
161 if (disable_flow_stack) {
162 break;
163 }
164
156 // PBK pushes to a stack the address where BRK will jump to. 165 // PBK pushes to a stack the address where BRK will jump to.
157 const u32 target = pc + instr.bra.GetBranchTarget(); 166 const u32 target = pc + instr.bra.GetBranchTarget();
158 bb.push_back( 167 bb.push_back(
@@ -164,6 +173,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
164 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", 173 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
165 static_cast<u32>(cc)); 174 static_cast<u32>(cc));
166 175
176 if (disable_flow_stack) {
177 break;
178 }
179
167 // The SYNC opcode jumps to the address previously set by the SSY opcode 180 // The SYNC opcode jumps to the address previously set by the SSY opcode
168 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); 181 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
169 break; 182 break;
@@ -172,6 +185,9 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
172 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; 185 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
173 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", 186 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
174 static_cast<u32>(cc)); 187 static_cast<u32>(cc));
188 if (disable_flow_stack) {
189 break;
190 }
175 191
176 // The BRK opcode jumps to the address previously set by the PBK opcode 192 // The BRK opcode jumps to the address previously set by the PBK opcode
177 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); 193 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index e468758a6..7427ed896 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -148,12 +148,12 @@ enum class OperationCode {
148 148
149 ImageStore, /// (MetaImage, float[N] coords) -> void 149 ImageStore, /// (MetaImage, float[N] coords) -> void
150 150
151 Branch, /// (uint branch_target) -> void 151 Branch, /// (uint branch_target) -> void
152 BranchIndirect,/// (uint branch_target) -> void 152 BranchIndirect, /// (uint branch_target) -> void
153 PushFlowStack, /// (uint branch_target) -> void 153 PushFlowStack, /// (uint branch_target) -> void
154 PopFlowStack, /// () -> void 154 PopFlowStack, /// () -> void
155 Exit, /// () -> void 155 Exit, /// () -> void
156 Discard, /// () -> void 156 Discard, /// () -> void
157 157
158 EmitVertex, /// () -> void 158 EmitVertex, /// () -> void
159 EndPrimitive, /// () -> void 159 EndPrimitive, /// () -> void
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index a6729064b..928ac7cb5 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -123,10 +123,15 @@ public:
123 return header; 123 return header;
124 } 124 }
125 125
126 bool IsFlowStackDisabled() const {
127 return disable_flow_stack;
128 }
129
126private: 130private:
127 void Decode(); 131 void Decode();
128 132
129 NodeBlock DecodeRange(u32 begin, u32 end); 133 NodeBlock DecodeRange(u32 begin, u32 end);
134 void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
130 void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); 135 void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block);
131 136
132 /** 137 /**
@@ -320,6 +325,7 @@ private:
320 const ProgramCode& program_code; 325 const ProgramCode& program_code;
321 const u32 main_offset; 326 const u32 main_offset;
322 const std::size_t program_size; 327 const std::size_t program_size;
328 bool disable_flow_stack{};
323 329
324 u32 coverage_begin{}; 330 u32 coverage_begin{};
325 u32 coverage_end{}; 331 u32 coverage_end{};