 src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 10
 src/video_core/shader/control_flow.cpp                  | 47
 src/video_core/shader/control_flow.h                    |  3
 src/video_core/shader/decode.cpp                        | 35
 src/video_core/shader/decode/other.cpp                  | 30
 src/video_core/shader/node.h                            | 12
 src/video_core/shader/shader_ir.h                       |  6
 7 files changed, 85 insertions(+), 58 deletions(-)
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index cedfe30b1..bfc975a04 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -191,10 +191,12 @@ public:
 
         // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
         // unlikely that shaders will use 20 nested SSYs and PBKs.
-        constexpr u32 FLOW_STACK_SIZE = 20;
-        for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
-            code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
-            code.AddLine("uint {} = 0u;", FlowStackTopName(stack));
+        if (!ir.IsFlowStackDisabled()) {
+            constexpr u32 FLOW_STACK_SIZE = 20;
+            for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
+                code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
+                code.AddLine("uint {} = 0u;", FlowStackTopName(stack));
+            }
         }
 
         code.AddLine("while (true) {{");
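
The decompiler now only declares the SSY/PBK stack arrays in the generated GLSL when the IR still contains push/pop flow-stack operations. Below is a minimal standalone sketch of that guarded emission; StackClass, StackName and flow_stack_disabled are illustrative stand-ins for MetaStackClass, FlowStackName/FlowStackTopName and ShaderIR::IsFlowStackDisabled(), and the real code appends lines with code.AddLine instead of printing.

    #include <array>
    #include <cstdio>
    #include <string>

    enum class StackClass { Ssy, Pbk }; // stand-in for MetaStackClass

    static std::string StackName(StackClass c) {
        return c == StackClass::Ssy ? "flow_stack_ssy" : "flow_stack_pbk";
    }

    int main() {
        constexpr unsigned FLOW_STACK_SIZE = 20;
        const bool flow_stack_disabled = false; // would come from the ShaderIR

        // Only declare the SSY/PBK stacks when the IR still pushes/pops them.
        if (!flow_stack_disabled) {
            for (const auto stack : std::array{StackClass::Ssy, StackClass::Pbk}) {
                std::printf("uint %s[%u];\n", StackName(stack).c_str(), FLOW_STACK_SIZE);
                std::printf("uint %s_top = 0u;\n", StackName(stack).c_str());
            }
        }
        std::printf("while (true) {\n");
    }

With the flag set, the preamble collapses to just the `while (true)` dispatch loop.
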
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 3af4c6190..c99d95b57 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -1,5 +1,6 @@
 
 #include <list>
+#include <map>
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
@@ -104,28 +105,6 @@ struct BlockInfo {
     }
 };
 
-struct Stamp {
-    Stamp() = default;
-    Stamp(u32 address, u32 target) : address{address}, target{target} {}
-    u32 address{};
-    u32 target{};
-    bool operator==(const Stamp& sb) const {
-        return std::tie(address, target) == std::tie(sb.address, sb.target);
-    }
-    bool operator<(const Stamp& sb) const {
-        return address < sb.address;
-    }
-    bool operator>(const Stamp& sb) const {
-        return address > sb.address;
-    }
-    bool operator<=(const Stamp& sb) const {
-        return address <= sb.address;
-    }
-    bool operator>=(const Stamp& sb) const {
-        return address >= sb.address;
-    }
-};
-
 struct CFGRebuildState {
     explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size)
         : program_code{program_code}, program_size{program_size} {
@@ -144,8 +123,8 @@ struct CFGRebuildState {
     std::list<Query> queries{};
     std::unordered_map<u32, u32> registered{};
    std::unordered_set<u32> labels{};
-    std::set<Stamp> ssy_labels;
-    std::set<Stamp> pbk_labels;
+    std::map<u32, u32> ssy_labels;
+    std::map<u32, u32> pbk_labels;
     std::unordered_map<u32, BlockStack> stacks{};
     const ProgramCode& program_code;
     const std::size_t program_size;
@@ -393,7 +372,7 @@ bool TryInspectAddress(CFGRebuildState& state) {
     }
     case BlockCollision::Inside: {
         // This case is the tricky one:
-        // We need to Split the block in 2 sepprate blocks
+        // We need to Split the block in 2 sepparate blocks
        auto it = search_result.second;
        block_info = CreateBlockInfo(state, address, it->end);
        it->end = address - 1;
@@ -428,13 +407,11 @@ bool TryInspectAddress(CFGRebuildState& state) {
 }
 
 bool TryQuery(CFGRebuildState& state) {
-    auto gather_labels = ([](ControlStack& cc, std::set<Stamp> labels, BlockInfo& block) {
-        Stamp start{block.start, 0};
-        Stamp end{block.end, 0};
-        auto gather_start = labels.lower_bound(start);
-        auto gather_end = labels.upper_bound(end);
+    auto gather_labels = ([](ControlStack& cc, std::map<u32, u32>& labels, BlockInfo& block) {
+        auto gather_start = labels.lower_bound(block.start);
+        auto gather_end = labels.upper_bound(block.end);
         while (gather_start != gather_end) {
-            cc.Push(gather_start->target);
+            cc.Push(gather_start->second);
             gather_start++;
         }
     });
@@ -444,9 +421,13 @@ bool TryQuery(CFGRebuildState& state) {
     Query& q = state.queries.front();
     u32 block_index = state.registered[q.address];
     BlockInfo& block = state.block_info[block_index];
+    // If the block is visted, check if the stacks match, else gather the ssy/pbk
+    // labels into the current stack and look if the branch at the end of the block
+    // consumes a label. Schedule new queries accordingly
     if (block.visited) {
         BlockStack& stack = state.stacks[q.address];
-        bool all_okay = q.ssy_stack.Compare(stack.ssy_stack) && q.pbk_stack.Compare(stack.pbk_stack);
+        bool all_okay = (stack.ssy_stack.Size() == 0 || q.ssy_stack.Compare(stack.ssy_stack)) &&
+                        (stack.pbk_stack.Size() == 0 || q.pbk_stack.Compare(stack.pbk_stack));
         state.queries.pop_front();
         return all_okay;
     }
@@ -523,8 +504,10 @@ bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address,
         result_out.blocks.push_back(new_block);
     }
     if (result_out.decompilable) {
+        result_out.labels = std::move(state.labels);
         return true;
     }
+    // If it's not decompilable, merge the unlabelled blocks together
     auto back = result_out.blocks.begin();
     auto next = std::next(back);
     while (next != result_out.blocks.end()) {
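
The removed `Stamp` type was only ever ordered by `address`, so an ordered `std::map<u32, u32>` from SSY/PBK instruction address to pushed target carries the same information, and `lower_bound`/`upper_bound` recover every label that falls inside a block's address range. A small self-contained sketch of that range gather, with made-up addresses and a plain vector standing in for `ControlStack`:

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <vector>

    int main() {
        using u32 = std::uint32_t;
        // Address of the SSY instruction -> target address it pushes.
        const std::map<u32, u32> ssy_labels{{0x08, 0x40}, {0x20, 0x80}, {0x60, 0xC0}};

        const u32 block_start = 0x10;
        const u32 block_end = 0x50;

        // Equivalent of the gather_labels lambda: collect every label whose
        // instruction lies inside [block_start, block_end], in address order.
        std::vector<u32> stack; // stand-in for ControlStack::Push
        auto it = ssy_labels.lower_bound(block_start);
        const auto last = ssy_labels.upper_bound(block_end);
        for (; it != last; ++it) {
            stack.push_back(it->second); // previously Stamp::target
        }

        for (const u32 target : stack) {
            std::cout << std::hex << target << '\n'; // prints 80
        }
    }

Because `upper_bound(block_end)` points one past an entry keyed exactly at `block_end`, a label sitting on the last instruction of a block is still collected, matching the inclusive block bounds the scanner uses.
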
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index f5d37a231..4a2cd622c 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -3,7 +3,7 @@
 #include <cstring>
 #include <list>
 #include <optional>
-#include <vector>
+#include <unordered_set>
 
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/shader_ir.h"
@@ -48,6 +48,7 @@ struct ShaderCharacteristics {
     bool decompilable{};
     u32 start;
     u32 end;
+    std::unordered_set<u32> labels{};
 };
 
 bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address,
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 1a74b70cb..f9b1960da 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -38,32 +38,47 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
 void ShaderIR::Decode() {
     std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
 
+    disable_flow_stack = false;
     ShaderCharacteristics shader_info{};
     bool can_proceed = ScanFlow(program_code, program_code.size(), main_offset, shader_info);
     if (can_proceed) {
         coverage_begin = shader_info.start;
         coverage_end = shader_info.end;
         if (shader_info.decompilable) {
+            disable_flow_stack = true;
+            auto insert_block = ([this](NodeBlock& nodes, u32 label) {
+                if (label == exit_branch) {
+                    return;
+                }
+                basic_blocks.insert({label, nodes});
+            });
             std::list<ShaderBlock>& blocks = shader_info.blocks;
+            NodeBlock current_block;
+            u32 current_label = exit_branch;
             for (auto& block : blocks) {
-                NodeBlock nodes;
+                if (shader_info.labels.count(block.start) != 0) {
+                    insert_block(current_block, current_label);
+                    current_block.clear();
+                    current_label = block.start;
+                }
                 if (!block.ignore_branch) {
-                    nodes = DecodeRange(block.start, block.end);
-                    InsertControlFlow(nodes, block);
+                    DecodeRangeInner(current_block, block.start, block.end);
+                    InsertControlFlow(current_block, block);
                 } else {
-                    nodes = DecodeRange(block.start, block.end + 1);
+                    DecodeRangeInner(current_block, block.start, block.end + 1);
                 }
-                basic_blocks.insert({block.start, nodes});
             }
+            insert_block(current_block, current_label);
             return;
         }
+        LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method");
         // we can't decompile it, fallback to standard method
         for (const auto& block : shader_info.blocks) {
             basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
         }
         return;
     }
-    LOG_WARNING(HW_GPU, "Flow Analysis failed, falling back to brute force compiling");
+    LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling");
 
     // Now we need to deal with an undecompilable shader. We need to brute force
     // a shader that captures every position.
@@ -78,10 +93,14 @@ void ShaderIR::Decode() {
 
 NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
     NodeBlock basic_block;
+    DecodeRangeInner(basic_block, begin, end);
+    return basic_block;
+}
+
+void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
     for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
-        pc = DecodeInstr(basic_block, pc);
+        pc = DecodeInstr(bb, pc);
     }
-    return basic_block;
 }
 
 void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
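
The rewritten decode loop no longer creates one `NodeBlock` per `ShaderBlock`; it keeps appending into a single growing block and only starts a new one when a block begins at an address listed in `shader_info.labels`. A simplified sketch of that coalescing, with toy types standing in for `NodeBlock`, `ShaderBlock`, `exit_branch` and `DecodeRangeInner`:

    #include <cstdint>
    #include <map>
    #include <unordered_set>
    #include <vector>

    using u32 = std::uint32_t;
    using NodeList = std::vector<u32>; // stand-in for NodeBlock

    struct Block { // stand-in for ShaderBlock
        u32 start;
        u32 end;
    };

    int main() {
        constexpr u32 exit_label = 0xFFFFFFFF; // stand-in for exit_branch
        const std::unordered_set<u32> labels{0x00, 0x30};
        const std::vector<Block> blocks{{0x00, 0x0F}, {0x10, 0x2F}, {0x30, 0x4F}};

        std::map<u32, NodeList> basic_blocks;
        NodeList current_block;
        u32 current_label = exit_label;

        const auto insert_block = [&](const NodeList& nodes, u32 label) {
            if (label != exit_label) {
                basic_blocks.emplace(label, nodes);
            }
        };

        for (const auto& block : blocks) {
            if (labels.count(block.start) != 0) {
                insert_block(current_block, current_label);
                current_block.clear();
                current_label = block.start;
            }
            // Stand-in for DecodeRangeInner: just record the decoded range.
            current_block.push_back(block.start);
            current_block.push_back(block.end);
        }
        insert_block(current_block, current_label);
        // basic_blocks now has two entries: one labelled 0x00 covering the
        // first two blocks, and one labelled 0x30 covering the last block.
    }

The trailing `insert_block` call mirrors the one after the loop in the diff; without it the final labelled group would be dropped.
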
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index ed3c63781..42e3de02f 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -98,9 +98,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         } else {
             const u32 target = pc + 1;
             const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset());
-            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight,
-                                                 true, PRECISE, op_a, Immediate(3));
-            const Node operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
+            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
+                                                 PRECISE, op_a, Immediate(3));
+            const Node operand =
+                Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
             branch = Operation(OperationCode::BranchIndirect, convert);
         }
 
@@ -119,14 +120,14 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
             const Node index = GetRegister(instr.gpr8);
             const Node op_a =
                 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
-            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight,
-                                                 true, PRECISE, op_a, Immediate(3));
+            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
+                                                 PRECISE, op_a, Immediate(3));
             operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
         } else {
             const s32 target = pc + instr.brx.GetBranchExtend();
             const Node op_a = GetRegister(instr.gpr8);
-            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight,
-                                                 true, PRECISE, op_a, Immediate(3));
+            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
+                                                 PRECISE, op_a, Immediate(3));
             operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
         }
         const Node branch = Operation(OperationCode::BranchIndirect, operand);
@@ -143,6 +144,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                              "Constant buffer flow is not supported");
 
+        if (disable_flow_stack) {
+            break;
+        }
+
         // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
         const u32 target = pc + instr.bra.GetBranchTarget();
         bb.push_back(
@@ -153,6 +158,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                              "Constant buffer PBK is not supported");
 
+        if (disable_flow_stack) {
+            break;
+        }
+
         // PBK pushes to a stack the address where BRK will jump to.
         const u32 target = pc + instr.bra.GetBranchTarget();
         bb.push_back(
@@ -164,6 +173,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
                              static_cast<u32>(cc));
 
+        if (disable_flow_stack) {
+            break;
+        }
+
         // The SYNC opcode jumps to the address previously set by the SSY opcode
         bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
         break;
@@ -172,6 +185,9 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
         UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
                              static_cast<u32>(cc));
+        if (disable_flow_stack) {
+            break;
+        }
 
         // The BRK opcode jumps to the address previously set by the PBK opcode
         bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
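
All four stack opcodes get the same early-out: once `disable_flow_stack` is set, SSY/PBK push nothing and SYNC/BRK pop nothing, since the control-flow analysis has already resolved the addresses those instructions would jump to and the block branches are emitted directly. A compact sketch of the pattern in isolation, with an illustrative enum and print statements in place of the real `bb.push_back` calls:

    #include <cstdio>

    enum class FlowOp { Ssy, Pbk, Sync, Brk }; // illustrative only

    void DecodeFlowOp(FlowOp op, bool disable_flow_stack) {
        switch (op) {
        case FlowOp::Ssy:
        case FlowOp::Pbk:
            if (disable_flow_stack) {
                break; // target already resolved statically; nothing to push
            }
            std::puts("emit PushFlowStack");
            break;
        case FlowOp::Sync:
        case FlowOp::Brk:
            if (disable_flow_stack) {
                break; // the jump was already emitted as a direct branch
            }
            std::puts("emit PopFlowStack");
            break;
        }
    }

    int main() {
        DecodeFlowOp(FlowOp::Ssy, true);   // prints nothing
        DecodeFlowOp(FlowOp::Sync, false); // prints "emit PopFlowStack"
    }
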
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index e468758a6..7427ed896 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -148,12 +148,12 @@ enum class OperationCode {
 
     ImageStore, /// (MetaImage, float[N] coords) -> void
 
-    Branch,        /// (uint branch_target) -> void
-    BranchIndirect,/// (uint branch_target) -> void
-    PushFlowStack, /// (uint branch_target) -> void
-    PopFlowStack,  /// () -> void
-    Exit,          /// () -> void
-    Discard,       /// () -> void
+    Branch,         /// (uint branch_target) -> void
+    BranchIndirect, /// (uint branch_target) -> void
+    PushFlowStack,  /// (uint branch_target) -> void
+    PopFlowStack,   /// () -> void
+    Exit,           /// () -> void
+    Discard,        /// () -> void
 
     EmitVertex,   /// () -> void
     EndPrimitive, /// () -> void
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index a6729064b..928ac7cb5 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -123,10 +123,15 @@ public:
         return header;
     }
 
+    bool IsFlowStackDisabled() const {
+        return disable_flow_stack;
+    }
+
 private:
     void Decode();
 
     NodeBlock DecodeRange(u32 begin, u32 end);
+    void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
     void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block);
 
     /**
@@ -320,6 +325,7 @@ private:
     const ProgramCode& program_code;
     const u32 main_offset;
     const std::size_t program_size;
+    bool disable_flow_stack{};
 
     u32 coverage_begin{};
     u32 coverage_end{};