Diffstat (limited to 'src')
 src/common/CMakeLists.txt                               |   2
 src/video_core/CMakeLists.txt                           |   2
 src/video_core/engines/shader_bytecode.h                |  16
 src/video_core/renderer_opengl/gl_shader_cache.cpp      |  22
 src/video_core/renderer_opengl/gl_shader_decompiler.cpp |  19
 src/video_core/renderer_opengl/gl_shader_gen.cpp        |   8
 src/video_core/renderer_opengl/gl_shader_gen.h          |   2
 src/video_core/renderer_vulkan/vk_shader_decompiler.cpp |   9
 src/video_core/shader/control_flow.cpp                  | 476
 src/video_core/shader/control_flow.h                    |  63
 src/video_core/shader/decode.cpp                        | 177
 src/video_core/shader/decode/other.cpp                  |  58
 src/video_core/shader/node.h                            |  11
 src/video_core/shader/shader_ir.cpp                     |   4
 src/video_core/shader/shader_ir.h                       |  27
 15 files changed, 774 insertions(+), 122 deletions(-)
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 2554add28..2b4266f29 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -56,6 +56,8 @@ add_custom_command(OUTPUT scm_rev.cpp
56 "${VIDEO_CORE}/shader/decode/shift.cpp" 56 "${VIDEO_CORE}/shader/decode/shift.cpp"
57 "${VIDEO_CORE}/shader/decode/video.cpp" 57 "${VIDEO_CORE}/shader/decode/video.cpp"
58 "${VIDEO_CORE}/shader/decode/xmad.cpp" 58 "${VIDEO_CORE}/shader/decode/xmad.cpp"
59 "${VIDEO_CORE}/shader/control_flow.cpp"
60 "${VIDEO_CORE}/shader/control_flow.h"
59 "${VIDEO_CORE}/shader/decode.cpp" 61 "${VIDEO_CORE}/shader/decode.cpp"
60 "${VIDEO_CORE}/shader/node.h" 62 "${VIDEO_CORE}/shader/node.h"
61 "${VIDEO_CORE}/shader/node_helper.cpp" 63 "${VIDEO_CORE}/shader/node_helper.cpp"
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6839abe71..cd32c65d3 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -103,6 +103,8 @@ add_library(video_core STATIC
     shader/decode/video.cpp
     shader/decode/xmad.cpp
     shader/decode/other.cpp
+    shader/control_flow.cpp
+    shader/control_flow.h
     shader/decode.cpp
     shader/node_helper.cpp
     shader/node_helper.h
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 404d4f5aa..c3055602b 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1368,6 +1368,20 @@ union Instruction {
     } bra;
 
     union {
+        BitField<20, 24, u64> target;
+        BitField<5, 1, u64> constant_buffer;
+
+        s32 GetBranchExtend() const {
+            // Sign extend the branch target offset
+            u32 mask = 1U << (24 - 1);
+            u32 value = static_cast<u32>(target);
+            // The branch offset is relative to the next instruction and is stored in bytes, so
+            // divide it by the size of an instruction and add 1 to it.
+            return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1;
+        }
+    } brx;
+
+    union {
         BitField<39, 1, u64> emit; // EmitVertex
         BitField<40, 1, u64> cut;  // EndPrimitive
     } out;
@@ -1464,6 +1478,7 @@ public:
         BFE_IMM,
         BFI_IMM_R,
         BRA,
+        BRX,
         PBK,
         LD_A,
         LD_L,
@@ -1738,6 +1753,7 @@ private:
1738 INST("111000101001----", Id::SSY, Type::Flow, "SSY"), 1753 INST("111000101001----", Id::SSY, Type::Flow, "SSY"),
1739 INST("111000101010----", Id::PBK, Type::Flow, "PBK"), 1754 INST("111000101010----", Id::PBK, Type::Flow, "PBK"),
1740 INST("111000100100----", Id::BRA, Type::Flow, "BRA"), 1755 INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
1756 INST("111000100101----", Id::BRX, Type::Flow, "BRX"),
1741 INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), 1757 INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
1742 INST("111000110100---", Id::BRK, Type::Flow, "BRK"), 1758 INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
1743 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), 1759 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
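
The brx.GetBranchExtend() helper added above recovers a signed instruction offset from the
raw 24-bit target field. A minimal standalone sketch of the same arithmetic, assuming only
the 8-byte instruction size (names here are illustrative, not part of the commit):

    #include <cstdint>

    std::int32_t SignExtend24(std::uint32_t value) {
        const std::uint32_t mask = 1U << 23;  // sign bit of the 24-bit field
        return static_cast<std::int32_t>((value ^ mask) - mask);
    }

    std::int32_t BranchExtend(std::uint32_t raw_target) {
        // The offset is stored in bytes relative to the next instruction, so
        // convert it to 8-byte instruction slots and add one.
        return SignExtend24(raw_target) / 8 + 1;
    }

    // Example: raw_target = 0xFFFFF0 -> SignExtend24 = -16 bytes -> -16 / 8 + 1 = -1 slots.
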
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index f9b2b03a0..5d76ee12d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -129,9 +129,11 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
 
 /// Hashes one (or two) program streams
 u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code,
-                        const ProgramCode& code_b) {
-    u64 unique_identifier =
-        Common::CityHash64(reinterpret_cast<const char*>(code.data()), CalculateProgramSize(code));
+                        const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) {
+    if (size_a == 0) {
+        size_a = CalculateProgramSize(code);
+    }
+    u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a);
     if (program_type != Maxwell::ShaderProgram::VertexA) {
         return unique_identifier;
     }
@@ -140,8 +142,11 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode&
     std::size_t seed = 0;
     boost::hash_combine(seed, unique_identifier);
 
-    const u64 identifier_b = Common::CityHash64(reinterpret_cast<const char*>(code_b.data()),
-                                                CalculateProgramSize(code_b));
+    if (size_b == 0) {
+        size_b = CalculateProgramSize(code_b);
+    }
+    const u64 identifier_b =
+        Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b);
     boost::hash_combine(seed, identifier_b);
     return static_cast<u64>(seed);
 }
@@ -150,14 +155,17 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode&
 GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type,
                                       ProgramCode program_code, ProgramCode program_code_b) {
     GLShader::ShaderSetup setup(program_code);
+    setup.program.size_a = CalculateProgramSize(program_code);
+    setup.program.size_b = 0;
     if (program_type == Maxwell::ShaderProgram::VertexA) {
         // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
         // Conventional HW does not support this, so we combine VertexA and VertexB into one
         // stage here.
         setup.SetProgramB(program_code_b);
+        setup.program.size_b = CalculateProgramSize(program_code_b);
     }
-    setup.program.unique_identifier =
-        GetUniqueIdentifier(program_type, program_code, program_code_b);
+    setup.program.unique_identifier = GetUniqueIdentifier(
+        program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b);
 
     switch (program_type) {
     case Maxwell::ShaderProgram::VertexA:
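
The size_a/size_b plumbing above lets CreateProgram() measure each program once and reuse the
result, with a zero size meaning "measure it here". A reduced sketch of that default-argument
pattern (illustrative names, not the renderer's API):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    using ProgramCode = std::vector<std::uint64_t>;

    std::size_t Measure(const ProgramCode& code) {
        return code.size() * sizeof(std::uint64_t);  // stand-in for the real scan
    }

    std::uint64_t HashProgram(const ProgramCode& code, std::size_t size = 0) {
        if (size == 0) {
            size = Measure(code);  // fall back to measuring when no cached size is supplied
        }
        // ... hash `size` bytes starting at code.data() ...
        return static_cast<std::uint64_t>(size);  // placeholder result for the sketch
    }
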
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 5f2f1510c..bfc975a04 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -191,10 +191,12 @@ public:
 
         // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
         // unlikely that shaders will use 20 nested SSYs and PBKs.
-        constexpr u32 FLOW_STACK_SIZE = 20;
-        for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
-            code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
-            code.AddLine("uint {} = 0u;", FlowStackTopName(stack));
+        if (!ir.IsFlowStackDisabled()) {
+            constexpr u32 FLOW_STACK_SIZE = 20;
+            for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
+                code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
+                code.AddLine("uint {} = 0u;", FlowStackTopName(stack));
+            }
         }
 
         code.AddLine("while (true) {{");
@@ -1555,6 +1557,14 @@ private:
         return {};
     }
 
+    std::string BranchIndirect(Operation operation) {
+        const std::string op_a = VisitOperand(operation, 0, Type::Uint);
+
+        code.AddLine("jmp_to = {};", op_a);
+        code.AddLine("break;");
+        return {};
+    }
+
     std::string PushFlowStack(Operation operation) {
         const auto stack = std::get<MetaStackClass>(operation.GetMeta());
         const auto target = std::get_if<ImmediateNode>(&*operation[0]);
@@ -1789,6 +1799,7 @@ private:
         &GLSLDecompiler::ImageStore,
 
         &GLSLDecompiler::Branch,
+        &GLSLDecompiler::BranchIndirect,
         &GLSLDecompiler::PushFlowStack,
         &GLSLDecompiler::PopFlowStack,
         &GLSLDecompiler::Exit,
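
BranchIndirect above differs from Branch only in that the jump target is a runtime value: it
stores the computed address into the decompiler's jmp_to dispatch variable and breaks out of
the current switch iteration. A rough C++ analogue of the loop-over-switch skeleton the
decompiler emits (block addresses here are hypothetical):

    #include <cstdint>

    std::uint32_t ComputedTarget() {
        return 0x0a0;  // stands in for a value read from a register or constant buffer
    }

    void Execute() {
        std::uint32_t jmp_to = 0x000;
        while (true) {
            switch (jmp_to) {
            case 0x000:
                // ... decompiled block body ...
                jmp_to = ComputedTarget();  // BranchIndirect: target known only at run time
                break;                      // re-enters the switch with the new address
            case 0x0a0:
                // ... decompiled block body ...
                return;                     // Exit
            default:
                return;
            }
        }
    }
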
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 9148629ec..f9ee8429e 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -29,14 +29,14 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
 };
 
 )";
-    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
     ProgramResult program =
         Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
 
     out += program.first;
 
     if (setup.IsDualProgram()) {
-        const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
+        const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b);
         ProgramResult program_b =
             Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
 
@@ -80,7 +80,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
 };
 
 )";
-    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
     ProgramResult program =
         Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
     out += program.first;
@@ -115,7 +115,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
 };
 
 )";
-    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
     ProgramResult program =
         Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
 
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 0536c8a03..7cbc590f8 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -27,6 +27,8 @@ struct ShaderSetup {
         ProgramCode code;
         ProgramCode code_b; // Used for dual vertex shaders
         u64 unique_identifier;
+        std::size_t size_a;
+        std::size_t size_b;
     } program;
 
     /// Used in scenarios where we have a dual vertex shaders
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 97ce214b1..1bb04607b 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -949,6 +949,14 @@ private:
         return {};
     }
 
+    Id BranchIndirect(Operation operation) {
+        const Id op_a = VisitOperand<Type::Uint>(operation, 0);
+
+        Emit(OpStore(jmp_to, op_a));
+        BranchingOp([&]() { Emit(OpBranch(continue_label)); });
+        return {};
+    }
+
     Id PushFlowStack(Operation operation) {
         const auto target = std::get_if<ImmediateNode>(&*operation[0]);
         ASSERT(target);
@@ -1334,6 +1342,7 @@ private:
         &SPIRVDecompiler::ImageStore,
 
         &SPIRVDecompiler::Branch,
+        &SPIRVDecompiler::BranchIndirect,
         &SPIRVDecompiler::PushFlowStack,
         &SPIRVDecompiler::PopFlowStack,
         &SPIRVDecompiler::Exit,
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
new file mode 100644
index 000000000..fdcc970ff
--- /dev/null
+++ b/src/video_core/shader/control_flow.cpp
@@ -0,0 +1,476 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <list>
+#include <map>
+#include <stack>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/shader/control_flow.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+constexpr s32 unassigned_branch = -2;
+
+struct Query {
+    u32 address{};
+    std::stack<u32> ssy_stack{};
+    std::stack<u32> pbk_stack{};
+};
+
+struct BlockStack {
+    BlockStack() = default;
+    BlockStack(const BlockStack& b) = default;
+    BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
+    std::stack<u32> ssy_stack{};
+    std::stack<u32> pbk_stack{};
+};
+
+struct BlockBranchInfo {
+    Condition condition{};
+    s32 address{exit_branch};
+    bool kill{};
+    bool is_sync{};
+    bool is_brk{};
+    bool ignore{};
+};
+
+struct BlockInfo {
+    u32 start{};
+    u32 end{};
+    bool visited{};
+    BlockBranchInfo branch{};
+
+    bool IsInside(const u32 address) const {
+        return start <= address && address <= end;
+    }
+};
+
+struct CFGRebuildState {
+    explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size,
+                             const u32 start)
+        : program_code{program_code}, program_size{program_size}, start{start} {}
+
+    u32 start{};
+    std::vector<BlockInfo> block_info{};
+    std::list<u32> inspect_queries{};
+    std::list<Query> queries{};
+    std::unordered_map<u32, u32> registered{};
+    std::unordered_set<u32> labels{};
+    std::map<u32, u32> ssy_labels{};
+    std::map<u32, u32> pbk_labels{};
+    std::unordered_map<u32, BlockStack> stacks{};
+    const ProgramCode& program_code;
+    const std::size_t program_size;
+};
+
+enum class BlockCollision : u32 { None, Found, Inside };
+
+std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) {
+    const auto& blocks = state.block_info;
+    for (u32 index = 0; index < blocks.size(); index++) {
+        if (blocks[index].start == address) {
+            return {BlockCollision::Found, index};
+        }
+        if (blocks[index].IsInside(address)) {
+            return {BlockCollision::Inside, index};
+        }
+    }
+    return {BlockCollision::None, -1};
+}
+
+struct ParseInfo {
+    BlockBranchInfo branch_info{};
+    u32 end_address{};
+};
+
+BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
+    auto& it = state.block_info.emplace_back();
+    it.start = start;
+    it.end = end;
+    const u32 index = static_cast<u32>(state.block_info.size() - 1);
+    state.registered.insert({start, index});
+    return it;
+}
+
+Pred GetPredicate(u32 index, bool negated) {
+    return static_cast<Pred>(index + (negated ? 8 : 0));
+}
+
+/**
+ * Returns whether the instruction at the specified offset is a 'sched' instruction.
+ * Sched instructions always appear before a sequence of 3 instructions.
+ */
+constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
+    constexpr u32 SchedPeriod = 4;
+    u32 absolute_offset = offset - main_offset;
+
+    return (absolute_offset % SchedPeriod) == 0;
+}
+
+enum class ParseResult : u32 {
+    ControlCaught,
+    BlockEnd,
+    AbnormalFlow,
+};
+
+std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
+    u32 offset = static_cast<u32>(address);
+    const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction));
+    ParseInfo parse_info{};
+
+    const auto insert_label = [](CFGRebuildState& state, u32 address) {
+        const auto pair = state.labels.emplace(address);
+        if (pair.second) {
+            state.inspect_queries.push_back(address);
+        }
+    };
+
+    while (true) {
+        if (offset >= end_address) {
+            // ASSERT_OR_EXECUTE can't be used, as it ignores the break
+            ASSERT_MSG(false, "Shader passed the current limit!");
+            parse_info.branch_info.address = exit_branch;
+            parse_info.branch_info.ignore = false;
+            break;
+        }
+        if (state.registered.count(offset) != 0) {
+            parse_info.branch_info.address = offset;
+            parse_info.branch_info.ignore = true;
+            break;
+        }
+        if (IsSchedInstruction(offset, state.start)) {
+            offset++;
+            continue;
+        }
+        const Instruction instr = {state.program_code[offset]};
+        const auto opcode = OpCode::Decode(instr);
+        if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) {
+            offset++;
+            continue;
+        }
+
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::EXIT: {
+            const auto pred_index = static_cast<u32>(instr.pred.pred_index);
+            parse_info.branch_info.condition.predicate =
+                GetPredicate(pred_index, instr.negate_pred != 0);
+            if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+                offset++;
+                continue;
+            }
+            const ConditionCode cc = instr.flow_condition_code;
+            parse_info.branch_info.condition.cc = cc;
+            if (cc == ConditionCode::F) {
+                offset++;
+                continue;
+            }
+            parse_info.branch_info.address = exit_branch;
+            parse_info.branch_info.kill = false;
+            parse_info.branch_info.is_sync = false;
+            parse_info.branch_info.is_brk = false;
+            parse_info.branch_info.ignore = false;
+            parse_info.end_address = offset;
+
+            return {ParseResult::ControlCaught, parse_info};
+        }
+        case OpCode::Id::BRA: {
+            if (instr.bra.constant_buffer != 0) {
+                return {ParseResult::AbnormalFlow, parse_info};
+            }
+            const auto pred_index = static_cast<u32>(instr.pred.pred_index);
+            parse_info.branch_info.condition.predicate =
+                GetPredicate(pred_index, instr.negate_pred != 0);
+            if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+                offset++;
+                continue;
+            }
+            const ConditionCode cc = instr.flow_condition_code;
+            parse_info.branch_info.condition.cc = cc;
+            if (cc == ConditionCode::F) {
+                offset++;
+                continue;
+            }
+            const u32 branch_offset = offset + instr.bra.GetBranchTarget();
+            if (branch_offset == 0) {
+                parse_info.branch_info.address = exit_branch;
+            } else {
+                parse_info.branch_info.address = branch_offset;
+            }
+            insert_label(state, branch_offset);
+            parse_info.branch_info.kill = false;
+            parse_info.branch_info.is_sync = false;
+            parse_info.branch_info.is_brk = false;
+            parse_info.branch_info.ignore = false;
+            parse_info.end_address = offset;
+
+            return {ParseResult::ControlCaught, parse_info};
+        }
+        case OpCode::Id::SYNC: {
+            const auto pred_index = static_cast<u32>(instr.pred.pred_index);
+            parse_info.branch_info.condition.predicate =
+                GetPredicate(pred_index, instr.negate_pred != 0);
+            if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+                offset++;
+                continue;
+            }
+            const ConditionCode cc = instr.flow_condition_code;
+            parse_info.branch_info.condition.cc = cc;
+            if (cc == ConditionCode::F) {
+                offset++;
+                continue;
+            }
+            parse_info.branch_info.address = unassigned_branch;
+            parse_info.branch_info.kill = false;
+            parse_info.branch_info.is_sync = true;
+            parse_info.branch_info.is_brk = false;
+            parse_info.branch_info.ignore = false;
+            parse_info.end_address = offset;
+
+            return {ParseResult::ControlCaught, parse_info};
+        }
+        case OpCode::Id::BRK: {
+            const auto pred_index = static_cast<u32>(instr.pred.pred_index);
+            parse_info.branch_info.condition.predicate =
+                GetPredicate(pred_index, instr.negate_pred != 0);
+            if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+                offset++;
+                continue;
+            }
+            const ConditionCode cc = instr.flow_condition_code;
+            parse_info.branch_info.condition.cc = cc;
+            if (cc == ConditionCode::F) {
+                offset++;
+                continue;
+            }
+            parse_info.branch_info.address = unassigned_branch;
+            parse_info.branch_info.kill = false;
+            parse_info.branch_info.is_sync = false;
+            parse_info.branch_info.is_brk = true;
+            parse_info.branch_info.ignore = false;
+            parse_info.end_address = offset;
+
+            return {ParseResult::ControlCaught, parse_info};
+        }
+        case OpCode::Id::KIL: {
+            const auto pred_index = static_cast<u32>(instr.pred.pred_index);
+            parse_info.branch_info.condition.predicate =
+                GetPredicate(pred_index, instr.negate_pred != 0);
+            if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+                offset++;
+                continue;
+            }
+            const ConditionCode cc = instr.flow_condition_code;
+            parse_info.branch_info.condition.cc = cc;
+            if (cc == ConditionCode::F) {
+                offset++;
+                continue;
+            }
+            parse_info.branch_info.address = exit_branch;
+            parse_info.branch_info.kill = true;
+            parse_info.branch_info.is_sync = false;
+            parse_info.branch_info.is_brk = false;
+            parse_info.branch_info.ignore = false;
+            parse_info.end_address = offset;
+
+            return {ParseResult::ControlCaught, parse_info};
+        }
+        case OpCode::Id::SSY: {
+            const u32 target = offset + instr.bra.GetBranchTarget();
+            insert_label(state, target);
+            state.ssy_labels.emplace(offset, target);
+            break;
+        }
+        case OpCode::Id::PBK: {
+            const u32 target = offset + instr.bra.GetBranchTarget();
+            insert_label(state, target);
+            state.pbk_labels.emplace(offset, target);
+            break;
+        }
+        case OpCode::Id::BRX: {
+            return {ParseResult::AbnormalFlow, parse_info};
+        }
+        default:
+            break;
+        }
+
+        offset++;
+    }
+    parse_info.branch_info.kill = false;
+    parse_info.branch_info.is_sync = false;
+    parse_info.branch_info.is_brk = false;
+    parse_info.end_address = offset - 1;
+    return {ParseResult::BlockEnd, parse_info};
+}
+
+bool TryInspectAddress(CFGRebuildState& state) {
+    if (state.inspect_queries.empty()) {
+        return false;
+    }
+
+    const u32 address = state.inspect_queries.front();
+    state.inspect_queries.pop_front();
+    const auto [result, block_index] = TryGetBlock(state, address);
+    switch (result) {
+    case BlockCollision::Found: {
+        return true;
+    }
+    case BlockCollision::Inside: {
+        // This case is the tricky one:
+        // We need to split the block into 2 separate blocks
+        const u32 end = state.block_info[block_index].end;
+        BlockInfo& new_block = CreateBlockInfo(state, address, end);
+        BlockInfo& current_block = state.block_info[block_index];
+        current_block.end = address - 1;
+        new_block.branch = current_block.branch;
+        BlockBranchInfo forward_branch{};
+        forward_branch.address = address;
+        forward_branch.ignore = true;
+        current_block.branch = forward_branch;
+        return true;
+    }
+    default:
+        break;
+    }
+    const auto [parse_result, parse_info] = ParseCode(state, address);
+    if (parse_result == ParseResult::AbnormalFlow) {
+        // If it's AbnormalFlow, return false to end the CFG reconstruction
+        return false;
+    }
+
+    BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address);
+    block_info.branch = parse_info.branch_info;
+    if (parse_info.branch_info.condition.IsUnconditional()) {
+        return true;
+    }
+
+    const u32 fallthrough_address = parse_info.end_address + 1;
+    state.inspect_queries.push_front(fallthrough_address);
+    return true;
+}
+
+bool TryQuery(CFGRebuildState& state) {
+    const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels,
+                                  BlockInfo& block) {
+        auto gather_start = labels.lower_bound(block.start);
+        const auto gather_end = labels.upper_bound(block.end);
+        while (gather_start != gather_end) {
+            cc.push(gather_start->second);
+            gather_start++;
+        }
+    };
+    if (state.queries.empty()) {
+        return false;
+    }
+    Query& q = state.queries.front();
+    const u32 block_index = state.registered[q.address];
+    BlockInfo& block = state.block_info[block_index];
+    // If the block is visited, check if the stacks match, else gather the ssy/pbk
+    // labels into the current stack and look if the branch at the end of the block
+    // consumes a label. Schedule new queries accordingly
+    if (block.visited) {
+        BlockStack& stack = state.stacks[q.address];
+        const bool all_okay = (stack.ssy_stack.size() == 0 || q.ssy_stack == stack.ssy_stack) &&
+                              (stack.pbk_stack.size() == 0 || q.pbk_stack == stack.pbk_stack);
+        state.queries.pop_front();
+        return all_okay;
+    }
+    block.visited = true;
+    state.stacks[q.address] = BlockStack{q};
+    Query q2(q);
+    state.queries.pop_front();
+    gather_labels(q2.ssy_stack, state.ssy_labels, block);
+    gather_labels(q2.pbk_stack, state.pbk_labels, block);
+    if (!block.branch.condition.IsUnconditional()) {
+        q2.address = block.end + 1;
+        state.queries.push_back(q2);
+    }
+    Query conditional_query{q2};
+    if (block.branch.is_sync) {
+        if (block.branch.address == unassigned_branch) {
+            block.branch.address = conditional_query.ssy_stack.top();
+        }
+        conditional_query.ssy_stack.pop();
+    }
+    if (block.branch.is_brk) {
+        if (block.branch.address == unassigned_branch) {
+            block.branch.address = conditional_query.pbk_stack.top();
+        }
+        conditional_query.pbk_stack.pop();
+    }
+    conditional_query.address = block.branch.address;
+    state.queries.push_back(conditional_query);
+    return true;
+}
+
+std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size,
+                                              u32 start_address) {
+    CFGRebuildState state{program_code, program_size, start_address};
+    // Inspect Code and generate blocks
+    state.labels.clear();
+    state.labels.emplace(start_address);
+    state.inspect_queries.push_back(state.start);
+    while (!state.inspect_queries.empty()) {
+        if (!TryInspectAddress(state)) {
+            return {};
+        }
+    }
+    // Decompile Stacks
+    Query start_query{};
+    start_query.address = state.start;
+    state.queries.push_back(start_query);
+    bool decompiled = true;
+    while (!state.queries.empty()) {
+        if (!TryQuery(state)) {
+            decompiled = false;
+            break;
+        }
+    }
+    // Sort and organize results
+    std::sort(state.block_info.begin(), state.block_info.end(),
+              [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; });
+    ShaderCharacteristics result_out{};
+    result_out.decompilable = decompiled;
+    result_out.start = start_address;
+    result_out.end = start_address;
+    for (auto& block : state.block_info) {
+        ShaderBlock new_block{};
+        new_block.start = block.start;
+        new_block.end = block.end;
+        new_block.ignore_branch = block.branch.ignore;
+        if (!new_block.ignore_branch) {
+            new_block.branch.cond = block.branch.condition;
+            new_block.branch.kills = block.branch.kill;
+            new_block.branch.address = block.branch.address;
+        }
+        result_out.end = std::max(result_out.end, block.end);
+        result_out.blocks.push_back(new_block);
+    }
+    if (result_out.decompilable) {
+        result_out.labels = std::move(state.labels);
+        return {result_out};
+    }
+    // If it's not decompilable, merge the unlabelled blocks together
+    auto back = result_out.blocks.begin();
+    auto next = std::next(back);
+    while (next != result_out.blocks.end()) {
+        if (state.labels.count(next->start) == 0 && next->start == back->end + 1) {
+            back->end = next->end;
+            next = result_out.blocks.erase(next);
+            continue;
+        }
+        back = next;
+        next++;
+    }
+    return {result_out};
+}
+} // namespace VideoCommon::Shader
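
A short usage sketch for the new analysis (assumed caller-side code, not part of the commit):
ScanFlow() returns an empty optional when inspection hits abnormal flow such as an unresolved
BRX, so consumers must handle that case.

    #include <cstddef>

    #include "video_core/shader/control_flow.h"

    // Counts the basic blocks recovered by the flow scan; 0 when analysis fails.
    std::size_t CountBlocks(const VideoCommon::Shader::ProgramCode& code, u32 size_in_bytes,
                            u32 start_address) {
        const auto info = VideoCommon::Shader::ScanFlow(code, size_in_bytes, start_address);
        return info ? info->blocks.size() : 0;
    }
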
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
new file mode 100644
index 000000000..5e8ea3271
--- /dev/null
+++ b/src/video_core/shader/control_flow.h
@@ -0,0 +1,63 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstring>
+#include <list>
+#include <optional>
+#include <unordered_set>
+
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::ConditionCode;
+using Tegra::Shader::Pred;
+
+constexpr s32 exit_branch = -1;
+
+struct Condition {
+    Pred predicate{Pred::UnusedIndex};
+    ConditionCode cc{ConditionCode::T};
+
+    bool IsUnconditional() const {
+        return predicate == Pred::UnusedIndex && cc == ConditionCode::T;
+    }
+    bool operator==(const Condition& other) const {
+        return std::tie(predicate, cc) == std::tie(other.predicate, other.cc);
+    }
+};
+
+struct ShaderBlock {
+    u32 start{};
+    u32 end{};
+    bool ignore_branch{};
+    struct Branch {
+        Condition cond{};
+        bool kills{};
+        s32 address{};
+        bool operator==(const Branch& b) const {
+            return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address);
+        }
+    } branch{};
+    bool operator==(const ShaderBlock& sb) const {
+        return std::tie(start, end, ignore_branch, branch) ==
+               std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch);
+    }
+};
+
+struct ShaderCharacteristics {
+    std::list<ShaderBlock> blocks{};
+    bool decompilable{};
+    u32 start{};
+    u32 end{};
+    std::unordered_set<u32> labels{};
+};
+
+std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size,
+                                              u32 start_address);
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 2c9ff28f2..29c8895c5 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -11,6 +11,7 @@
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/engines/shader_header.h"
+#include "video_core/shader/control_flow.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 
@@ -21,20 +22,6 @@ using Tegra::Shader::OpCode;
 
 namespace {
 
-/// Merges exit method of two parallel branches.
-constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) {
-    if (a == ExitMethod::Undetermined) {
-        return b;
-    }
-    if (b == ExitMethod::Undetermined) {
-        return a;
-    }
-    if (a == b) {
-        return a;
-    }
-    return ExitMethod::Conditional;
-}
-
 /**
  * Returns whether the instruction at the specified offset is a 'sched' instruction.
  * Sched instructions always appear before a sequence of 3 instructions.
@@ -51,85 +38,104 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
 void ShaderIR::Decode() {
     std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
 
-    std::set<u32> labels;
-    const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels);
-    if (exit_method != ExitMethod::AlwaysEnd) {
-        UNREACHABLE_MSG("Program does not always end");
-    }
-
-    if (labels.empty()) {
-        basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)});
+    disable_flow_stack = false;
+    const auto info = ScanFlow(program_code, program_size, main_offset);
+    if (info) {
+        const auto& shader_info = *info;
+        coverage_begin = shader_info.start;
+        coverage_end = shader_info.end;
+        if (shader_info.decompilable) {
+            disable_flow_stack = true;
+            const auto insert_block = ([this](NodeBlock& nodes, u32 label) {
+                if (label == exit_branch) {
+                    return;
+                }
+                basic_blocks.insert({label, nodes});
+            });
+            const auto& blocks = shader_info.blocks;
+            NodeBlock current_block;
+            u32 current_label = exit_branch;
+            for (auto& block : blocks) {
+                if (shader_info.labels.count(block.start) != 0) {
+                    insert_block(current_block, current_label);
+                    current_block.clear();
+                    current_label = block.start;
+                }
+                if (!block.ignore_branch) {
+                    DecodeRangeInner(current_block, block.start, block.end);
+                    InsertControlFlow(current_block, block);
+                } else {
+                    DecodeRangeInner(current_block, block.start, block.end + 1);
+                }
+            }
+            insert_block(current_block, current_label);
+            return;
+        }
+        LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method");
+        // we can't decompile it, fallback to standard method
+        for (const auto& block : shader_info.blocks) {
+            basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
+        }
         return;
     }
+    LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling");
+
+    // Now we need to deal with an undecompilable shader. We need to brute force
+    // a shader that captures every position.
+    coverage_begin = main_offset;
+    const u32 shader_end = static_cast<u32>(program_size / sizeof(u64));
+    coverage_end = shader_end;
+    for (u32 label = main_offset; label < shader_end; label++) {
+        basic_blocks.insert({label, DecodeRange(label, label + 1)});
+    }
+}
 
-    labels.insert(main_offset);
-
-    for (const u32 label : labels) {
-        const auto next_it = labels.lower_bound(label + 1);
-        const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it;
-
-        basic_blocks.insert({label, DecodeRange(label, next_label)});
+NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
+    NodeBlock basic_block;
+    DecodeRangeInner(basic_block, begin, end);
+    return basic_block;
+}
+
+void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
+    for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
+        pc = DecodeInstr(bb, pc);
     }
 }
 
-ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
-    const auto [iter, inserted] =
-        exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
-    ExitMethod& exit_method = iter->second;
-    if (!inserted)
-        return exit_method;
-
-    for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) {
-        coverage_begin = std::min(coverage_begin, offset);
-        coverage_end = std::max(coverage_end, offset + 1);
-
-        const Instruction instr = {program_code[offset]};
-        const auto opcode = OpCode::Decode(instr);
-        if (!opcode)
-            continue;
-        switch (opcode->get().GetId()) {
-        case OpCode::Id::EXIT: {
-            // The EXIT instruction can be predicated, which means that the shader can conditionally
-            // end on this instruction. We have to consider the case where the condition is not met
-            // and check the exit method of that other basic block.
-            using Tegra::Shader::Pred;
-            if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
-                return exit_method = ExitMethod::AlwaysEnd;
-            } else {
-                const ExitMethod not_met = Scan(offset + 1, end, labels);
-                return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
-            }
-        }
-        case OpCode::Id::BRA: {
-            const u32 target = offset + instr.bra.GetBranchTarget();
-            labels.insert(target);
-            const ExitMethod no_jmp = Scan(offset + 1, end, labels);
-            const ExitMethod jmp = Scan(target, end, labels);
-            return exit_method = ParallelExit(no_jmp, jmp);
-        }
-        case OpCode::Id::SSY:
-        case OpCode::Id::PBK: {
-            // The SSY and PBK use a similar encoding as the BRA instruction.
-            UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
-                                 "Constant buffer branching is not supported");
-            const u32 target = offset + instr.bra.GetBranchTarget();
-            labels.insert(target);
-            // Continue scanning for an exit method.
-            break;
-        }
-        default:
-            break;
-        }
-    }
-    return exit_method = ExitMethod::AlwaysReturn;
-}
-
-NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
-    NodeBlock basic_block;
-    for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
-        pc = DecodeInstr(basic_block, pc);
-    }
-    return basic_block;
+void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
+    const auto apply_conditions = ([&](const Condition& cond, Node n) -> Node {
+        Node result = n;
+        if (cond.cc != ConditionCode::T) {
+            result = Conditional(GetConditionCode(cond.cc), {result});
+        }
+        if (cond.predicate != Pred::UnusedIndex) {
+            u32 pred = static_cast<u32>(cond.predicate);
+            const bool is_neg = pred > 7;
+            if (is_neg) {
+                pred -= 8;
+            }
+            result = Conditional(GetPredicate(pred, is_neg), {result});
+        }
+        return result;
+    });
+    if (block.branch.address < 0) {
+        if (block.branch.kills) {
+            Node n = Operation(OperationCode::Discard);
+            n = apply_conditions(block.branch.cond, n);
+            bb.push_back(n);
+            global_code.push_back(n);
+            return;
+        }
+        Node n = Operation(OperationCode::Exit);
+        n = apply_conditions(block.branch.cond, n);
+        bb.push_back(n);
+        global_code.push_back(n);
+        return;
+    }
+    Node n = Operation(OperationCode::Branch, Immediate(block.branch.address));
+    n = apply_conditions(block.branch.cond, n);
+    bb.push_back(n);
+    global_code.push_back(n);
 }
 
 u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
@@ -140,15 +146,18 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
 
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
+    const u32 nv_address = ConvertAddressToNvidiaSpace(pc);
 
     // Decoding failure
     if (!opcode) {
         UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
+        bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})",
+                                         nv_address, instr.value)));
         return pc + 1;
     }
 
-    bb.push_back(
-        Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value)));
+    bb.push_back(Comment(
+        fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value)));
 
     using Tegra::Shader::Pred;
     UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
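
InsertControlFlow above undoes the predicate packing used by the scanner: control_flow.cpp's
GetPredicate() folds negation into the index by adding 8, and indices above 7 are unpacked
back into a negated predicate here. The round trip, as a self-contained sketch:

    #include <cstdint>
    #include <utility>

    std::uint32_t Pack(std::uint32_t index, bool negated) {
        return index + (negated ? 8 : 0);  // mirrors GetPredicate() in control_flow.cpp
    }

    std::pair<std::uint32_t, bool> Unpack(std::uint32_t packed) {
        const bool is_neg = packed > 7;    // mirrors the check in InsertControlFlow()
        return {is_neg ? packed - 8 : packed, is_neg};
    }

    // Example: Pack(3, true) == 11, and Unpack(11) yields {3, true}.
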
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d46a8ab82..42e3de02f 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -91,11 +91,46 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::BRA: {
-        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
-                             "BRA with constant buffers are not implemented");
-
-        const u32 target = pc + instr.bra.GetBranchTarget();
-        const Node branch = Operation(OperationCode::Branch, Immediate(target));
+        Node branch;
+        if (instr.bra.constant_buffer == 0) {
+            const u32 target = pc + instr.bra.GetBranchTarget();
+            branch = Operation(OperationCode::Branch, Immediate(target));
+        } else {
+            const u32 target = pc + 1;
+            const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset());
+            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
+                                                 PRECISE, op_a, Immediate(3));
+            const Node operand =
+                Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
+            branch = Operation(OperationCode::BranchIndirect, operand);
+        }
+
+        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
+        if (cc != Tegra::Shader::ConditionCode::T) {
+            bb.push_back(Conditional(GetConditionCode(cc), {branch}));
+        } else {
+            bb.push_back(branch);
+        }
+        break;
+    }
+    case OpCode::Id::BRX: {
+        Node operand;
+        if (instr.brx.constant_buffer != 0) {
+            const s32 target = pc + 1;
+            const Node index = GetRegister(instr.gpr8);
+            const Node op_a =
+                GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
+            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
+                                                 PRECISE, op_a, Immediate(3));
+            operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
+        } else {
+            const s32 target = pc + instr.brx.GetBranchExtend();
+            const Node op_a = GetRegister(instr.gpr8);
+            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
+                                                 PRECISE, op_a, Immediate(3));
+            operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
+        }
+        const Node branch = Operation(OperationCode::BranchIndirect, operand);
 
         const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
         if (cc != Tegra::Shader::ConditionCode::T) {
@@ -109,6 +144,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                              "Constant buffer flow is not supported");
 
+        if (disable_flow_stack) {
+            break;
+        }
+
         // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
         const u32 target = pc + instr.bra.GetBranchTarget();
         bb.push_back(
@@ -119,6 +158,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                              "Constant buffer PBK is not supported");
 
+        if (disable_flow_stack) {
+            break;
+        }
+
         // PBK pushes to a stack the address where BRK will jump to.
         const u32 target = pc + instr.bra.GetBranchTarget();
         bb.push_back(
@@ -130,6 +173,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
                              static_cast<u32>(cc));
 
+        if (disable_flow_stack) {
+            break;
+        }
+
         // The SYNC opcode jumps to the address previously set by the SSY opcode
         bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
         break;
@@ -138,6 +185,9 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
         UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
                              static_cast<u32>(cc));
+        if (disable_flow_stack) {
+            break;
+        }
 
         // The BRK opcode jumps to the address previously set by the PBK opcode
         bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
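
In both BRX paths above, the dynamic operand is a byte offset, so it is arithmetically shifted
right by 3 (divided by the 8-byte instruction size) before the base target is added. The
arithmetic in isolation (a sketch, not emulator code):

    #include <cstdint>

    std::int32_t BrxTarget(std::int32_t base_target, std::int32_t byte_offset) {
        return (byte_offset >> 3) + base_target;  // IArithmeticShiftRight by 3, then IAdd
    }

    // Example: byte_offset = 0x40 is eight instructions, landing 8 slots past base_target.
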
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 0ac83fcf0..7427ed896 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -148,11 +148,12 @@ enum class OperationCode {
148 148
149 ImageStore, /// (MetaImage, float[N] coords) -> void 149 ImageStore, /// (MetaImage, float[N] coords) -> void
150 150
151 Branch, /// (uint branch_target) -> void 151 Branch, /// (uint branch_target) -> void
152 PushFlowStack, /// (uint branch_target) -> void 152 BranchIndirect, /// (uint branch_target) -> void
153 PopFlowStack, /// () -> void 153 PushFlowStack, /// (uint branch_target) -> void
154 Exit, /// () -> void 154 PopFlowStack, /// () -> void
155 Discard, /// () -> void 155 Exit, /// () -> void
156 Discard, /// () -> void
156 157
157 EmitVertex, /// () -> void 158 EmitVertex, /// () -> void
158 EndPrimitive, /// () -> void 159 EndPrimitive, /// () -> void
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 11b545cca..5994bfc4e 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
 using Tegra::Shader::PredOperation;
 using Tegra::Shader::Register;
 
-ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset)
-    : program_code{program_code}, main_offset{main_offset} {
+ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size)
+    : program_code{program_code}, main_offset{main_offset}, program_size{size} {
     Decode();
 }
 
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index e22548208..6145f0a70 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -22,18 +22,12 @@
 
 namespace VideoCommon::Shader {
 
+struct ShaderBlock;
+
 using ProgramCode = std::vector<u64>;
 
 constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
 
-/// Describes the behaviour of code path of a given entry point and a return point.
-enum class ExitMethod {
-    Undetermined, ///< Internal value. Only occur when analyzing JMP loop.
-    AlwaysReturn, ///< All code paths reach the return point.
-    Conditional,  ///< Code path reaches the return point or an END instruction conditionally.
-    AlwaysEnd,    ///< All code paths reach a END instruction.
-};
-
 class ConstBuffer {
 public:
     explicit ConstBuffer(u32 max_offset, bool is_indirect)
@@ -73,7 +67,7 @@ struct GlobalMemoryUsage {
 
 class ShaderIR final {
 public:
-    explicit ShaderIR(const ProgramCode& program_code, u32 main_offset);
+    explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size);
     ~ShaderIR();
 
     const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -129,12 +123,20 @@ public:
         return header;
     }
 
+    bool IsFlowStackDisabled() const {
+        return disable_flow_stack;
+    }
+
+    u32 ConvertAddressToNvidiaSpace(const u32 address) const {
+        return (address - main_offset) * sizeof(Tegra::Shader::Instruction);
+    }
+
 private:
     void Decode();
 
-    ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels);
-
     NodeBlock DecodeRange(u32 begin, u32 end);
+    void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
+    void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block);
 
     /**
      * Decodes a single instruction from Tegra to IR.
@@ -326,10 +328,11 @@ private:
 
     const ProgramCode& program_code;
     const u32 main_offset;
+    const std::size_t program_size;
+    bool disable_flow_stack{};
 
     u32 coverage_begin{};
     u32 coverage_end{};
-    std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
 
     std::map<u32, NodeBlock> basic_blocks;
     NodeBlock global_code;