summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2019-06-02 18:52:07 -0300
committerGravatar ReinUsesLisp2019-06-07 02:18:27 -0300
commitfe8e6618f2907a9262d69232ef0e2d5d58cbc6e0 (patch)
tree308d278996e18558ab049daa01b4873b08b799e8 /src
parentMerge pull request #2558 from ReinUsesLisp/shader-nodes (diff)
downloadyuzu-fe8e6618f2907a9262d69232ef0e2d5d58cbc6e0.tar.gz
yuzu-fe8e6618f2907a9262d69232ef0e2d5d58cbc6e0.tar.xz
yuzu-fe8e6618f2907a9262d69232ef0e2d5d58cbc6e0.zip
shader: Split SSY and PBK stack
Hardware testing revealed that SSY and PBK push to separate stacks, allowing code like this: SSY label1; PBK label2; SYNC; label1: PBK; label2: EXIT;
Diffstat (limited to 'src')
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp31
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp49
-rw-r--r--src/video_core/shader/decode/other.cpp18
-rw-r--r--src/video_core/shader/node.h7
4 files changed, 78 insertions, 27 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index f2d0722af..afcc06afc 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -143,6 +143,24 @@ u32 GetGenericAttributeIndex(Attribute::Index index) {
143 return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); 143 return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
144} 144}
145 145
146constexpr const char* GetFlowStackPrefix(MetaStackClass stack) {
147 switch (stack) {
148 case MetaStackClass::Ssy:
149 return "ssy";
150 case MetaStackClass::Pbk:
151 return "pbk";
152 }
153 return {};
154}
155
156std::string FlowStackName(MetaStackClass stack) {
157 return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack));
158}
159
160std::string FlowStackTopName(MetaStackClass stack) {
161 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
162}
163
146class GLSLDecompiler final { 164class GLSLDecompiler final {
147public: 165public:
148 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, 166 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage,
@@ -173,8 +191,10 @@ public:
173 // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems 191 // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
174 // unlikely that shaders will use 20 nested SSYs and PBKs. 192 // unlikely that shaders will use 20 nested SSYs and PBKs.
175 constexpr u32 FLOW_STACK_SIZE = 20; 193 constexpr u32 FLOW_STACK_SIZE = 20;
176 code.AddLine("uint flow_stack[{}];", FLOW_STACK_SIZE); 194 for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
177 code.AddLine("uint flow_stack_top = 0u;"); 195 code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
196 code.AddLine("uint {} = 0u;", FlowStackTopName(stack));
197 }
178 198
179 code.AddLine("while (true) {{"); 199 code.AddLine("while (true) {{");
180 ++code.scope; 200 ++code.scope;
@@ -1438,15 +1458,18 @@ private:
1438 } 1458 }
1439 1459
1440 std::string PushFlowStack(Operation operation) { 1460 std::string PushFlowStack(Operation operation) {
1461 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
1441 const auto target = std::get_if<ImmediateNode>(&*operation[0]); 1462 const auto target = std::get_if<ImmediateNode>(&*operation[0]);
1442 UNIMPLEMENTED_IF(!target); 1463 UNIMPLEMENTED_IF(!target);
1443 1464
1444 code.AddLine("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue()); 1465 code.AddLine("{}[{}++] = 0x{:x}u;", FlowStackName(stack), FlowStackTopName(stack),
1466 target->GetValue());
1445 return {}; 1467 return {};
1446 } 1468 }
1447 1469
1448 std::string PopFlowStack(Operation operation) { 1470 std::string PopFlowStack(Operation operation) {
1449 code.AddLine("jmp_to = flow_stack[--flow_stack_top];"); 1471 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
1472 code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack));
1450 code.AddLine("break;"); 1473 code.AddLine("break;");
1451 return {}; 1474 return {};
1452 } 1475 }
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 547883425..33ad9764a 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -132,20 +132,16 @@ public:
132 branch_labels.push_back(label); 132 branch_labels.push_back(label);
133 } 133 }
134 134
135 // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely
136 // that shaders will use 20 nested SSYs and PBKs.
137 constexpr u32 FLOW_STACK_SIZE = 20;
138 const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE));
139 jmp_to = Emit(OpVariable(TypePointer(spv::StorageClass::Function, t_uint), 135 jmp_to = Emit(OpVariable(TypePointer(spv::StorageClass::Function, t_uint),
140 spv::StorageClass::Function, Constant(t_uint, first_address))); 136 spv::StorageClass::Function, Constant(t_uint, first_address)));
141 flow_stack = Emit(OpVariable(TypePointer(spv::StorageClass::Function, flow_stack_type), 137 std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack();
142 spv::StorageClass::Function, ConstantNull(flow_stack_type))); 138 std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack();
143 flow_stack_top =
144 Emit(OpVariable(t_func_uint, spv::StorageClass::Function, Constant(t_uint, 0)));
145 139
146 Name(jmp_to, "jmp_to"); 140 Name(jmp_to, "jmp_to");
147 Name(flow_stack, "flow_stack"); 141 Name(ssy_flow_stack, "ssy_flow_stack");
148 Name(flow_stack_top, "flow_stack_top"); 142 Name(ssy_flow_stack_top, "ssy_flow_stack_top");
143 Name(pbk_flow_stack, "pbk_flow_stack");
144 Name(pbk_flow_stack_top, "pbk_flow_stack_top");
149 145
150 Emit(OpBranch(loop_label)); 146 Emit(OpBranch(loop_label));
151 Emit(loop_label); 147 Emit(loop_label);
@@ -952,6 +948,7 @@ private:
952 const auto target = std::get_if<ImmediateNode>(&*operation[0]); 948 const auto target = std::get_if<ImmediateNode>(&*operation[0]);
953 ASSERT(target); 949 ASSERT(target);
954 950
951 const auto [flow_stack, flow_stack_top] = GetFlowStack(operation);
955 const Id current = Emit(OpLoad(t_uint, flow_stack_top)); 952 const Id current = Emit(OpLoad(t_uint, flow_stack_top));
956 const Id next = Emit(OpIAdd(t_uint, current, Constant(t_uint, 1))); 953 const Id next = Emit(OpIAdd(t_uint, current, Constant(t_uint, 1)));
957 const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, current)); 954 const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, current));
@@ -962,6 +959,7 @@ private:
962 } 959 }
963 960
964 Id PopFlowStack(Operation operation) { 961 Id PopFlowStack(Operation operation) {
962 const auto [flow_stack, flow_stack_top] = GetFlowStack(operation);
965 const Id current = Emit(OpLoad(t_uint, flow_stack_top)); 963 const Id current = Emit(OpLoad(t_uint, flow_stack_top));
966 const Id previous = Emit(OpISub(t_uint, current, Constant(t_uint, 1))); 964 const Id previous = Emit(OpISub(t_uint, current, Constant(t_uint, 1)));
967 const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, previous)); 965 const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, previous));
@@ -1172,6 +1170,31 @@ private:
1172 Emit(skip_label); 1170 Emit(skip_label);
1173 } 1171 }
1174 1172
1173 std::tuple<Id, Id> CreateFlowStack() {
1174 // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely
1175 // that shaders will use 20 nested SSYs and PBKs.
1176 constexpr u32 FLOW_STACK_SIZE = 20;
1177 constexpr auto storage_class = spv::StorageClass::Function;
1178
1179 const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE));
1180 const Id stack = Emit(OpVariable(TypePointer(storage_class, flow_stack_type), storage_class,
1181 ConstantNull(flow_stack_type)));
1182 const Id top = Emit(OpVariable(t_func_uint, storage_class, Constant(t_uint, 0)));
1183 return std::tie(stack, top);
1184 }
1185
1186 std::pair<Id, Id> GetFlowStack(Operation operation) {
1187 const auto stack_class = std::get<MetaStackClass>(operation.GetMeta());
1188 switch (stack_class) {
1189 case MetaStackClass::Ssy:
1190 return {ssy_flow_stack, ssy_flow_stack_top};
1191 case MetaStackClass::Pbk:
1192 return {pbk_flow_stack, pbk_flow_stack_top};
1193 }
1194 UNREACHABLE();
1195 return {};
1196 }
1197
1175 static constexpr OperationDecompilersArray operation_decompilers = { 1198 static constexpr OperationDecompilersArray operation_decompilers = {
1176 &SPIRVDecompiler::Assign, 1199 &SPIRVDecompiler::Assign,
1177 1200
@@ -1414,8 +1437,10 @@ private:
1414 1437
1415 Id execute_function{}; 1438 Id execute_function{};
1416 Id jmp_to{}; 1439 Id jmp_to{};
1417 Id flow_stack_top{}; 1440 Id ssy_flow_stack_top{};
1418 Id flow_stack{}; 1441 Id pbk_flow_stack_top{};
1442 Id ssy_flow_stack{};
1443 Id pbk_flow_stack{};
1419 Id continue_label{}; 1444 Id continue_label{};
1420 std::map<u32, Id> labels; 1445 std::map<u32, Id> labels;
1421}; 1446};
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 6fc07f213..d46a8ab82 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -109,22 +109,20 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
109 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, 109 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
110 "Constant buffer flow is not supported"); 110 "Constant buffer flow is not supported");
111 111
112 // The SSY opcode tells the GPU where to re-converge divergent execution paths, it sets the 112 // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
113 // target of the jump that the SYNC instruction will make. The SSY opcode has a similar
114 // structure to the BRA opcode.
115 const u32 target = pc + instr.bra.GetBranchTarget(); 113 const u32 target = pc + instr.bra.GetBranchTarget();
116 bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target))); 114 bb.push_back(
115 Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target)));
117 break; 116 break;
118 } 117 }
119 case OpCode::Id::PBK: { 118 case OpCode::Id::PBK: {
120 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, 119 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
121 "Constant buffer PBK is not supported"); 120 "Constant buffer PBK is not supported");
122 121
123 // PBK pushes to a stack the address where BRK will jump to. This shares stack with SSY but 122 // PBK pushes to a stack the address where BRK will jump to.
124 // using SYNC on a PBK address will kill the shader execution. We don't emulate this because
125 // it's very unlikely a driver will emit such invalid shader.
126 const u32 target = pc + instr.bra.GetBranchTarget(); 123 const u32 target = pc + instr.bra.GetBranchTarget();
127 bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target))); 124 bb.push_back(
125 Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target)));
128 break; 126 break;
129 } 127 }
130 case OpCode::Id::SYNC: { 128 case OpCode::Id::SYNC: {
@@ -133,7 +131,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
133 static_cast<u32>(cc)); 131 static_cast<u32>(cc));
134 132
135 // The SYNC opcode jumps to the address previously set by the SSY opcode 133 // The SYNC opcode jumps to the address previously set by the SSY opcode
136 bb.push_back(Operation(OperationCode::PopFlowStack)); 134 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
137 break; 135 break;
138 } 136 }
139 case OpCode::Id::BRK: { 137 case OpCode::Id::BRK: {
@@ -142,7 +140,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
142 static_cast<u32>(cc)); 140 static_cast<u32>(cc));
143 141
144 // The BRK opcode jumps to the address previously set by the PBK opcode 142 // The BRK opcode jumps to the address previously set by the PBK opcode
145 bb.push_back(Operation(OperationCode::PopFlowStack)); 143 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
146 break; 144 break;
147 } 145 }
148 case OpCode::Id::IPA: { 146 case OpCode::Id::IPA: {
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index c002f90f9..3cfb911bb 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -174,6 +174,11 @@ enum class InternalFlag {
174 Amount = 4, 174 Amount = 4,
175}; 175};
176 176
/// Identifies which flow stack a PushFlowStack or PopFlowStack operation targets.
177enum class MetaStackClass {
178 Ssy, ///< Stack pushed by SSY and popped by SYNC.
179 Pbk, ///< Stack pushed by PBK and popped by BRK.
180};
181
177class OperationNode; 182class OperationNode;
178class ConditionalNode; 183class ConditionalNode;
179class GprNode; 184class GprNode;
@@ -285,7 +290,7 @@ struct MetaTexture {
285}; 290};
286 291
287/// Parameters that modify an operation but are not part of any particular operand 292/// Parameters that modify an operation but are not part of any particular operand
288using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>; 293using Meta = std::variant<MetaArithmetic, MetaTexture, MetaStackClass, Tegra::Shader::HalfType>;
289 294
290/// Holds any kind of operation that can be done in the IR 295/// Holds any kind of operation that can be done in the IR
291class OperationNode final { 296class OperationNode final {