diff options
| author | 2019-06-02 18:52:07 -0300 | |
|---|---|---|
| committer | 2019-06-07 02:18:27 -0300 | |
| commit | fe8e6618f2907a9262d69232ef0e2d5d58cbc6e0 (patch) | |
| tree | 308d278996e18558ab049daa01b4873b08b799e8 /src | |
| parent | Merge pull request #2558 from ReinUsesLisp/shader-nodes (diff) | |
| download | yuzu-fe8e6618f2907a9262d69232ef0e2d5d58cbc6e0.tar.gz yuzu-fe8e6618f2907a9262d69232ef0e2d5d58cbc6e0.tar.xz yuzu-fe8e6618f2907a9262d69232ef0e2d5d58cbc6e0.zip | |
shader: Split SSY and PBK stack
Hardware testing revealed that SSY and PBK push to separate stacks,
allowing code like this:
SSY label1;
PBK label2;
SYNC;
label1: BRK;
label2: EXIT;
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 31 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 49 | ||||
| -rw-r--r-- | src/video_core/shader/decode/other.cpp | 18 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 7 |
4 files changed, 78 insertions, 27 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index f2d0722af..afcc06afc 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -143,6 +143,24 @@ u32 GetGenericAttributeIndex(Attribute::Index index) { | |||
| 143 | return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); | 143 | return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); |
| 144 | } | 144 | } |
| 145 | 145 | ||
| 146 | constexpr const char* GetFlowStackPrefix(MetaStackClass stack) { | ||
| 147 | switch (stack) { | ||
| 148 | case MetaStackClass::Ssy: | ||
| 149 | return "ssy"; | ||
| 150 | case MetaStackClass::Pbk: | ||
| 151 | return "pbk"; | ||
| 152 | } | ||
| 153 | return {}; | ||
| 154 | } | ||
| 155 | |||
| 156 | std::string FlowStackName(MetaStackClass stack) { | ||
| 157 | return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack)); | ||
| 158 | } | ||
| 159 | |||
| 160 | std::string FlowStackTopName(MetaStackClass stack) { | ||
| 161 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); | ||
| 162 | } | ||
| 163 | |||
| 146 | class GLSLDecompiler final { | 164 | class GLSLDecompiler final { |
| 147 | public: | 165 | public: |
| 148 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, | 166 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, |
| @@ -173,8 +191,10 @@ public: | |||
| 173 | // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems | 191 | // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems |
| 174 | // unlikely that shaders will use 20 nested SSYs and PBKs. | 192 | // unlikely that shaders will use 20 nested SSYs and PBKs. |
| 175 | constexpr u32 FLOW_STACK_SIZE = 20; | 193 | constexpr u32 FLOW_STACK_SIZE = 20; |
| 176 | code.AddLine("uint flow_stack[{}];", FLOW_STACK_SIZE); | 194 | for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { |
| 177 | code.AddLine("uint flow_stack_top = 0u;"); | 195 | code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); |
| 196 | code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); | ||
| 197 | } | ||
| 178 | 198 | ||
| 179 | code.AddLine("while (true) {{"); | 199 | code.AddLine("while (true) {{"); |
| 180 | ++code.scope; | 200 | ++code.scope; |
| @@ -1438,15 +1458,18 @@ private: | |||
| 1438 | } | 1458 | } |
| 1439 | 1459 | ||
| 1440 | std::string PushFlowStack(Operation operation) { | 1460 | std::string PushFlowStack(Operation operation) { |
| 1461 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 1441 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); | 1462 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); |
| 1442 | UNIMPLEMENTED_IF(!target); | 1463 | UNIMPLEMENTED_IF(!target); |
| 1443 | 1464 | ||
| 1444 | code.AddLine("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue()); | 1465 | code.AddLine("{}[{}++] = 0x{:x}u;", FlowStackName(stack), FlowStackTopName(stack), |
| 1466 | target->GetValue()); | ||
| 1445 | return {}; | 1467 | return {}; |
| 1446 | } | 1468 | } |
| 1447 | 1469 | ||
| 1448 | std::string PopFlowStack(Operation operation) { | 1470 | std::string PopFlowStack(Operation operation) { |
| 1449 | code.AddLine("jmp_to = flow_stack[--flow_stack_top];"); | 1471 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); |
| 1472 | code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack)); | ||
| 1450 | code.AddLine("break;"); | 1473 | code.AddLine("break;"); |
| 1451 | return {}; | 1474 | return {}; |
| 1452 | } | 1475 | } |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 547883425..33ad9764a 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -132,20 +132,16 @@ public: | |||
| 132 | branch_labels.push_back(label); | 132 | branch_labels.push_back(label); |
| 133 | } | 133 | } |
| 134 | 134 | ||
| 135 | // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely | ||
| 136 | // that shaders will use 20 nested SSYs and PBKs. | ||
| 137 | constexpr u32 FLOW_STACK_SIZE = 20; | ||
| 138 | const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE)); | ||
| 139 | jmp_to = Emit(OpVariable(TypePointer(spv::StorageClass::Function, t_uint), | 135 | jmp_to = Emit(OpVariable(TypePointer(spv::StorageClass::Function, t_uint), |
| 140 | spv::StorageClass::Function, Constant(t_uint, first_address))); | 136 | spv::StorageClass::Function, Constant(t_uint, first_address))); |
| 141 | flow_stack = Emit(OpVariable(TypePointer(spv::StorageClass::Function, flow_stack_type), | 137 | std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack(); |
| 142 | spv::StorageClass::Function, ConstantNull(flow_stack_type))); | 138 | std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack(); |
| 143 | flow_stack_top = | ||
| 144 | Emit(OpVariable(t_func_uint, spv::StorageClass::Function, Constant(t_uint, 0))); | ||
| 145 | 139 | ||
| 146 | Name(jmp_to, "jmp_to"); | 140 | Name(jmp_to, "jmp_to"); |
| 147 | Name(flow_stack, "flow_stack"); | 141 | Name(ssy_flow_stack, "ssy_flow_stack"); |
| 148 | Name(flow_stack_top, "flow_stack_top"); | 142 | Name(ssy_flow_stack_top, "ssy_flow_stack_top"); |
| 143 | Name(pbk_flow_stack, "pbk_flow_stack"); | ||
| 144 | Name(pbk_flow_stack_top, "pbk_flow_stack_top"); | ||
| 149 | 145 | ||
| 150 | Emit(OpBranch(loop_label)); | 146 | Emit(OpBranch(loop_label)); |
| 151 | Emit(loop_label); | 147 | Emit(loop_label); |
| @@ -952,6 +948,7 @@ private: | |||
| 952 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); | 948 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); |
| 953 | ASSERT(target); | 949 | ASSERT(target); |
| 954 | 950 | ||
| 951 | const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); | ||
| 955 | const Id current = Emit(OpLoad(t_uint, flow_stack_top)); | 952 | const Id current = Emit(OpLoad(t_uint, flow_stack_top)); |
| 956 | const Id next = Emit(OpIAdd(t_uint, current, Constant(t_uint, 1))); | 953 | const Id next = Emit(OpIAdd(t_uint, current, Constant(t_uint, 1))); |
| 957 | const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, current)); | 954 | const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, current)); |
| @@ -962,6 +959,7 @@ private: | |||
| 962 | } | 959 | } |
| 963 | 960 | ||
| 964 | Id PopFlowStack(Operation operation) { | 961 | Id PopFlowStack(Operation operation) { |
| 962 | const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); | ||
| 965 | const Id current = Emit(OpLoad(t_uint, flow_stack_top)); | 963 | const Id current = Emit(OpLoad(t_uint, flow_stack_top)); |
| 966 | const Id previous = Emit(OpISub(t_uint, current, Constant(t_uint, 1))); | 964 | const Id previous = Emit(OpISub(t_uint, current, Constant(t_uint, 1))); |
| 967 | const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, previous)); | 965 | const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, previous)); |
| @@ -1172,6 +1170,31 @@ private: | |||
| 1172 | Emit(skip_label); | 1170 | Emit(skip_label); |
| 1173 | } | 1171 | } |
| 1174 | 1172 | ||
| 1173 | std::tuple<Id, Id> CreateFlowStack() { | ||
| 1174 | // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely | ||
| 1175 | // that shaders will use 20 nested SSYs and PBKs. | ||
| 1176 | constexpr u32 FLOW_STACK_SIZE = 20; | ||
| 1177 | constexpr auto storage_class = spv::StorageClass::Function; | ||
| 1178 | |||
| 1179 | const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE)); | ||
| 1180 | const Id stack = Emit(OpVariable(TypePointer(storage_class, flow_stack_type), storage_class, | ||
| 1181 | ConstantNull(flow_stack_type))); | ||
| 1182 | const Id top = Emit(OpVariable(t_func_uint, storage_class, Constant(t_uint, 0))); | ||
| 1183 | return std::tie(stack, top); | ||
| 1184 | } | ||
| 1185 | |||
| 1186 | std::pair<Id, Id> GetFlowStack(Operation operation) { | ||
| 1187 | const auto stack_class = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 1188 | switch (stack_class) { | ||
| 1189 | case MetaStackClass::Ssy: | ||
| 1190 | return {ssy_flow_stack, ssy_flow_stack_top}; | ||
| 1191 | case MetaStackClass::Pbk: | ||
| 1192 | return {pbk_flow_stack, pbk_flow_stack_top}; | ||
| 1193 | } | ||
| 1194 | UNREACHABLE(); | ||
| 1195 | return {}; | ||
| 1196 | } | ||
| 1197 | |||
| 1175 | static constexpr OperationDecompilersArray operation_decompilers = { | 1198 | static constexpr OperationDecompilersArray operation_decompilers = { |
| 1176 | &SPIRVDecompiler::Assign, | 1199 | &SPIRVDecompiler::Assign, |
| 1177 | 1200 | ||
| @@ -1414,8 +1437,10 @@ private: | |||
| 1414 | 1437 | ||
| 1415 | Id execute_function{}; | 1438 | Id execute_function{}; |
| 1416 | Id jmp_to{}; | 1439 | Id jmp_to{}; |
| 1417 | Id flow_stack_top{}; | 1440 | Id ssy_flow_stack_top{}; |
| 1418 | Id flow_stack{}; | 1441 | Id pbk_flow_stack_top{}; |
| 1442 | Id ssy_flow_stack{}; | ||
| 1443 | Id pbk_flow_stack{}; | ||
| 1419 | Id continue_label{}; | 1444 | Id continue_label{}; |
| 1420 | std::map<u32, Id> labels; | 1445 | std::map<u32, Id> labels; |
| 1421 | }; | 1446 | }; |
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 6fc07f213..d46a8ab82 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp | |||
| @@ -109,22 +109,20 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 109 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | 109 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, |
| 110 | "Constant buffer flow is not supported"); | 110 | "Constant buffer flow is not supported"); |
| 111 | 111 | ||
| 112 | // The SSY opcode tells the GPU where to re-converge divergent execution paths, it sets the | 112 | // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. |
| 113 | // target of the jump that the SYNC instruction will make. The SSY opcode has a similar | ||
| 114 | // structure to the BRA opcode. | ||
| 115 | const u32 target = pc + instr.bra.GetBranchTarget(); | 113 | const u32 target = pc + instr.bra.GetBranchTarget(); |
| 116 | bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target))); | 114 | bb.push_back( |
| 115 | Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target))); | ||
| 117 | break; | 116 | break; |
| 118 | } | 117 | } |
| 119 | case OpCode::Id::PBK: { | 118 | case OpCode::Id::PBK: { |
| 120 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | 119 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, |
| 121 | "Constant buffer PBK is not supported"); | 120 | "Constant buffer PBK is not supported"); |
| 122 | 121 | ||
| 123 | // PBK pushes to a stack the address where BRK will jump to. This shares stack with SSY but | 122 | // PBK pushes to a stack the address where BRK will jump to. |
| 124 | // using SYNC on a PBK address will kill the shader execution. We don't emulate this because | ||
| 125 | // it's very unlikely a driver will emit such invalid shader. | ||
| 126 | const u32 target = pc + instr.bra.GetBranchTarget(); | 123 | const u32 target = pc + instr.bra.GetBranchTarget(); |
| 127 | bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target))); | 124 | bb.push_back( |
| 125 | Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target))); | ||
| 128 | break; | 126 | break; |
| 129 | } | 127 | } |
| 130 | case OpCode::Id::SYNC: { | 128 | case OpCode::Id::SYNC: { |
| @@ -133,7 +131,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 133 | static_cast<u32>(cc)); | 131 | static_cast<u32>(cc)); |
| 134 | 132 | ||
| 135 | // The SYNC opcode jumps to the address previously set by the SSY opcode | 133 | // The SYNC opcode jumps to the address previously set by the SSY opcode |
| 136 | bb.push_back(Operation(OperationCode::PopFlowStack)); | 134 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); |
| 137 | break; | 135 | break; |
| 138 | } | 136 | } |
| 139 | case OpCode::Id::BRK: { | 137 | case OpCode::Id::BRK: { |
| @@ -142,7 +140,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 142 | static_cast<u32>(cc)); | 140 | static_cast<u32>(cc)); |
| 143 | 141 | ||
| 144 | // The BRK opcode jumps to the address previously set by the PBK opcode | 142 | // The BRK opcode jumps to the address previously set by the PBK opcode |
| 145 | bb.push_back(Operation(OperationCode::PopFlowStack)); | 143 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); |
| 146 | break; | 144 | break; |
| 147 | } | 145 | } |
| 148 | case OpCode::Id::IPA: { | 146 | case OpCode::Id::IPA: { |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index c002f90f9..3cfb911bb 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -174,6 +174,11 @@ enum class InternalFlag { | |||
| 174 | Amount = 4, | 174 | Amount = 4, |
| 175 | }; | 175 | }; |
| 176 | 176 | ||
| 177 | enum class MetaStackClass { | ||
| 178 | Ssy, | ||
| 179 | Pbk, | ||
| 180 | }; | ||
| 181 | |||
| 177 | class OperationNode; | 182 | class OperationNode; |
| 178 | class ConditionalNode; | 183 | class ConditionalNode; |
| 179 | class GprNode; | 184 | class GprNode; |
| @@ -285,7 +290,7 @@ struct MetaTexture { | |||
| 285 | }; | 290 | }; |
| 286 | 291 | ||
| 287 | /// Parameters that modify an operation but are not part of any particular operand | 292 | /// Parameters that modify an operation but are not part of any particular operand |
| 288 | using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>; | 293 | using Meta = std::variant<MetaArithmetic, MetaTexture, MetaStackClass, Tegra::Shader::HalfType>; |
| 289 | 294 | ||
| 290 | /// Holds any kind of operation that can be done in the IR | 295 | /// Holds any kind of operation that can be done in the IR |
| 291 | class OperationNode final { | 296 | class OperationNode final { |