diff options
| author | 2020-05-24 00:33:06 -0400 | |
|---|---|---|
| committer | 2020-05-24 00:33:06 -0400 | |
| commit | 487dd051706247771e1733b0671a417f63b7f532 (patch) | |
| tree | d46d5180df6d6f31eeef73bb906b10fe155c8fbd /src | |
| parent | Merge pull request #3975 from ReinUsesLisp/fast-bufcache (diff) | |
| parent | shader/other: Implement thread comparisons (NV_shader_thread_group) (diff) | |
| download | yuzu-487dd051706247771e1733b0671a417f63b7f532.tar.gz yuzu-487dd051706247771e1733b0671a417f63b7f532.tar.xz yuzu-487dd051706247771e1733b0671a417f63b7f532.zip | |
Merge pull request #3979 from ReinUsesLisp/thread-group
shader/other: Implement thread comparisons (NV_shader_thread_group)
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 23 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 23 | ||||
| -rw-r--r-- | src/video_core/shader/decode/other.cpp | 21 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 5 |
4 files changed, 72 insertions, 0 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 960ebf1a1..c83a08d42 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -2309,6 +2309,18 @@ private: | |||
| 2309 | return {"gl_SubGroupInvocationARB", Type::Uint}; | 2309 | return {"gl_SubGroupInvocationARB", Type::Uint}; |
| 2310 | } | 2310 | } |
| 2311 | 2311 | ||
| 2312 | template <const std::string_view& comparison> | ||
| 2313 | Expression ThreadMask(Operation) { | ||
| 2314 | if (device.HasWarpIntrinsics()) { | ||
| 2315 | return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint}; | ||
| 2316 | } | ||
| 2317 | if (device.HasShaderBallot()) { | ||
| 2318 | return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint}; | ||
| 2319 | } | ||
| 2320 | LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader"); | ||
| 2321 | return {"0U", Type::Uint}; | ||
| 2322 | } | ||
| 2323 | |||
| 2312 | Expression ShuffleIndexed(Operation operation) { | 2324 | Expression ShuffleIndexed(Operation operation) { |
| 2313 | std::string value = VisitOperand(operation, 0).AsFloat(); | 2325 | std::string value = VisitOperand(operation, 0).AsFloat(); |
| 2314 | 2326 | ||
| @@ -2337,6 +2349,12 @@ private: | |||
| 2337 | static constexpr std::string_view NotEqual = "!="; | 2349 | static constexpr std::string_view NotEqual = "!="; |
| 2338 | static constexpr std::string_view GreaterEqual = ">="; | 2350 | static constexpr std::string_view GreaterEqual = ">="; |
| 2339 | 2351 | ||
| 2352 | static constexpr std::string_view Eq = "Eq"; | ||
| 2353 | static constexpr std::string_view Ge = "Ge"; | ||
| 2354 | static constexpr std::string_view Gt = "Gt"; | ||
| 2355 | static constexpr std::string_view Le = "Le"; | ||
| 2356 | static constexpr std::string_view Lt = "Lt"; | ||
| 2357 | |||
| 2340 | static constexpr std::string_view Add = "Add"; | 2358 | static constexpr std::string_view Add = "Add"; |
| 2341 | static constexpr std::string_view Min = "Min"; | 2359 | static constexpr std::string_view Min = "Min"; |
| 2342 | static constexpr std::string_view Max = "Max"; | 2360 | static constexpr std::string_view Max = "Max"; |
| @@ -2554,6 +2572,11 @@ private: | |||
| 2554 | &GLSLDecompiler::VoteEqual, | 2572 | &GLSLDecompiler::VoteEqual, |
| 2555 | 2573 | ||
| 2556 | &GLSLDecompiler::ThreadId, | 2574 | &GLSLDecompiler::ThreadId, |
| 2575 | &GLSLDecompiler::ThreadMask<Func::Eq>, | ||
| 2576 | &GLSLDecompiler::ThreadMask<Func::Ge>, | ||
| 2577 | &GLSLDecompiler::ThreadMask<Func::Gt>, | ||
| 2578 | &GLSLDecompiler::ThreadMask<Func::Le>, | ||
| 2579 | &GLSLDecompiler::ThreadMask<Func::Lt>, | ||
| 2557 | &GLSLDecompiler::ShuffleIndexed, | 2580 | &GLSLDecompiler::ShuffleIndexed, |
| 2558 | 2581 | ||
| 2559 | &GLSLDecompiler::MemoryBarrierGL, | 2582 | &GLSLDecompiler::MemoryBarrierGL, |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 167e20e91..f4ccc9848 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -515,6 +515,16 @@ private: | |||
| 515 | void DeclareCommon() { | 515 | void DeclareCommon() { |
| 516 | thread_id = | 516 | thread_id = |
| 517 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id"); | 517 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id"); |
| 518 | thread_masks[0] = | ||
| 519 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask"); | ||
| 520 | thread_masks[1] = | ||
| 521 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask"); | ||
| 522 | thread_masks[2] = | ||
| 523 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask"); | ||
| 524 | thread_masks[3] = | ||
| 525 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask"); | ||
| 526 | thread_masks[4] = | ||
| 527 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask"); | ||
| 518 | } | 528 | } |
| 519 | 529 | ||
| 520 | void DeclareVertex() { | 530 | void DeclareVertex() { |
| @@ -2175,6 +2185,13 @@ private: | |||
| 2175 | return {OpLoad(t_uint, thread_id), Type::Uint}; | 2185 | return {OpLoad(t_uint, thread_id), Type::Uint}; |
| 2176 | } | 2186 | } |
| 2177 | 2187 | ||
| 2188 | template <std::size_t index> | ||
| 2189 | Expression ThreadMask(Operation) { | ||
| 2190 | // TODO(Rodrigo): Handle devices with different warp sizes | ||
| 2191 | const Id mask = thread_masks[index]; | ||
| 2192 | return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint}; | ||
| 2193 | } | ||
| 2194 | |||
| 2178 | Expression ShuffleIndexed(Operation operation) { | 2195 | Expression ShuffleIndexed(Operation operation) { |
| 2179 | const Id value = AsFloat(Visit(operation[0])); | 2196 | const Id value = AsFloat(Visit(operation[0])); |
| 2180 | const Id index = AsUint(Visit(operation[1])); | 2197 | const Id index = AsUint(Visit(operation[1])); |
| @@ -2639,6 +2656,11 @@ private: | |||
| 2639 | &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>, | 2656 | &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>, |
| 2640 | 2657 | ||
| 2641 | &SPIRVDecompiler::ThreadId, | 2658 | &SPIRVDecompiler::ThreadId, |
| 2659 | &SPIRVDecompiler::ThreadMask<0>, // Eq | ||
| 2660 | &SPIRVDecompiler::ThreadMask<1>, // Ge | ||
| 2661 | &SPIRVDecompiler::ThreadMask<2>, // Gt | ||
| 2662 | &SPIRVDecompiler::ThreadMask<3>, // Le | ||
| 2663 | &SPIRVDecompiler::ThreadMask<4>, // Lt | ||
| 2642 | &SPIRVDecompiler::ShuffleIndexed, | 2664 | &SPIRVDecompiler::ShuffleIndexed, |
| 2643 | 2665 | ||
| 2644 | &SPIRVDecompiler::MemoryBarrierGL, | 2666 | &SPIRVDecompiler::MemoryBarrierGL, |
| @@ -2763,6 +2785,7 @@ private: | |||
| 2763 | Id workgroup_id{}; | 2785 | Id workgroup_id{}; |
| 2764 | Id local_invocation_id{}; | 2786 | Id local_invocation_id{}; |
| 2765 | Id thread_id{}; | 2787 | Id thread_id{}; |
| 2788 | std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt | ||
| 2766 | 2789 | ||
| 2767 | VertexIndices in_indices; | 2790 | VertexIndices in_indices; |
| 2768 | VertexIndices out_indices; | 2791 | VertexIndices out_indices; |
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d4f95b18c..399a455c4 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp | |||
| @@ -109,6 +109,27 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 109 | return Operation(OperationCode::WorkGroupIdY); | 109 | return Operation(OperationCode::WorkGroupIdY); |
| 110 | case SystemVariable::CtaIdZ: | 110 | case SystemVariable::CtaIdZ: |
| 111 | return Operation(OperationCode::WorkGroupIdZ); | 111 | return Operation(OperationCode::WorkGroupIdZ); |
| 112 | case SystemVariable::EqMask: | ||
| 113 | case SystemVariable::LtMask: | ||
| 114 | case SystemVariable::LeMask: | ||
| 115 | case SystemVariable::GtMask: | ||
| 116 | case SystemVariable::GeMask: | ||
| 117 | uses_warps = true; | ||
| 118 | switch (instr.sys20) { | ||
| 119 | case SystemVariable::EqMask: | ||
| 120 | return Operation(OperationCode::ThreadEqMask); | ||
| 121 | case SystemVariable::LtMask: | ||
| 122 | return Operation(OperationCode::ThreadLtMask); | ||
| 123 | case SystemVariable::LeMask: | ||
| 124 | return Operation(OperationCode::ThreadLeMask); | ||
| 125 | case SystemVariable::GtMask: | ||
| 126 | return Operation(OperationCode::ThreadGtMask); | ||
| 127 | case SystemVariable::GeMask: | ||
| 128 | return Operation(OperationCode::ThreadGeMask); | ||
| 129 | default: | ||
| 130 | UNREACHABLE(); | ||
| 131 | return Immediate(0u); | ||
| 132 | } | ||
| 112 | default: | 133 | default: |
| 113 | UNIMPLEMENTED_MSG("Unhandled system move: {}", | 134 | UNIMPLEMENTED_MSG("Unhandled system move: {}", |
| 114 | static_cast<u32>(instr.sys20.Value())); | 135 | static_cast<u32>(instr.sys20.Value())); |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index f75b62240..cce8aeebe 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -226,6 +226,11 @@ enum class OperationCode { | |||
| 226 | VoteEqual, /// (bool) -> bool | 226 | VoteEqual, /// (bool) -> bool |
| 227 | 227 | ||
| 228 | ThreadId, /// () -> uint | 228 | ThreadId, /// () -> uint |
| 229 | ThreadEqMask, /// () -> uint | ||
| 230 | ThreadGeMask, /// () -> uint | ||
| 231 | ThreadGtMask, /// () -> uint | ||
| 232 | ThreadLeMask, /// () -> uint | ||
| 233 | ThreadLtMask, /// () -> uint | ||
| 229 | ShuffleIndexed, /// (uint value, uint index) -> uint | 234 | ShuffleIndexed, /// (uint value, uint index) -> uint |
| 230 | 235 | ||
| 231 | MemoryBarrierGL, /// () -> void | 236 | MemoryBarrierGL, /// () -> void |