summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: bunnei, 2020-05-24 00:33:06 -0400
committer: GitHub, 2020-05-24 00:33:06 -0400
commit: 487dd051706247771e1733b0671a417f63b7f532 (patch)
tree: d46d5180df6d6f31eeef73bb906b10fe155c8fbd /src
parent: Merge pull request #3975 from ReinUsesLisp/fast-bufcache (diff)
parent: shader/other: Implement thread comparisons (NV_shader_thread_group) (diff)
download: yuzu-487dd051706247771e1733b0671a417f63b7f532.tar.gz
yuzu-487dd051706247771e1733b0671a417f63b7f532.tar.xz
yuzu-487dd051706247771e1733b0671a417f63b7f532.zip
Merge pull request #3979 from ReinUsesLisp/thread-group
shader/other: Implement thread comparisons (NV_shader_thread_group)
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 23
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp 23
-rw-r--r-- src/video_core/shader/decode/other.cpp 21
-rw-r--r-- src/video_core/shader/node.h 5
4 files changed, 72 insertions, 0 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 960ebf1a1..c83a08d42 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2309,6 +2309,18 @@ private:
2309 return {"gl_SubGroupInvocationARB", Type::Uint}; 2309 return {"gl_SubGroupInvocationARB", Type::Uint};
2310 } 2310 }
2311 2311
2312 template <const std::string_view& comparison>
2313 Expression ThreadMask(Operation) {
2314 if (device.HasWarpIntrinsics()) {
2315 return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint};
2316 }
2317 if (device.HasShaderBallot()) {
2318 return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint};
2319 }
2320 LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader");
2321 return {"0U", Type::Uint};
2322 }
2323
2312 Expression ShuffleIndexed(Operation operation) { 2324 Expression ShuffleIndexed(Operation operation) {
2313 std::string value = VisitOperand(operation, 0).AsFloat(); 2325 std::string value = VisitOperand(operation, 0).AsFloat();
2314 2326
@@ -2337,6 +2349,12 @@ private:
2337 static constexpr std::string_view NotEqual = "!="; 2349 static constexpr std::string_view NotEqual = "!=";
2338 static constexpr std::string_view GreaterEqual = ">="; 2350 static constexpr std::string_view GreaterEqual = ">=";
2339 2351
2352 static constexpr std::string_view Eq = "Eq";
2353 static constexpr std::string_view Ge = "Ge";
2354 static constexpr std::string_view Gt = "Gt";
2355 static constexpr std::string_view Le = "Le";
2356 static constexpr std::string_view Lt = "Lt";
2357
2340 static constexpr std::string_view Add = "Add"; 2358 static constexpr std::string_view Add = "Add";
2341 static constexpr std::string_view Min = "Min"; 2359 static constexpr std::string_view Min = "Min";
2342 static constexpr std::string_view Max = "Max"; 2360 static constexpr std::string_view Max = "Max";
@@ -2554,6 +2572,11 @@ private:
2554 &GLSLDecompiler::VoteEqual, 2572 &GLSLDecompiler::VoteEqual,
2555 2573
2556 &GLSLDecompiler::ThreadId, 2574 &GLSLDecompiler::ThreadId,
2575 &GLSLDecompiler::ThreadMask<Func::Eq>,
2576 &GLSLDecompiler::ThreadMask<Func::Ge>,
2577 &GLSLDecompiler::ThreadMask<Func::Gt>,
2578 &GLSLDecompiler::ThreadMask<Func::Le>,
2579 &GLSLDecompiler::ThreadMask<Func::Lt>,
2557 &GLSLDecompiler::ShuffleIndexed, 2580 &GLSLDecompiler::ShuffleIndexed,
2558 2581
2559 &GLSLDecompiler::MemoryBarrierGL, 2582 &GLSLDecompiler::MemoryBarrierGL,
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 167e20e91..f4ccc9848 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -515,6 +515,16 @@ private:
515 void DeclareCommon() { 515 void DeclareCommon() {
516 thread_id = 516 thread_id =
517 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id"); 517 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id");
518 thread_masks[0] =
519 DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask");
520 thread_masks[1] =
521 DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask");
522 thread_masks[2] =
523 DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask");
524 thread_masks[3] =
525 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask");
526 thread_masks[4] =
527 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask");
518 } 528 }
519 529
520 void DeclareVertex() { 530 void DeclareVertex() {
@@ -2175,6 +2185,13 @@ private:
2175 return {OpLoad(t_uint, thread_id), Type::Uint}; 2185 return {OpLoad(t_uint, thread_id), Type::Uint};
2176 } 2186 }
2177 2187
2188 template <std::size_t index>
2189 Expression ThreadMask(Operation) {
2190 // TODO(Rodrigo): Handle devices with different warp sizes
2191 const Id mask = thread_masks[index];
2192 return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint};
2193 }
2194
2178 Expression ShuffleIndexed(Operation operation) { 2195 Expression ShuffleIndexed(Operation operation) {
2179 const Id value = AsFloat(Visit(operation[0])); 2196 const Id value = AsFloat(Visit(operation[0]));
2180 const Id index = AsUint(Visit(operation[1])); 2197 const Id index = AsUint(Visit(operation[1]));
@@ -2639,6 +2656,11 @@ private:
2639 &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>, 2656 &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>,
2640 2657
2641 &SPIRVDecompiler::ThreadId, 2658 &SPIRVDecompiler::ThreadId,
2659 &SPIRVDecompiler::ThreadMask<0>, // Eq
2660 &SPIRVDecompiler::ThreadMask<1>, // Ge
2661 &SPIRVDecompiler::ThreadMask<2>, // Gt
2662 &SPIRVDecompiler::ThreadMask<3>, // Le
2663 &SPIRVDecompiler::ThreadMask<4>, // Lt
2642 &SPIRVDecompiler::ShuffleIndexed, 2664 &SPIRVDecompiler::ShuffleIndexed,
2643 2665
2644 &SPIRVDecompiler::MemoryBarrierGL, 2666 &SPIRVDecompiler::MemoryBarrierGL,
@@ -2763,6 +2785,7 @@ private:
2763 Id workgroup_id{}; 2785 Id workgroup_id{};
2764 Id local_invocation_id{}; 2786 Id local_invocation_id{};
2765 Id thread_id{}; 2787 Id thread_id{};
2788 std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt
2766 2789
2767 VertexIndices in_indices; 2790 VertexIndices in_indices;
2768 VertexIndices out_indices; 2791 VertexIndices out_indices;
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d4f95b18c..399a455c4 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -109,6 +109,27 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
109 return Operation(OperationCode::WorkGroupIdY); 109 return Operation(OperationCode::WorkGroupIdY);
110 case SystemVariable::CtaIdZ: 110 case SystemVariable::CtaIdZ:
111 return Operation(OperationCode::WorkGroupIdZ); 111 return Operation(OperationCode::WorkGroupIdZ);
112 case SystemVariable::EqMask:
113 case SystemVariable::LtMask:
114 case SystemVariable::LeMask:
115 case SystemVariable::GtMask:
116 case SystemVariable::GeMask:
117 uses_warps = true;
118 switch (instr.sys20) {
119 case SystemVariable::EqMask:
120 return Operation(OperationCode::ThreadEqMask);
121 case SystemVariable::LtMask:
122 return Operation(OperationCode::ThreadLtMask);
123 case SystemVariable::LeMask:
124 return Operation(OperationCode::ThreadLeMask);
125 case SystemVariable::GtMask:
126 return Operation(OperationCode::ThreadGtMask);
127 case SystemVariable::GeMask:
128 return Operation(OperationCode::ThreadGeMask);
129 default:
130 UNREACHABLE();
131 return Immediate(0u);
132 }
112 default: 133 default:
113 UNIMPLEMENTED_MSG("Unhandled system move: {}", 134 UNIMPLEMENTED_MSG("Unhandled system move: {}",
114 static_cast<u32>(instr.sys20.Value())); 135 static_cast<u32>(instr.sys20.Value()));
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index f75b62240..cce8aeebe 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -226,6 +226,11 @@ enum class OperationCode {
226 VoteEqual, /// (bool) -> bool 226 VoteEqual, /// (bool) -> bool
227 227
228 ThreadId, /// () -> uint 228 ThreadId, /// () -> uint
229 ThreadEqMask, /// () -> uint
230 ThreadGeMask, /// () -> uint
231 ThreadGtMask, /// () -> uint
232 ThreadLeMask, /// () -> uint
233 ThreadLtMask, /// () -> uint
229 ShuffleIndexed, /// (uint value, uint index) -> uint 234 ShuffleIndexed, /// (uint value, uint index) -> uint
230 235
231 MemoryBarrierGL, /// () -> void 236 MemoryBarrierGL, /// () -> void