summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: ReinUsesLisp, 2020-05-15 01:43:44 -0300
committer: ReinUsesLisp, 2020-05-21 23:18:37 -0300
commit: e2b67a868b7191237374226218756c1a62fabd4e (patch)
tree: 0ae75b1a89a5cdec2abf2433b20558f4aaab589b /src
parent: Merge pull request #3926 from ogniK5377/keyboard-states (diff)
downloadyuzu-e2b67a868b7191237374226218756c1a62fabd4e.tar.gz
yuzu-e2b67a868b7191237374226218756c1a62fabd4e.tar.xz
yuzu-e2b67a868b7191237374226218756c1a62fabd4e.zip
shader/other: Implement thread comparisons (NV_shader_thread_group)
Hardware S2R special registers match gl_Thread*MaskNV. We can trivially implement these using Nvidia's extension on OpenGL, or naively stub them with the corresponding ARB built-ins (gl_SubGroup*MaskARB). This might cause issues if the host device's warp size doesn't match Nvidia's. That said, this is unlikely on proper shaders. Refer to the attached URL for more documentation about these flags: https://www.khronos.org/registry/OpenGL/extensions/NV/NV_shader_thread_group.txt
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 23
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 23
-rw-r--r-- src/video_core/shader/decode/other.cpp | 21
-rw-r--r-- src/video_core/shader/node.h | 5
4 files changed, 72 insertions, 0 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 960ebf1a1..c83a08d42 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2309,6 +2309,18 @@ private:
2309 return {"gl_SubGroupInvocationARB", Type::Uint}; 2309 return {"gl_SubGroupInvocationARB", Type::Uint};
2310 } 2310 }
2311 2311
2312 template <const std::string_view& comparison>
2313 Expression ThreadMask(Operation) {
2314 if (device.HasWarpIntrinsics()) {
2315 return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint};
2316 }
2317 if (device.HasShaderBallot()) {
2318 return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint};
2319 }
2320 LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader");
2321 return {"0U", Type::Uint};
2322 }
2323
2312 Expression ShuffleIndexed(Operation operation) { 2324 Expression ShuffleIndexed(Operation operation) {
2313 std::string value = VisitOperand(operation, 0).AsFloat(); 2325 std::string value = VisitOperand(operation, 0).AsFloat();
2314 2326
@@ -2337,6 +2349,12 @@ private:
2337 static constexpr std::string_view NotEqual = "!="; 2349 static constexpr std::string_view NotEqual = "!=";
2338 static constexpr std::string_view GreaterEqual = ">="; 2350 static constexpr std::string_view GreaterEqual = ">=";
2339 2351
2352 static constexpr std::string_view Eq = "Eq";
2353 static constexpr std::string_view Ge = "Ge";
2354 static constexpr std::string_view Gt = "Gt";
2355 static constexpr std::string_view Le = "Le";
2356 static constexpr std::string_view Lt = "Lt";
2357
2340 static constexpr std::string_view Add = "Add"; 2358 static constexpr std::string_view Add = "Add";
2341 static constexpr std::string_view Min = "Min"; 2359 static constexpr std::string_view Min = "Min";
2342 static constexpr std::string_view Max = "Max"; 2360 static constexpr std::string_view Max = "Max";
@@ -2554,6 +2572,11 @@ private:
2554 &GLSLDecompiler::VoteEqual, 2572 &GLSLDecompiler::VoteEqual,
2555 2573
2556 &GLSLDecompiler::ThreadId, 2574 &GLSLDecompiler::ThreadId,
2575 &GLSLDecompiler::ThreadMask<Func::Eq>,
2576 &GLSLDecompiler::ThreadMask<Func::Ge>,
2577 &GLSLDecompiler::ThreadMask<Func::Gt>,
2578 &GLSLDecompiler::ThreadMask<Func::Le>,
2579 &GLSLDecompiler::ThreadMask<Func::Lt>,
2557 &GLSLDecompiler::ShuffleIndexed, 2580 &GLSLDecompiler::ShuffleIndexed,
2558 2581
2559 &GLSLDecompiler::MemoryBarrierGL, 2582 &GLSLDecompiler::MemoryBarrierGL,
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 167e20e91..f4ccc9848 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -515,6 +515,16 @@ private:
515 void DeclareCommon() { 515 void DeclareCommon() {
516 thread_id = 516 thread_id =
517 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id"); 517 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id");
518 thread_masks[0] =
519 DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask");
520 thread_masks[1] =
521 DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask");
522 thread_masks[2] =
523 DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask");
524 thread_masks[3] =
525 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask");
526 thread_masks[4] =
527 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask");
518 } 528 }
519 529
520 void DeclareVertex() { 530 void DeclareVertex() {
@@ -2175,6 +2185,13 @@ private:
2175 return {OpLoad(t_uint, thread_id), Type::Uint}; 2185 return {OpLoad(t_uint, thread_id), Type::Uint};
2176 } 2186 }
2177 2187
2188 template <std::size_t index>
2189 Expression ThreadMask(Operation) {
2190 // TODO(Rodrigo): Handle devices with different warp sizes
2191 const Id mask = thread_masks[index];
2192 return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint};
2193 }
2194
2178 Expression ShuffleIndexed(Operation operation) { 2195 Expression ShuffleIndexed(Operation operation) {
2179 const Id value = AsFloat(Visit(operation[0])); 2196 const Id value = AsFloat(Visit(operation[0]));
2180 const Id index = AsUint(Visit(operation[1])); 2197 const Id index = AsUint(Visit(operation[1]));
@@ -2639,6 +2656,11 @@ private:
2639 &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>, 2656 &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>,
2640 2657
2641 &SPIRVDecompiler::ThreadId, 2658 &SPIRVDecompiler::ThreadId,
2659 &SPIRVDecompiler::ThreadMask<0>, // Eq
2660 &SPIRVDecompiler::ThreadMask<1>, // Ge
2661 &SPIRVDecompiler::ThreadMask<2>, // Gt
2662 &SPIRVDecompiler::ThreadMask<3>, // Le
2663 &SPIRVDecompiler::ThreadMask<4>, // Lt
2642 &SPIRVDecompiler::ShuffleIndexed, 2664 &SPIRVDecompiler::ShuffleIndexed,
2643 2665
2644 &SPIRVDecompiler::MemoryBarrierGL, 2666 &SPIRVDecompiler::MemoryBarrierGL,
@@ -2763,6 +2785,7 @@ private:
2763 Id workgroup_id{}; 2785 Id workgroup_id{};
2764 Id local_invocation_id{}; 2786 Id local_invocation_id{};
2765 Id thread_id{}; 2787 Id thread_id{};
2788 std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt
2766 2789
2767 VertexIndices in_indices; 2790 VertexIndices in_indices;
2768 VertexIndices out_indices; 2791 VertexIndices out_indices;
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d4f95b18c..399a455c4 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -109,6 +109,27 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
109 return Operation(OperationCode::WorkGroupIdY); 109 return Operation(OperationCode::WorkGroupIdY);
110 case SystemVariable::CtaIdZ: 110 case SystemVariable::CtaIdZ:
111 return Operation(OperationCode::WorkGroupIdZ); 111 return Operation(OperationCode::WorkGroupIdZ);
112 case SystemVariable::EqMask:
113 case SystemVariable::LtMask:
114 case SystemVariable::LeMask:
115 case SystemVariable::GtMask:
116 case SystemVariable::GeMask:
117 uses_warps = true;
118 switch (instr.sys20) {
119 case SystemVariable::EqMask:
120 return Operation(OperationCode::ThreadEqMask);
121 case SystemVariable::LtMask:
122 return Operation(OperationCode::ThreadLtMask);
123 case SystemVariable::LeMask:
124 return Operation(OperationCode::ThreadLeMask);
125 case SystemVariable::GtMask:
126 return Operation(OperationCode::ThreadGtMask);
127 case SystemVariable::GeMask:
128 return Operation(OperationCode::ThreadGeMask);
129 default:
130 UNREACHABLE();
131 return Immediate(0u);
132 }
112 default: 133 default:
113 UNIMPLEMENTED_MSG("Unhandled system move: {}", 134 UNIMPLEMENTED_MSG("Unhandled system move: {}",
114 static_cast<u32>(instr.sys20.Value())); 135 static_cast<u32>(instr.sys20.Value()));
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index f75b62240..cce8aeebe 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -226,6 +226,11 @@ enum class OperationCode {
226 VoteEqual, /// (bool) -> bool 226 VoteEqual, /// (bool) -> bool
227 227
228 ThreadId, /// () -> uint 228 ThreadId, /// () -> uint
229 ThreadEqMask, /// () -> uint
230 ThreadGeMask, /// () -> uint
231 ThreadGtMask, /// () -> uint
232 ThreadLeMask, /// () -> uint
233 ThreadLtMask, /// () -> uint
229 ShuffleIndexed, /// (uint value, uint index) -> uint 234 ShuffleIndexed, /// (uint value, uint index) -> uint
230 235
231 MemoryBarrierGL, /// () -> void 236 MemoryBarrierGL, /// () -> void