summaryrefslogtreecommitdiff
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2020-05-15 01:43:44 -0300
committerGravatar ReinUsesLisp2020-05-21 23:18:37 -0300
commite2b67a868b7191237374226218756c1a62fabd4e (patch)
tree0ae75b1a89a5cdec2abf2433b20558f4aaab589b /src/video_core/renderer_opengl
parentMerge pull request #3926 from ogniK5377/keyboard-states (diff)
downloadyuzu-e2b67a868b7191237374226218756c1a62fabd4e.tar.gz
yuzu-e2b67a868b7191237374226218756c1a62fabd4e.tar.xz
yuzu-e2b67a868b7191237374226218756c1a62fabd4e.zip
shader/other: Implement thread comparisons (NV_shader_thread_group)
Hardware S2R special registers match gl_Thread*MaskNV. We can trivially implement these using Nvidia's extension on OpenGL, or naively stub them with the ARB instructions to match. This might cause issues if the host device warp size doesn't match Nvidia's. That said, this is unlikely on proper shaders. Refer to the attached URL for more documentation about these flags. https://www.khronos.org/registry/OpenGL/extensions/NV/NV_shader_thread_group.txt
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp23
1 files changed, 23 insertions, 0 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 960ebf1a1..c83a08d42 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2309,6 +2309,18 @@ private:
2309 return {"gl_SubGroupInvocationARB", Type::Uint}; 2309 return {"gl_SubGroupInvocationARB", Type::Uint};
2310 } 2310 }
2311 2311
2312 template <const std::string_view& comparison>
2313 Expression ThreadMask(Operation) {
2314 if (device.HasWarpIntrinsics()) {
2315 return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint};
2316 }
2317 if (device.HasShaderBallot()) {
2318 return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint};
2319 }
2320 LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader");
2321 return {"0U", Type::Uint};
2322 }
2323
2312 Expression ShuffleIndexed(Operation operation) { 2324 Expression ShuffleIndexed(Operation operation) {
2313 std::string value = VisitOperand(operation, 0).AsFloat(); 2325 std::string value = VisitOperand(operation, 0).AsFloat();
2314 2326
@@ -2337,6 +2349,12 @@ private:
2337 static constexpr std::string_view NotEqual = "!="; 2349 static constexpr std::string_view NotEqual = "!=";
2338 static constexpr std::string_view GreaterEqual = ">="; 2350 static constexpr std::string_view GreaterEqual = ">=";
2339 2351
2352 static constexpr std::string_view Eq = "Eq";
2353 static constexpr std::string_view Ge = "Ge";
2354 static constexpr std::string_view Gt = "Gt";
2355 static constexpr std::string_view Le = "Le";
2356 static constexpr std::string_view Lt = "Lt";
2357
2340 static constexpr std::string_view Add = "Add"; 2358 static constexpr std::string_view Add = "Add";
2341 static constexpr std::string_view Min = "Min"; 2359 static constexpr std::string_view Min = "Min";
2342 static constexpr std::string_view Max = "Max"; 2360 static constexpr std::string_view Max = "Max";
@@ -2554,6 +2572,11 @@ private:
2554 &GLSLDecompiler::VoteEqual, 2572 &GLSLDecompiler::VoteEqual,
2555 2573
2556 &GLSLDecompiler::ThreadId, 2574 &GLSLDecompiler::ThreadId,
2575 &GLSLDecompiler::ThreadMask<Func::Eq>,
2576 &GLSLDecompiler::ThreadMask<Func::Ge>,
2577 &GLSLDecompiler::ThreadMask<Func::Gt>,
2578 &GLSLDecompiler::ThreadMask<Func::Le>,
2579 &GLSLDecompiler::ThreadMask<Func::Lt>,
2557 &GLSLDecompiler::ShuffleIndexed, 2580 &GLSLDecompiler::ShuffleIndexed,
2558 2581
2559 &GLSLDecompiler::MemoryBarrierGL, 2582 &GLSLDecompiler::MemoryBarrierGL,