shader/other: Implement thread comparisons (NV_shader_thread_group)

The hardware S2R special registers match gl_Thread*MaskNV. We can trivially implement these using Nvidia's extension on OpenGL, or naively stub them with the matching ARB instructions. The stub might cause issues if the host device's warp size doesn't match Nvidia's; that said, this is unlikely on proper shaders. Refer to the attached URL for more documentation about these flags: https://www.khronos.org/registry/OpenGL/extensions/NV/NV_shader_thread_group.txt
author: ReinUsesLisp 2020-05-15 01:43:44 -0300
committer: ReinUsesLisp 2020-05-21 23:18:37 -0300
commit: e2b67a868b7191237374226218756c1a62fabd4e (patch)
tree: 0ae75b1a89a5cdec2abf2433b20558f4aaab589b /src/video_core/renderer_vulkan
parent: Merge pull request #3926 from ogniK5377/keyboard-states (diff)
download: yuzu-e2b67a868b7191237374226218756c1a62fabd4e.tar.gz
yuzu-e2b67a868b7191237374226218756c1a62fabd4e.tar.xz
yuzu-e2b67a868b7191237374226218756c1a62fabd4e.zip
1 files changed, 23 insertions, 0 deletions
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 167e20e91..f4ccc9848 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -515,6 +515,16 @@ private:
    void DeclareCommon() {
        thread_id =
            DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id");
+        thread_masks[0] = // Slots 0..4 hold the subgroup Eq, Ge, Gt, Le, Lt mask built-ins, in that order
+            DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask");
+        thread_masks[1] =
+            DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask");
+        thread_masks[2] =
+            DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask");
+        thread_masks[3] =
+            DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask");
+        thread_masks[4] =
+            DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask");
    }
    void DeclareVertex() {
@@ -2175,6 +2185,13 @@ private:
        return {OpLoad(t_uint, thread_id), Type::Uint};
    }
+    template <std::size_t index> // index picks the thread_masks slot: 0=eq, 1=ge, 2=gt, 3=le, 4=lt
+    Expression ThreadMask(Operation) {
+        // TODO(Rodrigo): Handle devices with different warp sizes. Only element 0 of the selected mask (declared with t_in_uint4) is loaded below and returned as a single uint.
+        const Id mask = thread_masks[index];
+        return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint};
+    }
    Expression ShuffleIndexed(Operation operation) {
        const Id value = AsFloat(Visit(operation[0]));
        const Id index = AsUint(Visit(operation[1]));
@@ -2639,6 +2656,11 @@ private:
        &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>,
        &SPIRVDecompiler::ThreadId,
+        &SPIRVDecompiler::ThreadMask<0>, // Eq
+        &SPIRVDecompiler::ThreadMask<1>, // Ge
+        &SPIRVDecompiler::ThreadMask<2>, // Gt
+        &SPIRVDecompiler::ThreadMask<3>, // Le
+        &SPIRVDecompiler::ThreadMask<4>, // Lt
        &SPIRVDecompiler::ShuffleIndexed,
        &SPIRVDecompiler::MemoryBarrierGL,
@@ -2763,6 +2785,7 @@ private:
    Id workgroup_id{};
    Id local_invocation_id{};
    Id thread_id{};
+    std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt
    VertexIndices in_indices;
    VertexIndices out_indices;
author	ReinUsesLisp	2020-05-15 01:43:44 -0300
committer	ReinUsesLisp	2020-05-21 23:18:37 -0300
commit	e2b67a868b7191237374226218756c1a62fabd4e (patch)
tree	0ae75b1a89a5cdec2abf2433b20558f4aaab589b /src/video_core/renderer_vulkan
parent	Merge pull request #3926 from ogniK5377/keyboard-states (diff)
download	yuzu-e2b67a868b7191237374226218756c1a62fabd4e.tar.gz yuzu-e2b67a868b7191237374226218756c1a62fabd4e.tar.xz yuzu-e2b67a868b7191237374226218756c1a62fabd4e.zip