summary | refs | log | tree | commit | diff
path: root/src/video_core/renderer_vulkan
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2020-05-15 01:43:44 -0300
committerGravatar ReinUsesLisp2020-05-21 23:18:37 -0300
commite2b67a868b7191237374226218756c1a62fabd4e (patch)
tree0ae75b1a89a5cdec2abf2433b20558f4aaab589b /src/video_core/renderer_vulkan
parentMerge pull request #3926 from ogniK5377/keyboard-states (diff)
downloadyuzu-e2b67a868b7191237374226218756c1a62fabd4e.tar.gz
yuzu-e2b67a868b7191237374226218756c1a62fabd4e.tar.xz
yuzu-e2b67a868b7191237374226218756c1a62fabd4e.zip
shader/other: Implement thread comparisons (NV_shader_thread_group)
Hardware S2R special registers match gl_Thread*MaskNV. We can trivially implement these using Nvidia's extension on OpenGL, or naively stub them with the matching ARB instructions. This might cause issues if the host device's warp size doesn't match Nvidia's. That said, this is unlikely on proper shaders. Refer to the attached URL for more documentation about these flags: https://www.khronos.org/registry/OpenGL/extensions/NV/NV_shader_thread_group.txt
Diffstat (limited to 'src/video_core/renderer_vulkan')
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp23
1 files changed, 23 insertions, 0 deletions
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 167e20e91..f4ccc9848 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -515,6 +515,16 @@ private:
515 void DeclareCommon() { 515 void DeclareCommon() {
516 thread_id = 516 thread_id =
517 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id"); 517 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id");
518 thread_masks[0] =
519 DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask");
520 thread_masks[1] =
521 DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask");
522 thread_masks[2] =
523 DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask");
524 thread_masks[3] =
525 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask");
526 thread_masks[4] =
527 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask");
518 } 528 }
519 529
520 void DeclareVertex() { 530 void DeclareVertex() {
@@ -2175,6 +2185,13 @@ private:
2175 return {OpLoad(t_uint, thread_id), Type::Uint}; 2185 return {OpLoad(t_uint, thread_id), Type::Uint};
2176 } 2186 }
2177 2187
2188 template <std::size_t index>
2189 Expression ThreadMask(Operation) {
2190 // TODO(Rodrigo): Handle devices with different warp sizes
2191 const Id mask = thread_masks[index];
2192 return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint};
2193 }
2194
2178 Expression ShuffleIndexed(Operation operation) { 2195 Expression ShuffleIndexed(Operation operation) {
2179 const Id value = AsFloat(Visit(operation[0])); 2196 const Id value = AsFloat(Visit(operation[0]));
2180 const Id index = AsUint(Visit(operation[1])); 2197 const Id index = AsUint(Visit(operation[1]));
@@ -2639,6 +2656,11 @@ private:
2639 &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>, 2656 &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>,
2640 2657
2641 &SPIRVDecompiler::ThreadId, 2658 &SPIRVDecompiler::ThreadId,
2659 &SPIRVDecompiler::ThreadMask<0>, // Eq
2660 &SPIRVDecompiler::ThreadMask<1>, // Ge
2661 &SPIRVDecompiler::ThreadMask<2>, // Gt
2662 &SPIRVDecompiler::ThreadMask<3>, // Le
2663 &SPIRVDecompiler::ThreadMask<4>, // Lt
2642 &SPIRVDecompiler::ShuffleIndexed, 2664 &SPIRVDecompiler::ShuffleIndexed,
2643 2665
2644 &SPIRVDecompiler::MemoryBarrierGL, 2666 &SPIRVDecompiler::MemoryBarrierGL,
@@ -2763,6 +2785,7 @@ private:
2763 Id workgroup_id{}; 2785 Id workgroup_id{};
2764 Id local_invocation_id{}; 2786 Id local_invocation_id{};
2765 Id thread_id{}; 2787 Id thread_id{};
2788 std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt
2766 2789
2767 VertexIndices in_indices; 2790 VertexIndices in_indices;
2768 VertexIndices out_indices; 2791 VertexIndices out_indices;