summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar bunnei2019-08-29 12:58:10 -0400
committerGravatar GitHub2019-08-29 12:58:10 -0400
commit680ab6132726946435081df6c4f2ef01ec2f1691 (patch)
tree230bc98a8bdd67a95c5abde32294655469845b3b /src
parentMerge pull request #2748 from FernandoS27/align-memory (diff)
parentshader_ir: Implement VOTE (diff)
downloadyuzu-680ab6132726946435081df6c4f2ef01ec2f1691.tar.gz
yuzu-680ab6132726946435081df6c4f2ef01ec2f1691.tar.xz
yuzu-680ab6132726946435081df6c4f2ef01ec2f1691.zip
Merge pull request #2786 from ReinUsesLisp/vote
shader_ir: Implement VOTE on Nvidia drivers
Diffstat (limited to 'src')
-rw-r--r--src/common/CMakeLists.txt1
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/engines/shader_bytecode.h16
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp47
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp25
-rw-r--r--src/video_core/shader/decode.cpp1
-rw-r--r--src/video_core/shader/decode/warp.cpp55
-rw-r--r--src/video_core/shader/node.h5
-rw-r--r--src/video_core/shader/shader_ir.h1
12 files changed, 163 insertions, 1 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 2b4266f29..01abdb3bb 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -55,6 +55,7 @@ add_custom_command(OUTPUT scm_rev.cpp
55 "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" 55 "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
56 "${VIDEO_CORE}/shader/decode/shift.cpp" 56 "${VIDEO_CORE}/shader/decode/shift.cpp"
57 "${VIDEO_CORE}/shader/decode/video.cpp" 57 "${VIDEO_CORE}/shader/decode/video.cpp"
58 "${VIDEO_CORE}/shader/decode/warp.cpp"
58 "${VIDEO_CORE}/shader/decode/xmad.cpp" 59 "${VIDEO_CORE}/shader/decode/xmad.cpp"
59 "${VIDEO_CORE}/shader/control_flow.cpp" 60 "${VIDEO_CORE}/shader/control_flow.cpp"
60 "${VIDEO_CORE}/shader/control_flow.h" 61 "${VIDEO_CORE}/shader/control_flow.h"
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 7c18c27b3..f315e021d 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -100,6 +100,7 @@ add_library(video_core STATIC
100 shader/decode/integer_set.cpp 100 shader/decode/integer_set.cpp
101 shader/decode/half_set.cpp 101 shader/decode/half_set.cpp
102 shader/decode/video.cpp 102 shader/decode/video.cpp
103 shader/decode/warp.cpp
103 shader/decode/xmad.cpp 104 shader/decode/xmad.cpp
104 shader/decode/other.cpp 105 shader/decode/other.cpp
105 shader/control_flow.cpp 106 shader/control_flow.cpp
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index aaa1acea9..bc8c2a1c5 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -538,6 +538,12 @@ enum class PhysicalAttributeDirection : u64 {
538 Output = 1, 538 Output = 1,
539}; 539};
540 540
541enum class VoteOperation : u64 {
542 All = 0, // allThreadsNV
543 Any = 1, // anyThreadNV
544 Eq = 2, // allThreadsEqualNV
545};
546
541union Instruction { 547union Instruction {
542 Instruction& operator=(const Instruction& instr) { 548 Instruction& operator=(const Instruction& instr) {
543 value = instr.value; 549 value = instr.value;
@@ -565,6 +571,13 @@ union Instruction {
565 } nop; 571 } nop;
566 572
567 union { 573 union {
574 BitField<48, 2, VoteOperation> operation;
575 BitField<45, 3, u64> dest_pred;
576 BitField<39, 3, u64> value;
577 BitField<42, 1, u64> negate_value;
578 } vote;
579
580 union {
568 BitField<8, 8, Register> gpr; 581 BitField<8, 8, Register> gpr;
569 BitField<20, 24, s64> offset; 582 BitField<20, 24, s64> offset;
570 } gmem; 583 } gmem;
@@ -1487,6 +1500,7 @@ public:
1487 SYNC, 1500 SYNC,
1488 BRK, 1501 BRK,
1489 DEPBAR, 1502 DEPBAR,
1503 VOTE,
1490 BFE_C, 1504 BFE_C,
1491 BFE_R, 1505 BFE_R,
1492 BFE_IMM, 1506 BFE_IMM,
@@ -1649,6 +1663,7 @@ public:
1649 Hfma2, 1663 Hfma2,
1650 Flow, 1664 Flow,
1651 Synch, 1665 Synch,
1666 Warp,
1652 Memory, 1667 Memory,
1653 Texture, 1668 Texture,
1654 Image, 1669 Image,
@@ -1775,6 +1790,7 @@ private:
1775 INST("111000110100---", Id::BRK, Type::Flow, "BRK"), 1790 INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
1776 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), 1791 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
1777 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), 1792 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
1793 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
1778 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), 1794 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
1779 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), 1795 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
1780 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), 1796 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 85424a4c9..03d434b28 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -27,6 +27,8 @@ Device::Device() {
27 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); 27 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
28 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); 28 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
29 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); 29 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
30 has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
31 GLAD_GL_NV_shader_thread_shuffle;
30 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; 32 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
31 has_variable_aoffi = TestVariableAoffi(); 33 has_variable_aoffi = TestVariableAoffi();
32 has_component_indexing_bug = TestComponentIndexingBug(); 34 has_component_indexing_bug = TestComponentIndexingBug();
@@ -36,6 +38,7 @@ Device::Device(std::nullptr_t) {
36 uniform_buffer_alignment = 0; 38 uniform_buffer_alignment = 0;
37 max_vertex_attributes = 16; 39 max_vertex_attributes = 16;
38 max_varyings = 15; 40 max_varyings = 15;
41 has_warp_intrinsics = true;
39 has_vertex_viewport_layer = true; 42 has_vertex_viewport_layer = true;
40 has_variable_aoffi = true; 43 has_variable_aoffi = true;
41 has_component_indexing_bug = false; 44 has_component_indexing_bug = false;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index dc883722d..3ef7c6dd8 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -30,6 +30,10 @@ public:
30 return max_varyings; 30 return max_varyings;
31 } 31 }
32 32
33 bool HasWarpIntrinsics() const {
34 return has_warp_intrinsics;
35 }
36
33 bool HasVertexViewportLayer() const { 37 bool HasVertexViewportLayer() const {
34 return has_vertex_viewport_layer; 38 return has_vertex_viewport_layer;
35 } 39 }
@@ -50,6 +54,7 @@ private:
50 std::size_t shader_storage_alignment{}; 54 std::size_t shader_storage_alignment{};
51 u32 max_vertex_attributes{}; 55 u32 max_vertex_attributes{};
52 u32 max_varyings{}; 56 u32 max_varyings{};
57 bool has_warp_intrinsics{};
53 bool has_vertex_viewport_layer{}; 58 bool has_vertex_viewport_layer{};
54 bool has_variable_aoffi{}; 59 bool has_variable_aoffi{};
55 bool has_component_indexing_bug{}; 60 bool has_component_indexing_bug{};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 1c90facc3..a32a7e984 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -212,7 +212,9 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
212 const auto texture_buffer_usage{variant.texture_buffer_usage}; 212 const auto texture_buffer_usage{variant.texture_buffer_usage};
213 213
214 std::string source = "#version 430 core\n" 214 std::string source = "#version 430 core\n"
215 "#extension GL_ARB_separate_shader_objects : enable\n"; 215 "#extension GL_ARB_separate_shader_objects : enable\n"
216 "#extension GL_NV_gpu_shader5 : enable\n"
217 "#extension GL_NV_shader_thread_group : enable\n";
216 if (entries.shader_viewport_layer_array) { 218 if (entries.shader_viewport_layer_array) {
217 source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; 219 source += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
218 } 220 }
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index d8f722c26..1bfdbcd61 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1735,6 +1735,48 @@ private:
1735 return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; 1735 return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')';
1736 } 1736 }
1737 1737
1738 std::string BallotThread(Operation operation) {
1739 const std::string value = VisitOperand(operation, 0, Type::Bool);
1740 if (!device.HasWarpIntrinsics()) {
1741 LOG_ERROR(Render_OpenGL,
1742 "Nvidia warp intrinsics are not available and its required by a shader");
1743 // Stub on non-Nvidia devices by simulating all threads voting the same as the active
1744 // one.
1745 return fmt::format("utof({} ? 0xFFFFFFFFU : 0U)", value);
1746 }
1747 return fmt::format("utof(ballotThreadNV({}))", value);
1748 }
1749
1750 std::string Vote(Operation operation, const char* func) {
1751 const std::string value = VisitOperand(operation, 0, Type::Bool);
1752 if (!device.HasWarpIntrinsics()) {
1753 LOG_ERROR(Render_OpenGL,
1754 "Nvidia vote intrinsics are not available and its required by a shader");
1755 // Stub with a warp size of one.
1756 return value;
1757 }
1758 return fmt::format("{}({})", func, value);
1759 }
1760
1761 std::string VoteAll(Operation operation) {
1762 return Vote(operation, "allThreadsNV");
1763 }
1764
1765 std::string VoteAny(Operation operation) {
1766 return Vote(operation, "anyThreadNV");
1767 }
1768
1769 std::string VoteEqual(Operation operation) {
1770 if (!device.HasWarpIntrinsics()) {
1771 LOG_ERROR(Render_OpenGL,
1772 "Nvidia vote intrinsics are not available and its required by a shader");
1773 // We must return true here since a stub for a theoretical warp size of 1 will always
1774 // return an equal result for all its votes.
1775 return "true";
1776 }
1777 return Vote(operation, "allThreadsEqualNV");
1778 }
1779
1738 static constexpr std::array operation_decompilers = { 1780 static constexpr std::array operation_decompilers = {
1739 &GLSLDecompiler::Assign, 1781 &GLSLDecompiler::Assign,
1740 1782
@@ -1885,6 +1927,11 @@ private:
1885 &GLSLDecompiler::WorkGroupId<0>, 1927 &GLSLDecompiler::WorkGroupId<0>,
1886 &GLSLDecompiler::WorkGroupId<1>, 1928 &GLSLDecompiler::WorkGroupId<1>,
1887 &GLSLDecompiler::WorkGroupId<2>, 1929 &GLSLDecompiler::WorkGroupId<2>,
1930
1931 &GLSLDecompiler::BallotThread,
1932 &GLSLDecompiler::VoteAll,
1933 &GLSLDecompiler::VoteAny,
1934 &GLSLDecompiler::VoteEqual,
1888 }; 1935 };
1889 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 1936 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1890 1937
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 24a591797..a35b45c9c 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1072,6 +1072,26 @@ private:
1072 return {}; 1072 return {};
1073 } 1073 }
1074 1074
1075 Id BallotThread(Operation) {
1076 UNIMPLEMENTED();
1077 return {};
1078 }
1079
1080 Id VoteAll(Operation) {
1081 UNIMPLEMENTED();
1082 return {};
1083 }
1084
1085 Id VoteAny(Operation) {
1086 UNIMPLEMENTED();
1087 return {};
1088 }
1089
1090 Id VoteEqual(Operation) {
1091 UNIMPLEMENTED();
1092 return {};
1093 }
1094
1075 Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, 1095 Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type,
1076 const std::string& name) { 1096 const std::string& name) {
1077 const Id id = OpVariable(type, storage); 1097 const Id id = OpVariable(type, storage);
@@ -1364,6 +1384,11 @@ private:
1364 &SPIRVDecompiler::WorkGroupId<0>, 1384 &SPIRVDecompiler::WorkGroupId<0>,
1365 &SPIRVDecompiler::WorkGroupId<1>, 1385 &SPIRVDecompiler::WorkGroupId<1>,
1366 &SPIRVDecompiler::WorkGroupId<2>, 1386 &SPIRVDecompiler::WorkGroupId<2>,
1387
1388 &SPIRVDecompiler::BallotThread,
1389 &SPIRVDecompiler::VoteAll,
1390 &SPIRVDecompiler::VoteAny,
1391 &SPIRVDecompiler::VoteEqual,
1367 }; 1392 };
1368 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 1393 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1369 1394
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index b547d8323..47a9fd961 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -176,6 +176,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
176 {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, 176 {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
177 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, 177 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
178 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, 178 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
179 {OpCode::Type::Warp, &ShaderIR::DecodeWarp},
179 {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, 180 {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
180 {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, 181 {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
181 {OpCode::Type::Image, &ShaderIR::DecodeImage}, 182 {OpCode::Type::Image, &ShaderIR::DecodeImage},
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
new file mode 100644
index 000000000..04ca74f46
--- /dev/null
+++ b/src/video_core/shader/decode/warp.cpp
@@ -0,0 +1,55 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16using Tegra::Shader::VoteOperation;
17
18namespace {
19OperationCode GetOperationCode(VoteOperation vote_op) {
20 switch (vote_op) {
21 case VoteOperation::All:
22 return OperationCode::VoteAll;
23 case VoteOperation::Any:
24 return OperationCode::VoteAny;
25 case VoteOperation::Eq:
26 return OperationCode::VoteEqual;
27 default:
28 UNREACHABLE_MSG("Invalid vote operation={}", static_cast<u64>(vote_op));
29 return OperationCode::VoteAll;
30 }
31}
32} // Anonymous namespace
33
34u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
35 const Instruction instr = {program_code[pc]};
36 const auto opcode = OpCode::Decode(instr);
37
38 switch (opcode->get().GetId()) {
39 case OpCode::Id::VOTE: {
40 const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
41 const Node active = Operation(OperationCode::BallotThread, value);
42 const Node vote = Operation(GetOperationCode(instr.vote.operation), value);
43 SetRegister(bb, instr.gpr0, active);
44 SetPredicate(bb, instr.vote.dest_pred, vote);
45 break;
46 }
47 default:
48 UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
49 break;
50 }
51
52 return pc;
53}
54
55} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 5f0852364..5db9313c4 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -168,6 +168,11 @@ enum class OperationCode {
168 WorkGroupIdY, /// () -> uint 168 WorkGroupIdY, /// () -> uint
169 WorkGroupIdZ, /// () -> uint 169 WorkGroupIdZ, /// () -> uint
170 170
171 BallotThread, /// (bool) -> uint
172 VoteAll, /// (bool) -> bool
173 VoteAny, /// (bool) -> bool
174 VoteEqual, /// (bool) -> bool
175
171 Amount, 176 Amount,
172}; 177};
173 178
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 59a083d90..99d06ff4a 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -167,6 +167,7 @@ private:
167 u32 DecodeFfma(NodeBlock& bb, u32 pc); 167 u32 DecodeFfma(NodeBlock& bb, u32 pc);
168 u32 DecodeHfma2(NodeBlock& bb, u32 pc); 168 u32 DecodeHfma2(NodeBlock& bb, u32 pc);
169 u32 DecodeConversion(NodeBlock& bb, u32 pc); 169 u32 DecodeConversion(NodeBlock& bb, u32 pc);
170 u32 DecodeWarp(NodeBlock& bb, u32 pc);
170 u32 DecodeMemory(NodeBlock& bb, u32 pc); 171 u32 DecodeMemory(NodeBlock& bb, u32 pc);
171 u32 DecodeTexture(NodeBlock& bb, u32 pc); 172 u32 DecodeTexture(NodeBlock& bb, u32 pc);
172 u32 DecodeImage(NodeBlock& bb, u32 pc); 173 u32 DecodeImage(NodeBlock& bb, u32 pc);