summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: bunnei, 2019-09-20 17:10:42 -0400
committer: GitHub, 2019-09-20 17:10:42 -0400
commit: 88d857499b6168d7bcea9b91fa5bdd8b0144c07a (patch)
tree: 1efa1eddb43c0afd3003b449675997abc9a7637f
parent: Merge pull request #2784 from ReinUsesLisp/smem (diff)
parent: shader_ir/warp: Implement SHFL (diff)
download: yuzu-88d857499b6168d7bcea9b91fa5bdd8b0144c07a.tar.gz
yuzu-88d857499b6168d7bcea9b91fa5bdd8b0144c07a.tar.xz
yuzu-88d857499b6168d7bcea9b91fa5bdd8b0144c07a.zip
Merge pull request #2855 from ReinUsesLisp/shfl
shader_ir/warp: Implement SHFL for Nvidia devices
Diffstat (limited to '')
-rw-r--r-- src/video_core/engines/shader_bytecode.h | 18
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 3
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 63
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 50
-rw-r--r-- src/video_core/shader/decode/warp.cpp | 47
-rw-r--r-- src/video_core/shader/node.h | 10
6 files changed, 182 insertions, 9 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 052e6d24e..a6110bd86 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -566,6 +566,13 @@ enum class ImageAtomicOperation : u64 {
566 Exch = 8, 566 Exch = 8,
567}; 567};
568 568
/// Shuffle variant selected by the `operation` field of a SHFL instruction.
/// The trailing comment on each value names the GLSL shuffle function that the OpenGL
/// decompiler emits for it.
569enum class ShuffleOperation : u64 {
570 Idx = 0, // shuffleNV
571 Up = 1, // shuffleUpNV
572 Down = 2, // shuffleDownNV
573 Bfly = 3, // shuffleXorNV
574};
575
569union Instruction { 576union Instruction {
570 Instruction& operator=(const Instruction& instr) { 577 Instruction& operator=(const Instruction& instr) {
571 value = instr.value; 578 value = instr.value;
@@ -600,6 +607,15 @@ union Instruction {
600 } vote; 607 } vote;
601 608
// Operand fields of the SHFL instruction, consumed by the SHFL case of ShaderIR::DecodeWarp.
// NOTE(review): BitField is declared outside this diff; its template arguments are presumably
// <first bit, bit count, value type> - confirm against its definition.
602 union { 609 union {
610 BitField<30, 2, ShuffleOperation> operation; // shuffle variant to perform
611 BitField<48, 3, u64> pred48; // predicate that receives the in-range result
612 BitField<28, 1, u64> is_index_imm; // when set, index_imm is used instead of register gpr20
613 BitField<29, 1, u64> is_mask_imm; // when set, mask_imm is used instead of register gpr39
614 BitField<20, 5, u64> index_imm; // immediate form of the index operand
615 BitField<34, 13, u64> mask_imm; // immediate form of the mask operand
616 } shfl;
617
618 union {
603 BitField<8, 8, Register> gpr; 619 BitField<8, 8, Register> gpr;
604 BitField<20, 24, s64> offset; 620 BitField<20, 24, s64> offset;
605 } gmem; 621 } gmem;
@@ -1542,6 +1558,7 @@ public:
1542 BRK, 1558 BRK,
1543 DEPBAR, 1559 DEPBAR,
1544 VOTE, 1560 VOTE,
1561 SHFL,
1545 BFE_C, 1562 BFE_C,
1546 BFE_R, 1563 BFE_R,
1547 BFE_IMM, 1564 BFE_IMM,
@@ -1833,6 +1850,7 @@ private:
1833 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), 1850 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
1834 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), 1851 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
1835 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), 1852 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
1853 INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
1836 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), 1854 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
1837 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), 1855 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
1838 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), 1856 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 909ccb82c..0dbc4c02f 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -214,7 +214,8 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
214 std::string source = "#version 430 core\n" 214 std::string source = "#version 430 core\n"
215 "#extension GL_ARB_separate_shader_objects : enable\n" 215 "#extension GL_ARB_separate_shader_objects : enable\n"
216 "#extension GL_NV_gpu_shader5 : enable\n" 216 "#extension GL_NV_gpu_shader5 : enable\n"
217 "#extension GL_NV_shader_thread_group : enable\n"; 217 "#extension GL_NV_shader_thread_group : enable\n"
218 "#extension GL_NV_shader_thread_shuffle : enable\n";
218 if (entries.shader_viewport_layer_array) { 219 if (entries.shader_viewport_layer_array) {
219 source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; 220 source += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
220 } 221 }
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 14834d86a..6c5402e33 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1953,8 +1953,7 @@ private:
1953 Expression BallotThread(Operation operation) { 1953 Expression BallotThread(Operation operation) {
1954 const std::string value = VisitOperand(operation, 0).AsBool(); 1954 const std::string value = VisitOperand(operation, 0).AsBool();
1955 if (!device.HasWarpIntrinsics()) { 1955 if (!device.HasWarpIntrinsics()) {
1956 LOG_ERROR(Render_OpenGL, 1956 LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
1957 "Nvidia warp intrinsics are not available and its required by a shader");
1958 // Stub on non-Nvidia devices by simulating all threads voting the same as the active 1957 // Stub on non-Nvidia devices by simulating all threads voting the same as the active
1959 // one. 1958 // one.
1960 return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; 1959 return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint};
@@ -1965,8 +1964,7 @@ private:
1965 Expression Vote(Operation operation, const char* func) { 1964 Expression Vote(Operation operation, const char* func) {
1966 const std::string value = VisitOperand(operation, 0).AsBool(); 1965 const std::string value = VisitOperand(operation, 0).AsBool();
1967 if (!device.HasWarpIntrinsics()) { 1966 if (!device.HasWarpIntrinsics()) {
1968 LOG_ERROR(Render_OpenGL, 1967 LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
1969 "Nvidia vote intrinsics are not available and its required by a shader");
1970 // Stub with a warp size of one. 1968 // Stub with a warp size of one.
1971 return {value, Type::Bool}; 1969 return {value, Type::Bool};
1972 } 1970 }
@@ -1983,15 +1981,54 @@ private:
1983 1981
1984 Expression VoteEqual(Operation operation) { 1982 Expression VoteEqual(Operation operation) {
1985 if (!device.HasWarpIntrinsics()) { 1983 if (!device.HasWarpIntrinsics()) {
1986 LOG_ERROR(Render_OpenGL, 1984 LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
1987 "Nvidia vote intrinsics are not available and its required by a shader"); 1985 // We must return true here since a stub for a theoretical warp size of 1.
1988 // We must return true here since a stub for a theoretical warp size of 1 will always 1986 // This will always return an equal result across all votes.
1989 // return an equal result for all its votes.
1990 return {"true", Type::Bool}; 1987 return {"true", Type::Bool};
1991 } 1988 }
1992 return Vote(operation, "allThreadsEqualNV"); 1989 return Vote(operation, "allThreadsEqualNV");
1993 } 1990 }
1994 1991
/// Builds the GLSL expression `func(value, index, width)` for a shuffle operation.
/// @tparam func Name of the GLSL shuffle function to emit (one of the Func:: constants).
/// @param operation Node whose operands are, in order: value (read as float), index (uint)
///                  and width (uint).
/// @return A float expression. When the device lacks warp intrinsics an error is logged and
///         the value operand is returned unchanged; index and width are not visited then.
1992 template <const std::string_view& func>
1993 Expression Shuffle(Operation operation) {
1994 const std::string value = VisitOperand(operation, 0).AsFloat();
1995 if (!device.HasWarpIntrinsics()) {
1996 LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader");
1997 // On a "single-thread" device we are either on the same thread or out of bounds. Both
1998 // cases return the passed value.
1999 return {value, Type::Float};
2000 }
2001
2002 const std::string index = VisitOperand(operation, 1).AsUint();
2003 const std::string width = VisitOperand(operation, 2).AsUint();
2004 return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float};
2005 }
2006
/// Builds the boolean "in range" result that accompanies a shuffle operation.
/// @tparam func Name of the GLSL shuffle function to emit (one of the Func:: constants).
/// @param operation Node whose operands are, in order: index (uint) and width (uint).
/// @return A bool expression. With warp intrinsics this is a fresh temporary that is declared
///         and then passed as the fourth argument of an emitted `func(0U, index, width, tmp);`
///         statement. Without them it is `(index == 0U)`, and no error is logged.
/// NOTE(review): the fourth argument is presumably an out parameter written by the NV
/// function - confirm against the GL_NV_shader_thread_shuffle specification.
2007 template <const std::string_view& func>
2008 Expression InRangeShuffle(Operation operation) {
2009 const std::string index = VisitOperand(operation, 0).AsUint();
2010 const std::string width = VisitOperand(operation, 1).AsUint(); // unused by the fallback below
2011 if (!device.HasWarpIntrinsics()) {
2012 // On a "single-thread" device we are only in bounds when the requested index is 0.
2013 return {fmt::format("({} == 0U)", index), Type::Bool};
2014 }
2015
2016 const std::string in_range = code.GenerateTemporary();
2017 code.AddLine("bool {};", in_range);
2018 code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range);
2019 return {in_range, Type::Bool};
2020 }
2021
/// Holder for the GLSL shuffle function names that are passed by reference as template
/// arguments to Shuffle and InRangeShuffle. Its constructor and destructor are deleted, so
/// it only serves as a scope for the constants.
2022 struct Func final {
2023 Func() = delete;
2024 ~Func() = delete;
2025
2026 static constexpr std::string_view ShuffleIndexed = "shuffleNV";
2027 static constexpr std::string_view ShuffleUp = "shuffleUpNV";
2028 static constexpr std::string_view ShuffleDown = "shuffleDownNV";
2029 static constexpr std::string_view ShuffleButterfly = "shuffleXorNV";
2030 };
2031
1995 static constexpr std::array operation_decompilers = { 2032 static constexpr std::array operation_decompilers = {
1996 &GLSLDecompiler::Assign, 2033 &GLSLDecompiler::Assign,
1997 2034
@@ -2154,6 +2191,16 @@ private:
2154 &GLSLDecompiler::VoteAll, 2191 &GLSLDecompiler::VoteAll,
2155 &GLSLDecompiler::VoteAny, 2192 &GLSLDecompiler::VoteAny,
2156 &GLSLDecompiler::VoteEqual, 2193 &GLSLDecompiler::VoteEqual,
2194
2195 &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>,
2196 &GLSLDecompiler::Shuffle<Func::ShuffleUp>,
2197 &GLSLDecompiler::Shuffle<Func::ShuffleDown>,
2198 &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>,
2199
2200 &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>,
2201 &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>,
2202 &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>,
2203 &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>,
2157 }; 2204 };
2158 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 2205 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
2159 2206
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index b9153934e..f7fbbb6e4 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1127,6 +1127,46 @@ private:
1127 return {}; 1127 return {};
1128 } 1128 }
1129 1129
/// SPIR-V handler for indexed shuffles. Not implemented yet: calls UNIMPLEMENTED() and
/// returns a value-initialized Id.
1130 Id ShuffleIndexed(Operation) {
1131 UNIMPLEMENTED();
1132 return {};
1133 }
1134
/// SPIR-V handler for "up" shuffles. Not implemented yet: calls UNIMPLEMENTED() and
/// returns a value-initialized Id.
1135 Id ShuffleUp(Operation) {
1136 UNIMPLEMENTED();
1137 return {};
1138 }
1139
/// SPIR-V handler for "down" shuffles. Not implemented yet: calls UNIMPLEMENTED() and
/// returns a value-initialized Id.
1140 Id ShuffleDown(Operation) {
1141 UNIMPLEMENTED();
1142 return {};
1143 }
1144
/// SPIR-V handler for butterfly shuffles. Not implemented yet: calls UNIMPLEMENTED() and
/// returns a value-initialized Id.
1145 Id ShuffleButterfly(Operation) {
1146 UNIMPLEMENTED();
1147 return {};
1148 }
1149
/// SPIR-V handler for the in-range result of an indexed shuffle. Not implemented yet: calls
/// UNIMPLEMENTED() and returns a value-initialized Id.
1150 Id InRangeShuffleIndexed(Operation) {
1151 UNIMPLEMENTED();
1152 return {};
1153 }
1154
/// SPIR-V handler for the in-range result of an "up" shuffle. Not implemented yet: calls
/// UNIMPLEMENTED() and returns a value-initialized Id.
1155 Id InRangeShuffleUp(Operation) {
1156 UNIMPLEMENTED();
1157 return {};
1158 }
1159
/// SPIR-V handler for the in-range result of a "down" shuffle. Not implemented yet: calls
/// UNIMPLEMENTED() and returns a value-initialized Id.
1160 Id InRangeShuffleDown(Operation) {
1161 UNIMPLEMENTED();
1162 return {};
1163 }
1164
/// SPIR-V handler for the in-range result of a butterfly shuffle. Not implemented yet: calls
/// UNIMPLEMENTED() and returns a value-initialized Id.
1165 Id InRangeShuffleButterfly(Operation) {
1166 UNIMPLEMENTED();
1167 return {};
1168 }
1169
1130 Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, 1170 Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type,
1131 const std::string& name) { 1171 const std::string& name) {
1132 const Id id = OpVariable(type, storage); 1172 const Id id = OpVariable(type, storage);
@@ -1431,6 +1471,16 @@ private:
1431 &SPIRVDecompiler::VoteAll, 1471 &SPIRVDecompiler::VoteAll,
1432 &SPIRVDecompiler::VoteAny, 1472 &SPIRVDecompiler::VoteAny,
1433 &SPIRVDecompiler::VoteEqual, 1473 &SPIRVDecompiler::VoteEqual,
1474
1475 &SPIRVDecompiler::ShuffleIndexed,
1476 &SPIRVDecompiler::ShuffleUp,
1477 &SPIRVDecompiler::ShuffleDown,
1478 &SPIRVDecompiler::ShuffleButterfly,
1479
1480 &SPIRVDecompiler::InRangeShuffleIndexed,
1481 &SPIRVDecompiler::InRangeShuffleUp,
1482 &SPIRVDecompiler::InRangeShuffleDown,
1483 &SPIRVDecompiler::InRangeShuffleButterfly,
1434 }; 1484 };
1435 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 1485 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1436 1486
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index 04ca74f46..a8e481b3c 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -13,6 +13,7 @@ namespace VideoCommon::Shader {
13using Tegra::Shader::Instruction; 13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode; 14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred; 15using Tegra::Shader::Pred;
16using Tegra::Shader::ShuffleOperation;
16using Tegra::Shader::VoteOperation; 17using Tegra::Shader::VoteOperation;
17 18
18namespace { 19namespace {
@@ -44,6 +45,52 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
44 SetPredicate(bb, instr.vote.dest_pred, vote); 45 SetPredicate(bb, instr.vote.dest_pred, vote);
45 break; 46 break;
46 } 47 }
// SHFL: builds a shuffle operation from register gpr8, an index and a width derived from the
// mask operand. The shuffled value is written to gpr0 and the matching in-range result to
// predicate pred48.
48 case OpCode::Id::SHFL: {
// The mask comes from the instruction's immediate field or from register gpr39.
49 Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
50 : GetRegister(instr.gpr39);
51 Node width = [&] {
52 // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has
53 // been done reversing Nvidia's math. It won't work on all cases due to SHFL having
54 // different parameters that don't properly map to GLSL's interface, but it should work
55 // for cases emitted by Nvidia's compiler.
56 if (instr.shfl.operation == ShuffleOperation::Up) {
// Up: width = (mask + (-0x2000)) >> 8, using a logical shift.
57 return Operation(
58 OperationCode::ILogicalShiftRight,
59 Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)),
60 Immediate(8));
61 } else {
// Every other variant: width = (0x201F + (-mask)) >> 8, using a logical shift.
62 return Operation(OperationCode::ILogicalShiftRight,
63 Operation(OperationCode::IAdd, Immediate(0x201F),
64 Operation(OperationCode::INegate, std::move(mask))),
65 Immediate(8));
66 }
67 }();
68
// Pick the value-producing operation code and its in-range counterpart for this variant.
69 const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> {
70 switch (instr.shfl.operation) {
71 case ShuffleOperation::Idx:
72 return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed};
73 case ShuffleOperation::Up:
74 return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp};
75 case ShuffleOperation::Down:
76 return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown};
77 case ShuffleOperation::Bfly:
78 return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly};
79 }
80 UNREACHABLE_MSG("Invalid SHFL operation: {}",
81 static_cast<u64>(instr.shfl.operation.Value()));
82 return {};
83 }();
84
85 // Setting the predicate before the register is intentional to avoid overwriting.
// The index comes from the instruction's immediate field or from register gpr20.
86 Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
87 : GetRegister(instr.gpr20);
// index and width are copied here because they are moved into the register operation below.
88 SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width));
89 SetRegister(
90 bb, instr.gpr0,
91 Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width)));
92 break;
93 }
47 default: 94 default:
48 UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); 95 UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
49 break; 96 break;
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 425111cc4..abf2cb1ab 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -181,6 +181,16 @@ enum class OperationCode {
181 VoteAny, /// (bool) -> bool 181 VoteAny, /// (bool) -> bool
182 VoteEqual, /// (bool) -> bool 182 VoteEqual, /// (bool) -> bool
183 183
184 ShuffleIndexed, /// (uint value, uint index, uint width) -> uint
185 ShuffleUp, /// (uint value, uint index, uint width) -> uint
186 ShuffleDown, /// (uint value, uint index, uint width) -> uint
187 ShuffleButterfly, /// (uint value, uint index, uint width) -> uint
188
189 InRangeShuffleIndexed, /// (uint index, uint width) -> bool
190 InRangeShuffleUp, /// (uint index, uint width) -> bool
191 InRangeShuffleDown, /// (uint index, uint width) -> bool
192 InRangeShuffleButterfly, /// (uint index, uint width) -> bool
193
184 Amount, 194 Amount,
185}; 195};
186 196