summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/video_core/engines/shader_bytecode.h 18
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp 3
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 63
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp 50
-rw-r--r-- src/video_core/shader/decode/warp.cpp 47
-rw-r--r-- src/video_core/shader/node.h 10
6 files changed, 182 insertions, 9 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 052e6d24e..a6110bd86 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -566,6 +566,13 @@ enum class ImageAtomicOperation : u64 {
566 Exch = 8, 566 Exch = 8,
567}; 567};
568 568
569enum class ShuffleOperation : u64 {
570 Idx = 0, // shuffleNV
571 Up = 1, // shuffleUpNV
572 Down = 2, // shuffleDownNV
573 Bfly = 3, // shuffleXorNV
574};
575
569union Instruction { 576union Instruction {
570 Instruction& operator=(const Instruction& instr) { 577 Instruction& operator=(const Instruction& instr) {
571 value = instr.value; 578 value = instr.value;
@@ -600,6 +607,15 @@ union Instruction {
600 } vote; 607 } vote;
601 608
602 union { 609 union {
610 BitField<30, 2, ShuffleOperation> operation;
611 BitField<48, 3, u64> pred48;
612 BitField<28, 1, u64> is_index_imm;
613 BitField<29, 1, u64> is_mask_imm;
614 BitField<20, 5, u64> index_imm;
615 BitField<34, 13, u64> mask_imm;
616 } shfl;
617
618 union {
603 BitField<8, 8, Register> gpr; 619 BitField<8, 8, Register> gpr;
604 BitField<20, 24, s64> offset; 620 BitField<20, 24, s64> offset;
605 } gmem; 621 } gmem;
@@ -1542,6 +1558,7 @@ public:
1542 BRK, 1558 BRK,
1543 DEPBAR, 1559 DEPBAR,
1544 VOTE, 1560 VOTE,
1561 SHFL,
1545 BFE_C, 1562 BFE_C,
1546 BFE_R, 1563 BFE_R,
1547 BFE_IMM, 1564 BFE_IMM,
@@ -1833,6 +1850,7 @@ private:
1833 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), 1850 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
1834 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), 1851 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
1835 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), 1852 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
1853 INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
1836 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), 1854 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
1837 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), 1855 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
1838 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), 1856 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 909ccb82c..0dbc4c02f 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -214,7 +214,8 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
214 std::string source = "#version 430 core\n" 214 std::string source = "#version 430 core\n"
215 "#extension GL_ARB_separate_shader_objects : enable\n" 215 "#extension GL_ARB_separate_shader_objects : enable\n"
216 "#extension GL_NV_gpu_shader5 : enable\n" 216 "#extension GL_NV_gpu_shader5 : enable\n"
217 "#extension GL_NV_shader_thread_group : enable\n"; 217 "#extension GL_NV_shader_thread_group : enable\n"
218 "#extension GL_NV_shader_thread_shuffle : enable\n";
218 if (entries.shader_viewport_layer_array) { 219 if (entries.shader_viewport_layer_array) {
219 source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; 220 source += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
220 } 221 }
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 137b23740..6b31ba0f2 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1934,8 +1934,7 @@ private:
1934 Expression BallotThread(Operation operation) { 1934 Expression BallotThread(Operation operation) {
1935 const std::string value = VisitOperand(operation, 0).AsBool(); 1935 const std::string value = VisitOperand(operation, 0).AsBool();
1936 if (!device.HasWarpIntrinsics()) { 1936 if (!device.HasWarpIntrinsics()) {
1937 LOG_ERROR(Render_OpenGL, 1937 LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
1938 "Nvidia warp intrinsics are not available and its required by a shader");
1939 // Stub on non-Nvidia devices by simulating all threads voting the same as the active 1938 // Stub on non-Nvidia devices by simulating all threads voting the same as the active
1940 // one. 1939 // one.
1941 return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; 1940 return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint};
@@ -1946,8 +1945,7 @@ private:
1946 Expression Vote(Operation operation, const char* func) { 1945 Expression Vote(Operation operation, const char* func) {
1947 const std::string value = VisitOperand(operation, 0).AsBool(); 1946 const std::string value = VisitOperand(operation, 0).AsBool();
1948 if (!device.HasWarpIntrinsics()) { 1947 if (!device.HasWarpIntrinsics()) {
1949 LOG_ERROR(Render_OpenGL, 1948 LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
1950 "Nvidia vote intrinsics are not available and its required by a shader");
1951 // Stub with a warp size of one. 1949 // Stub with a warp size of one.
1952 return {value, Type::Bool}; 1950 return {value, Type::Bool};
1953 } 1951 }
@@ -1964,15 +1962,54 @@ private:
1964 1962
1965 Expression VoteEqual(Operation operation) { 1963 Expression VoteEqual(Operation operation) {
1966 if (!device.HasWarpIntrinsics()) { 1964 if (!device.HasWarpIntrinsics()) {
1967 LOG_ERROR(Render_OpenGL, 1965 LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
1968 "Nvidia vote intrinsics are not available and its required by a shader"); 1966 // We must return true here since this is a stub for a theoretical warp size of 1.
1969 // We must return true here since a stub for a theoretical warp size of 1 will always 1967 // This will always return an equal result across all votes.
1970 // return an equal result for all its votes.
1971 return {"true", Type::Bool}; 1968 return {"true", Type::Bool};
1972 } 1969 }
1973 return Vote(operation, "allThreadsEqualNV"); 1970 return Vote(operation, "allThreadsEqualNV");
1974 } 1971 }
1975 1972
1973 template <const std::string_view& func>
1974 Expression Shuffle(Operation operation) {
1975 const std::string value = VisitOperand(operation, 0).AsFloat();
1976 if (!device.HasWarpIntrinsics()) {
1977 LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader");
1978 // On a "single-thread" device we are either on the same thread or out of bounds. Both
1979 // cases return the passed value.
1980 return {value, Type::Float};
1981 }
1982
1983 const std::string index = VisitOperand(operation, 1).AsUint();
1984 const std::string width = VisitOperand(operation, 2).AsUint();
1985 return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float};
1986 }
1987
1988 template <const std::string_view& func>
1989 Expression InRangeShuffle(Operation operation) {
1990 const std::string index = VisitOperand(operation, 0).AsUint();
1991 const std::string width = VisitOperand(operation, 1).AsUint();
1992 if (!device.HasWarpIntrinsics()) {
1993 // On a "single-thread" device we are only in bounds when the requested index is 0.
1994 return {fmt::format("({} == 0U)", index), Type::Bool};
1995 }
1996
1997 const std::string in_range = code.GenerateTemporary();
1998 code.AddLine("bool {};", in_range);
1999 code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range);
2000 return {in_range, Type::Bool};
2001 }
2002
2003 struct Func final {
2004 Func() = delete;
2005 ~Func() = delete;
2006
2007 static constexpr std::string_view ShuffleIndexed = "shuffleNV";
2008 static constexpr std::string_view ShuffleUp = "shuffleUpNV";
2009 static constexpr std::string_view ShuffleDown = "shuffleDownNV";
2010 static constexpr std::string_view ShuffleButterfly = "shuffleXorNV";
2011 };
2012
1976 static constexpr std::array operation_decompilers = { 2013 static constexpr std::array operation_decompilers = {
1977 &GLSLDecompiler::Assign, 2014 &GLSLDecompiler::Assign,
1978 2015
@@ -2135,6 +2172,16 @@ private:
2135 &GLSLDecompiler::VoteAll, 2172 &GLSLDecompiler::VoteAll,
2136 &GLSLDecompiler::VoteAny, 2173 &GLSLDecompiler::VoteAny,
2137 &GLSLDecompiler::VoteEqual, 2174 &GLSLDecompiler::VoteEqual,
2175
2176 &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>,
2177 &GLSLDecompiler::Shuffle<Func::ShuffleUp>,
2178 &GLSLDecompiler::Shuffle<Func::ShuffleDown>,
2179 &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>,
2180
2181 &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>,
2182 &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>,
2183 &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>,
2184 &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>,
2138 }; 2185 };
2139 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 2186 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
2140 2187
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index b9153934e..f7fbbb6e4 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1127,6 +1127,46 @@ private:
1127 return {}; 1127 return {};
1128 } 1128 }
1129 1129
1130 Id ShuffleIndexed(Operation) {
1131 UNIMPLEMENTED();
1132 return {};
1133 }
1134
1135 Id ShuffleUp(Operation) {
1136 UNIMPLEMENTED();
1137 return {};
1138 }
1139
1140 Id ShuffleDown(Operation) {
1141 UNIMPLEMENTED();
1142 return {};
1143 }
1144
1145 Id ShuffleButterfly(Operation) {
1146 UNIMPLEMENTED();
1147 return {};
1148 }
1149
1150 Id InRangeShuffleIndexed(Operation) {
1151 UNIMPLEMENTED();
1152 return {};
1153 }
1154
1155 Id InRangeShuffleUp(Operation) {
1156 UNIMPLEMENTED();
1157 return {};
1158 }
1159
1160 Id InRangeShuffleDown(Operation) {
1161 UNIMPLEMENTED();
1162 return {};
1163 }
1164
1165 Id InRangeShuffleButterfly(Operation) {
1166 UNIMPLEMENTED();
1167 return {};
1168 }
1169
1130 Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, 1170 Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type,
1131 const std::string& name) { 1171 const std::string& name) {
1132 const Id id = OpVariable(type, storage); 1172 const Id id = OpVariable(type, storage);
@@ -1431,6 +1471,16 @@ private:
1431 &SPIRVDecompiler::VoteAll, 1471 &SPIRVDecompiler::VoteAll,
1432 &SPIRVDecompiler::VoteAny, 1472 &SPIRVDecompiler::VoteAny,
1433 &SPIRVDecompiler::VoteEqual, 1473 &SPIRVDecompiler::VoteEqual,
1474
1475 &SPIRVDecompiler::ShuffleIndexed,
1476 &SPIRVDecompiler::ShuffleUp,
1477 &SPIRVDecompiler::ShuffleDown,
1478 &SPIRVDecompiler::ShuffleButterfly,
1479
1480 &SPIRVDecompiler::InRangeShuffleIndexed,
1481 &SPIRVDecompiler::InRangeShuffleUp,
1482 &SPIRVDecompiler::InRangeShuffleDown,
1483 &SPIRVDecompiler::InRangeShuffleButterfly,
1434 }; 1484 };
1435 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 1485 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1436 1486
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index 04ca74f46..a8e481b3c 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -13,6 +13,7 @@ namespace VideoCommon::Shader {
13using Tegra::Shader::Instruction; 13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode; 14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred; 15using Tegra::Shader::Pred;
16using Tegra::Shader::ShuffleOperation;
16using Tegra::Shader::VoteOperation; 17using Tegra::Shader::VoteOperation;
17 18
18namespace { 19namespace {
@@ -44,6 +45,52 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
44 SetPredicate(bb, instr.vote.dest_pred, vote); 45 SetPredicate(bb, instr.vote.dest_pred, vote);
45 break; 46 break;
46 } 47 }
48 case OpCode::Id::SHFL: {
49 Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
50 : GetRegister(instr.gpr39);
51 Node width = [&] {
52 // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has
53 // been done by reversing Nvidia's math. It won't work in all cases due to SHFL having
54 // different parameters that don't properly map to GLSL's interface, but it should work
55 // for cases emitted by Nvidia's compiler.
56 if (instr.shfl.operation == ShuffleOperation::Up) {
57 return Operation(
58 OperationCode::ILogicalShiftRight,
59 Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)),
60 Immediate(8));
61 } else {
62 return Operation(OperationCode::ILogicalShiftRight,
63 Operation(OperationCode::IAdd, Immediate(0x201F),
64 Operation(OperationCode::INegate, std::move(mask))),
65 Immediate(8));
66 }
67 }();
68
69 const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> {
70 switch (instr.shfl.operation) {
71 case ShuffleOperation::Idx:
72 return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed};
73 case ShuffleOperation::Up:
74 return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp};
75 case ShuffleOperation::Down:
76 return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown};
77 case ShuffleOperation::Bfly:
78 return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly};
79 }
80 UNREACHABLE_MSG("Invalid SHFL operation: {}",
81 static_cast<u64>(instr.shfl.operation.Value()));
82 return {};
83 }();
84
85 // Setting the predicate before the register is intentional to avoid overwriting.
86 Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
87 : GetRegister(instr.gpr20);
88 SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width));
89 SetRegister(
90 bb, instr.gpr0,
91 Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width)));
92 break;
93 }
47 default: 94 default:
48 UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); 95 UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
49 break; 96 break;
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index b47b201cf..86de7e0a3 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -181,6 +181,16 @@ enum class OperationCode {
181 VoteAny, /// (bool) -> bool 181 VoteAny, /// (bool) -> bool
182 VoteEqual, /// (bool) -> bool 182 VoteEqual, /// (bool) -> bool
183 183
184 ShuffleIndexed, /// (uint value, uint index, uint width) -> uint
185 ShuffleUp, /// (uint value, uint index, uint width) -> uint
186 ShuffleDown, /// (uint value, uint index, uint width) -> uint
187 ShuffleButterfly, /// (uint value, uint index, uint width) -> uint
188
189 InRangeShuffleIndexed, /// (uint index, uint width) -> bool
190 InRangeShuffleUp, /// (uint index, uint width) -> bool
191 InRangeShuffleDown, /// (uint index, uint width) -> bool
192 InRangeShuffleButterfly, /// (uint index, uint width) -> bool
193
184 Amount, 194 Amount,
185}; 195};
186 196