summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp48
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp43
-rw-r--r--src/video_core/shader/decode/warp.cpp68
-rw-r--r--src/video_core/shader/node.h11
5 files changed, 49 insertions, 122 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index f1b89165d..a287b5ee1 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -275,6 +275,7 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy
275 std::string source = fmt::format(R"(// {} 275 std::string source = fmt::format(R"(// {}
276#version 430 core 276#version 430 core
277#extension GL_ARB_separate_shader_objects : enable 277#extension GL_ARB_separate_shader_objects : enable
278#extension GL_ARB_shader_ballot : enable
278#extension GL_ARB_shader_viewport_layer_array : enable 279#extension GL_ARB_shader_viewport_layer_array : enable
279#extension GL_EXT_shader_image_load_formatted : enable 280#extension GL_EXT_shader_image_load_formatted : enable
280#extension GL_NV_gpu_shader5 : enable 281#extension GL_NV_gpu_shader5 : enable
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 92ee8459e..ca4e6e468 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1942,34 +1942,14 @@ private:
1942 return Vote(operation, "allThreadsEqualNV"); 1942 return Vote(operation, "allThreadsEqualNV");
1943 } 1943 }
1944 1944
1945 template <const std::string_view& func> 1945 Expression ThreadId(Operation operation) {
1946 Expression Shuffle(Operation operation) { 1946 return {"gl_SubGroupInvocationARB", Type::Uint};
1947 const std::string value = VisitOperand(operation, 0).AsFloat();
1948 if (!device.HasWarpIntrinsics()) {
1949 LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader");
1950 // On a "single-thread" device we are either on the same thread or out of bounds. Both
1951 // cases return the passed value.
1952 return {value, Type::Float};
1953 }
1954
1955 const std::string index = VisitOperand(operation, 1).AsUint();
1956 const std::string width = VisitOperand(operation, 2).AsUint();
1957 return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float};
1958 } 1947 }
1959 1948
1960 template <const std::string_view& func> 1949 Expression ShuffleIndexed(Operation operation) {
1961 Expression InRangeShuffle(Operation operation) { 1950 const std::string value = VisitOperand(operation, 0).AsFloat();
1962 const std::string index = VisitOperand(operation, 0).AsUint(); 1951 const std::string index = VisitOperand(operation, 1).AsUint();
1963 const std::string width = VisitOperand(operation, 1).AsUint(); 1952 return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
1964 if (!device.HasWarpIntrinsics()) {
1965 // On a "single-thread" device we are only in bounds when the requested index is 0.
1966 return {fmt::format("({} == 0U)", index), Type::Bool};
1967 }
1968
1969 const std::string in_range = code.GenerateTemporary();
1970 code.AddLine("bool {};", in_range);
1971 code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range);
1972 return {in_range, Type::Bool};
1973 } 1953 }
1974 1954
1975 struct Func final { 1955 struct Func final {
@@ -1981,11 +1961,6 @@ private:
1981 static constexpr std::string_view Or = "Or"; 1961 static constexpr std::string_view Or = "Or";
1982 static constexpr std::string_view Xor = "Xor"; 1962 static constexpr std::string_view Xor = "Xor";
1983 static constexpr std::string_view Exchange = "Exchange"; 1963 static constexpr std::string_view Exchange = "Exchange";
1984
1985 static constexpr std::string_view ShuffleIndexed = "shuffleNV";
1986 static constexpr std::string_view ShuffleUp = "shuffleUpNV";
1987 static constexpr std::string_view ShuffleDown = "shuffleDownNV";
1988 static constexpr std::string_view ShuffleButterfly = "shuffleXorNV";
1989 }; 1964 };
1990 1965
1991 static constexpr std::array operation_decompilers = { 1966 static constexpr std::array operation_decompilers = {
@@ -2151,15 +2126,8 @@ private:
2151 &GLSLDecompiler::VoteAny, 2126 &GLSLDecompiler::VoteAny,
2152 &GLSLDecompiler::VoteEqual, 2127 &GLSLDecompiler::VoteEqual,
2153 2128
2154 &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>, 2129 &GLSLDecompiler::ThreadId,
2155 &GLSLDecompiler::Shuffle<Func::ShuffleUp>, 2130 &GLSLDecompiler::ShuffleIndexed,
2156 &GLSLDecompiler::Shuffle<Func::ShuffleDown>,
2157 &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>,
2158
2159 &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>,
2160 &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>,
2161 &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>,
2162 &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>,
2163 }; 2131 };
2164 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 2132 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
2165 2133
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 42cf068b6..383720ea1 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1195,42 +1195,12 @@ private:
1195 return {}; 1195 return {};
1196 } 1196 }
1197 1197
1198 Id ShuffleIndexed(Operation) { 1198 Id ThreadId(Operation) {
1199 UNIMPLEMENTED();
1200 return {};
1201 }
1202
1203 Id ShuffleUp(Operation) {
1204 UNIMPLEMENTED();
1205 return {};
1206 }
1207
1208 Id ShuffleDown(Operation) {
1209 UNIMPLEMENTED();
1210 return {};
1211 }
1212
1213 Id ShuffleButterfly(Operation) {
1214 UNIMPLEMENTED(); 1199 UNIMPLEMENTED();
1215 return {}; 1200 return {};
1216 } 1201 }
1217 1202
1218 Id InRangeShuffleIndexed(Operation) { 1203 Id ShuffleIndexed(Operation) {
1219 UNIMPLEMENTED();
1220 return {};
1221 }
1222
1223 Id InRangeShuffleUp(Operation) {
1224 UNIMPLEMENTED();
1225 return {};
1226 }
1227
1228 Id InRangeShuffleDown(Operation) {
1229 UNIMPLEMENTED();
1230 return {};
1231 }
1232
1233 Id InRangeShuffleButterfly(Operation) {
1234 UNIMPLEMENTED(); 1204 UNIMPLEMENTED();
1235 return {}; 1205 return {};
1236 } 1206 }
@@ -1528,15 +1498,8 @@ private:
1528 &SPIRVDecompiler::VoteAny, 1498 &SPIRVDecompiler::VoteAny,
1529 &SPIRVDecompiler::VoteEqual, 1499 &SPIRVDecompiler::VoteEqual,
1530 1500
1501 &SPIRVDecompiler::ThreadId,
1531 &SPIRVDecompiler::ShuffleIndexed, 1502 &SPIRVDecompiler::ShuffleIndexed,
1532 &SPIRVDecompiler::ShuffleUp,
1533 &SPIRVDecompiler::ShuffleDown,
1534 &SPIRVDecompiler::ShuffleButterfly,
1535
1536 &SPIRVDecompiler::InRangeShuffleIndexed,
1537 &SPIRVDecompiler::InRangeShuffleUp,
1538 &SPIRVDecompiler::InRangeShuffleDown,
1539 &SPIRVDecompiler::InRangeShuffleButterfly,
1540 }; 1503 };
1541 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 1504 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1542 1505
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index fa8a250cc..c2875eb2b 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -17,6 +17,7 @@ using Tegra::Shader::ShuffleOperation;
17using Tegra::Shader::VoteOperation; 17using Tegra::Shader::VoteOperation;
18 18
19namespace { 19namespace {
20
20OperationCode GetOperationCode(VoteOperation vote_op) { 21OperationCode GetOperationCode(VoteOperation vote_op) {
21 switch (vote_op) { 22 switch (vote_op) {
22 case VoteOperation::All: 23 case VoteOperation::All:
@@ -30,6 +31,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) {
30 return OperationCode::VoteAll; 31 return OperationCode::VoteAll;
31 } 32 }
32} 33}
34
33} // Anonymous namespace 35} // Anonymous namespace
34 36
35u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { 37u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
@@ -46,50 +48,50 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
46 break; 48 break;
47 } 49 }
48 case OpCode::Id::SHFL: { 50 case OpCode::Id::SHFL: {
49 Node width = [this, instr] { 51 Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
50 Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) 52 : GetRegister(instr.gpr39);
51 : GetRegister(instr.gpr39); 53 Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
54 : GetRegister(instr.gpr20);
52 55
53 // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has 56 Node thread_id = Operation(OperationCode::ThreadId);
54 // been done reversing Nvidia's math. It won't work on all cases due to SHFL having 57 Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU));
55 // different parameters that don't properly map to GLSL's interface, but it should work 58 Node seg_mask = BitfieldExtract(mask, 8, 16);
56 // for cases emitted by Nvidia's compiler. 59
57 if (instr.shfl.operation == ShuffleOperation::Up) { 60 Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask);
58 return Operation( 61 Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask);
59 OperationCode::ILogicalShiftRight, 62 Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id,
60 Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)), 63 Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask));
61 Immediate(8));
62 } else {
63 return Operation(OperationCode::ILogicalShiftRight,
64 Operation(OperationCode::IAdd, Immediate(0x201F),
65 Operation(OperationCode::INegate, std::move(mask))),
66 Immediate(8));
67 }
68 }();
69 64
70 const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> { 65 Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] {
71 switch (instr.shfl.operation) { 66 switch (instr.shfl.operation) {
72 case ShuffleOperation::Idx: 67 case ShuffleOperation::Idx:
73 return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed}; 68 return Operation(OperationCode::IBitwiseOr,
74 case ShuffleOperation::Up: 69 Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask),
75 return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp}; 70 min_thread_id);
76 case ShuffleOperation::Down: 71 case ShuffleOperation::Down:
77 return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown}; 72 return Operation(OperationCode::IAdd, thread_id, index);
73 case ShuffleOperation::Up:
74 return Operation(OperationCode::IAdd, thread_id,
75 Operation(OperationCode::INegate, index));
78 case ShuffleOperation::Bfly: 76 case ShuffleOperation::Bfly:
79 return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly}; 77 return Operation(OperationCode::IBitwiseXor, thread_id, index);
80 } 78 }
81 UNREACHABLE_MSG("Invalid SHFL operation: {}", 79 UNREACHABLE();
82 static_cast<u64>(instr.shfl.operation.Value())); 80 return Immediate(0U);
83 return {};
84 }(); 81 }();
85 82
86 // Setting the predicate before the register is intentional to avoid overwriting. 83 Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] {
87 Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) 84 if (instr.shfl.operation == ShuffleOperation::Up) {
88 : GetRegister(instr.gpr20); 85 return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id);
89 SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width)); 86 } else {
87 return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id);
88 }
89 }();
90
91 SetPredicate(bb, instr.shfl.pred48, in_bounds);
90 SetRegister( 92 SetRegister(
91 bb, instr.gpr0, 93 bb, instr.gpr0,
92 Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width))); 94 Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id));
93 break; 95 break;
94 } 96 }
95 default: 97 default:
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 4300d9ff4..bd3547e0d 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -181,15 +181,8 @@ enum class OperationCode {
181 VoteAny, /// (bool) -> bool 181 VoteAny, /// (bool) -> bool
182 VoteEqual, /// (bool) -> bool 182 VoteEqual, /// (bool) -> bool
183 183
184 ShuffleIndexed, /// (uint value, uint index, uint width) -> uint 184 ThreadId, /// () -> uint
185 ShuffleUp, /// (uint value, uint index, uint width) -> uint 185 ShuffleIndexed, /// (uint value, uint index) -> uint
186 ShuffleDown, /// (uint value, uint index, uint width) -> uint
187 ShuffleButterfly, /// (uint value, uint index, uint width) -> uint
188
189 InRangeShuffleIndexed, /// (uint index, uint width) -> bool
190 InRangeShuffleUp, /// (uint index, uint width) -> bool
191 InRangeShuffleDown, /// (uint index, uint width) -> bool
192 InRangeShuffleButterfly, /// (uint index, uint width) -> bool
193 186
194 Amount, 187 Amount,
195}; 188};