diff options
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 48 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 43 | ||||
| -rw-r--r-- | src/video_core/shader/decode/warp.cpp | 68 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 11 |
5 files changed, 49 insertions, 122 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index f1b89165d..a287b5ee1 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -275,6 +275,7 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy | |||
| 275 | std::string source = fmt::format(R"(// {} | 275 | std::string source = fmt::format(R"(// {} |
| 276 | #version 430 core | 276 | #version 430 core |
| 277 | #extension GL_ARB_separate_shader_objects : enable | 277 | #extension GL_ARB_separate_shader_objects : enable |
| 278 | #extension GL_ARB_shader_ballot : enable | ||
| 278 | #extension GL_ARB_shader_viewport_layer_array : enable | 279 | #extension GL_ARB_shader_viewport_layer_array : enable |
| 279 | #extension GL_EXT_shader_image_load_formatted : enable | 280 | #extension GL_EXT_shader_image_load_formatted : enable |
| 280 | #extension GL_NV_gpu_shader5 : enable | 281 | #extension GL_NV_gpu_shader5 : enable |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 92ee8459e..ca4e6e468 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -1942,34 +1942,14 @@ private: | |||
| 1942 | return Vote(operation, "allThreadsEqualNV"); | 1942 | return Vote(operation, "allThreadsEqualNV"); |
| 1943 | } | 1943 | } |
| 1944 | 1944 | ||
| 1945 | template <const std::string_view& func> | 1945 | Expression ThreadId(Operation operation) { |
| 1946 | Expression Shuffle(Operation operation) { | 1946 | return {"gl_SubGroupInvocationARB", Type::Uint}; |
| 1947 | const std::string value = VisitOperand(operation, 0).AsFloat(); | ||
| 1948 | if (!device.HasWarpIntrinsics()) { | ||
| 1949 | LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader"); | ||
| 1950 | // On a "single-thread" device we are either on the same thread or out of bounds. Both | ||
| 1951 | // cases return the passed value. | ||
| 1952 | return {value, Type::Float}; | ||
| 1953 | } | ||
| 1954 | |||
| 1955 | const std::string index = VisitOperand(operation, 1).AsUint(); | ||
| 1956 | const std::string width = VisitOperand(operation, 2).AsUint(); | ||
| 1957 | return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float}; | ||
| 1958 | } | 1947 | } |
| 1959 | 1948 | ||
| 1960 | template <const std::string_view& func> | 1949 | Expression ShuffleIndexed(Operation operation) { |
| 1961 | Expression InRangeShuffle(Operation operation) { | 1950 | const std::string value = VisitOperand(operation, 0).AsFloat(); |
| 1962 | const std::string index = VisitOperand(operation, 0).AsUint(); | 1951 | const std::string index = VisitOperand(operation, 1).AsUint(); |
| 1963 | const std::string width = VisitOperand(operation, 1).AsUint(); | 1952 | return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; |
| 1964 | if (!device.HasWarpIntrinsics()) { | ||
| 1965 | // On a "single-thread" device we are only in bounds when the requested index is 0. | ||
| 1966 | return {fmt::format("({} == 0U)", index), Type::Bool}; | ||
| 1967 | } | ||
| 1968 | |||
| 1969 | const std::string in_range = code.GenerateTemporary(); | ||
| 1970 | code.AddLine("bool {};", in_range); | ||
| 1971 | code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range); | ||
| 1972 | return {in_range, Type::Bool}; | ||
| 1973 | } | 1953 | } |
| 1974 | 1954 | ||
| 1975 | struct Func final { | 1955 | struct Func final { |
| @@ -1981,11 +1961,6 @@ private: | |||
| 1981 | static constexpr std::string_view Or = "Or"; | 1961 | static constexpr std::string_view Or = "Or"; |
| 1982 | static constexpr std::string_view Xor = "Xor"; | 1962 | static constexpr std::string_view Xor = "Xor"; |
| 1983 | static constexpr std::string_view Exchange = "Exchange"; | 1963 | static constexpr std::string_view Exchange = "Exchange"; |
| 1984 | |||
| 1985 | static constexpr std::string_view ShuffleIndexed = "shuffleNV"; | ||
| 1986 | static constexpr std::string_view ShuffleUp = "shuffleUpNV"; | ||
| 1987 | static constexpr std::string_view ShuffleDown = "shuffleDownNV"; | ||
| 1988 | static constexpr std::string_view ShuffleButterfly = "shuffleXorNV"; | ||
| 1989 | }; | 1964 | }; |
| 1990 | 1965 | ||
| 1991 | static constexpr std::array operation_decompilers = { | 1966 | static constexpr std::array operation_decompilers = { |
| @@ -2151,15 +2126,8 @@ private: | |||
| 2151 | &GLSLDecompiler::VoteAny, | 2126 | &GLSLDecompiler::VoteAny, |
| 2152 | &GLSLDecompiler::VoteEqual, | 2127 | &GLSLDecompiler::VoteEqual, |
| 2153 | 2128 | ||
| 2154 | &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>, | 2129 | &GLSLDecompiler::ThreadId, |
| 2155 | &GLSLDecompiler::Shuffle<Func::ShuffleUp>, | 2130 | &GLSLDecompiler::ShuffleIndexed, |
| 2156 | &GLSLDecompiler::Shuffle<Func::ShuffleDown>, | ||
| 2157 | &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>, | ||
| 2158 | |||
| 2159 | &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>, | ||
| 2160 | &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>, | ||
| 2161 | &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>, | ||
| 2162 | &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>, | ||
| 2163 | }; | 2131 | }; |
| 2164 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 2132 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 2165 | 2133 | ||
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 42cf068b6..383720ea1 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -1195,42 +1195,12 @@ private: | |||
| 1195 | return {}; | 1195 | return {}; |
| 1196 | } | 1196 | } |
| 1197 | 1197 | ||
| 1198 | Id ShuffleIndexed(Operation) { | 1198 | Id ThreadId(Operation) { |
| 1199 | UNIMPLEMENTED(); | ||
| 1200 | return {}; | ||
| 1201 | } | ||
| 1202 | |||
| 1203 | Id ShuffleUp(Operation) { | ||
| 1204 | UNIMPLEMENTED(); | ||
| 1205 | return {}; | ||
| 1206 | } | ||
| 1207 | |||
| 1208 | Id ShuffleDown(Operation) { | ||
| 1209 | UNIMPLEMENTED(); | ||
| 1210 | return {}; | ||
| 1211 | } | ||
| 1212 | |||
| 1213 | Id ShuffleButterfly(Operation) { | ||
| 1214 | UNIMPLEMENTED(); | 1199 | UNIMPLEMENTED(); |
| 1215 | return {}; | 1200 | return {}; |
| 1216 | } | 1201 | } |
| 1217 | 1202 | ||
| 1218 | Id InRangeShuffleIndexed(Operation) { | 1203 | Id ShuffleIndexed(Operation) { |
| 1219 | UNIMPLEMENTED(); | ||
| 1220 | return {}; | ||
| 1221 | } | ||
| 1222 | |||
| 1223 | Id InRangeShuffleUp(Operation) { | ||
| 1224 | UNIMPLEMENTED(); | ||
| 1225 | return {}; | ||
| 1226 | } | ||
| 1227 | |||
| 1228 | Id InRangeShuffleDown(Operation) { | ||
| 1229 | UNIMPLEMENTED(); | ||
| 1230 | return {}; | ||
| 1231 | } | ||
| 1232 | |||
| 1233 | Id InRangeShuffleButterfly(Operation) { | ||
| 1234 | UNIMPLEMENTED(); | 1204 | UNIMPLEMENTED(); |
| 1235 | return {}; | 1205 | return {}; |
| 1236 | } | 1206 | } |
| @@ -1528,15 +1498,8 @@ private: | |||
| 1528 | &SPIRVDecompiler::VoteAny, | 1498 | &SPIRVDecompiler::VoteAny, |
| 1529 | &SPIRVDecompiler::VoteEqual, | 1499 | &SPIRVDecompiler::VoteEqual, |
| 1530 | 1500 | ||
| 1501 | &SPIRVDecompiler::ThreadId, | ||
| 1531 | &SPIRVDecompiler::ShuffleIndexed, | 1502 | &SPIRVDecompiler::ShuffleIndexed, |
| 1532 | &SPIRVDecompiler::ShuffleUp, | ||
| 1533 | &SPIRVDecompiler::ShuffleDown, | ||
| 1534 | &SPIRVDecompiler::ShuffleButterfly, | ||
| 1535 | |||
| 1536 | &SPIRVDecompiler::InRangeShuffleIndexed, | ||
| 1537 | &SPIRVDecompiler::InRangeShuffleUp, | ||
| 1538 | &SPIRVDecompiler::InRangeShuffleDown, | ||
| 1539 | &SPIRVDecompiler::InRangeShuffleButterfly, | ||
| 1540 | }; | 1503 | }; |
| 1541 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 1504 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 1542 | 1505 | ||
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index fa8a250cc..c2875eb2b 100644 --- a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp | |||
| @@ -17,6 +17,7 @@ using Tegra::Shader::ShuffleOperation; | |||
| 17 | using Tegra::Shader::VoteOperation; | 17 | using Tegra::Shader::VoteOperation; |
| 18 | 18 | ||
| 19 | namespace { | 19 | namespace { |
| 20 | |||
| 20 | OperationCode GetOperationCode(VoteOperation vote_op) { | 21 | OperationCode GetOperationCode(VoteOperation vote_op) { |
| 21 | switch (vote_op) { | 22 | switch (vote_op) { |
| 22 | case VoteOperation::All: | 23 | case VoteOperation::All: |
| @@ -30,6 +31,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) { | |||
| 30 | return OperationCode::VoteAll; | 31 | return OperationCode::VoteAll; |
| 31 | } | 32 | } |
| 32 | } | 33 | } |
| 34 | |||
| 33 | } // Anonymous namespace | 35 | } // Anonymous namespace |
| 34 | 36 | ||
| 35 | u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { | 37 | u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { |
| @@ -46,50 +48,50 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { | |||
| 46 | break; | 48 | break; |
| 47 | } | 49 | } |
| 48 | case OpCode::Id::SHFL: { | 50 | case OpCode::Id::SHFL: { |
| 49 | Node width = [this, instr] { | 51 | Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) |
| 50 | Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) | 52 | : GetRegister(instr.gpr39); |
| 51 | : GetRegister(instr.gpr39); | 53 | Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) |
| 54 | : GetRegister(instr.gpr20); | ||
| 52 | 55 | ||
| 53 | // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has | 56 | Node thread_id = Operation(OperationCode::ThreadId); |
| 54 | // been done reversing Nvidia's math. It won't work on all cases due to SHFL having | 57 | Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); |
| 55 | // different parameters that don't properly map to GLSL's interface, but it should work | 58 | Node seg_mask = BitfieldExtract(mask, 8, 16); |
| 56 | // for cases emitted by Nvidia's compiler. | 59 | |
| 57 | if (instr.shfl.operation == ShuffleOperation::Up) { | 60 | Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); |
| 58 | return Operation( | 61 | Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); |
| 59 | OperationCode::ILogicalShiftRight, | 62 | Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, |
| 60 | Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)), | 63 | Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); |
| 61 | Immediate(8)); | ||
| 62 | } else { | ||
| 63 | return Operation(OperationCode::ILogicalShiftRight, | ||
| 64 | Operation(OperationCode::IAdd, Immediate(0x201F), | ||
| 65 | Operation(OperationCode::INegate, std::move(mask))), | ||
| 66 | Immediate(8)); | ||
| 67 | } | ||
| 68 | }(); | ||
| 69 | 64 | ||
| 70 | const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> { | 65 | Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { |
| 71 | switch (instr.shfl.operation) { | 66 | switch (instr.shfl.operation) { |
| 72 | case ShuffleOperation::Idx: | 67 | case ShuffleOperation::Idx: |
| 73 | return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed}; | 68 | return Operation(OperationCode::IBitwiseOr, |
| 74 | case ShuffleOperation::Up: | 69 | Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), |
| 75 | return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp}; | 70 | min_thread_id); |
| 76 | case ShuffleOperation::Down: | 71 | case ShuffleOperation::Down: |
| 77 | return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown}; | 72 | return Operation(OperationCode::IAdd, thread_id, index); |
| 73 | case ShuffleOperation::Up: | ||
| 74 | return Operation(OperationCode::IAdd, thread_id, | ||
| 75 | Operation(OperationCode::INegate, index)); | ||
| 78 | case ShuffleOperation::Bfly: | 76 | case ShuffleOperation::Bfly: |
| 79 | return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly}; | 77 | return Operation(OperationCode::IBitwiseXor, thread_id, index); |
| 80 | } | 78 | } |
| 81 | UNREACHABLE_MSG("Invalid SHFL operation: {}", | 79 | UNREACHABLE(); |
| 82 | static_cast<u64>(instr.shfl.operation.Value())); | 80 | return Immediate(0U); |
| 83 | return {}; | ||
| 84 | }(); | 81 | }(); |
| 85 | 82 | ||
| 86 | // Setting the predicate before the register is intentional to avoid overwriting. | 83 | Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { |
| 87 | Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) | 84 | if (instr.shfl.operation == ShuffleOperation::Up) { |
| 88 | : GetRegister(instr.gpr20); | 85 | return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); |
| 89 | SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width)); | 86 | } else { |
| 87 | return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); | ||
| 88 | } | ||
| 89 | }(); | ||
| 90 | |||
| 91 | SetPredicate(bb, instr.shfl.pred48, in_bounds); | ||
| 90 | SetRegister( | 92 | SetRegister( |
| 91 | bb, instr.gpr0, | 93 | bb, instr.gpr0, |
| 92 | Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width))); | 94 | Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); |
| 93 | break; | 95 | break; |
| 94 | } | 96 | } |
| 95 | default: | 97 | default: |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 4300d9ff4..bd3547e0d 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -181,15 +181,8 @@ enum class OperationCode { | |||
| 181 | VoteAny, /// (bool) -> bool | 181 | VoteAny, /// (bool) -> bool |
| 182 | VoteEqual, /// (bool) -> bool | 182 | VoteEqual, /// (bool) -> bool |
| 183 | 183 | ||
| 184 | ShuffleIndexed, /// (uint value, uint index, uint width) -> uint | 184 | ThreadId, /// () -> uint |
| 185 | ShuffleUp, /// (uint value, uint index, uint width) -> uint | 185 | ShuffleIndexed, /// (uint value, uint index) -> uint |
| 186 | ShuffleDown, /// (uint value, uint index, uint width) -> uint | ||
| 187 | ShuffleButterfly, /// (uint value, uint index, uint width) -> uint | ||
| 188 | |||
| 189 | InRangeShuffleIndexed, /// (uint index, uint width) -> bool | ||
| 190 | InRangeShuffleUp, /// (uint index, uint width) -> bool | ||
| 191 | InRangeShuffleDown, /// (uint index, uint width) -> bool | ||
| 192 | InRangeShuffleButterfly, /// (uint index, uint width) -> bool | ||
| 193 | 186 | ||
| 194 | Amount, | 187 | Amount, |
| 195 | }; | 188 | }; |