diff options
| author | 2019-09-20 17:10:42 -0400 | |
|---|---|---|
| committer | 2019-09-20 17:10:42 -0400 | |
| commit | 88d857499b6168d7bcea9b91fa5bdd8b0144c07a (patch) | |
| tree | 1efa1eddb43c0afd3003b449675997abc9a7637f | |
| parent | Merge pull request #2784 from ReinUsesLisp/smem (diff) | |
| parent | shader_ir/warp: Implement SHFL (diff) | |
| download | yuzu-88d857499b6168d7bcea9b91fa5bdd8b0144c07a.tar.gz yuzu-88d857499b6168d7bcea9b91fa5bdd8b0144c07a.tar.xz yuzu-88d857499b6168d7bcea9b91fa5bdd8b0144c07a.zip | |
Merge pull request #2855 from ReinUsesLisp/shfl
shader_ir/warp: Implement SHFL for Nvidia devices
Diffstat (limited to '')
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 18 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 63 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 50 | ||||
| -rw-r--r-- | src/video_core/shader/decode/warp.cpp | 47 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 10 |
6 files changed, 182 insertions, 9 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 052e6d24e..a6110bd86 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -566,6 +566,13 @@ enum class ImageAtomicOperation : u64 { | |||
| 566 | Exch = 8, | 566 | Exch = 8, |
| 567 | }; | 567 | }; |
| 568 | 568 | ||
| 569 | enum class ShuffleOperation : u64 { | ||
| 570 | Idx = 0, // shuffleNV | ||
| 571 | Up = 1, // shuffleUpNV | ||
| 572 | Down = 2, // shuffleDownNV | ||
| 573 | Bfly = 3, // shuffleXorNV | ||
| 574 | }; | ||
| 575 | |||
| 569 | union Instruction { | 576 | union Instruction { |
| 570 | Instruction& operator=(const Instruction& instr) { | 577 | Instruction& operator=(const Instruction& instr) { |
| 571 | value = instr.value; | 578 | value = instr.value; |
| @@ -600,6 +607,15 @@ union Instruction { | |||
| 600 | } vote; | 607 | } vote; |
| 601 | 608 | ||
| 602 | union { | 609 | union { |
| 610 | BitField<30, 2, ShuffleOperation> operation; | ||
| 611 | BitField<48, 3, u64> pred48; | ||
| 612 | BitField<28, 1, u64> is_index_imm; | ||
| 613 | BitField<29, 1, u64> is_mask_imm; | ||
| 614 | BitField<20, 5, u64> index_imm; | ||
| 615 | BitField<34, 13, u64> mask_imm; | ||
| 616 | } shfl; | ||
| 617 | |||
| 618 | union { | ||
| 603 | BitField<8, 8, Register> gpr; | 619 | BitField<8, 8, Register> gpr; |
| 604 | BitField<20, 24, s64> offset; | 620 | BitField<20, 24, s64> offset; |
| 605 | } gmem; | 621 | } gmem; |
| @@ -1542,6 +1558,7 @@ public: | |||
| 1542 | BRK, | 1558 | BRK, |
| 1543 | DEPBAR, | 1559 | DEPBAR, |
| 1544 | VOTE, | 1560 | VOTE, |
| 1561 | SHFL, | ||
| 1545 | BFE_C, | 1562 | BFE_C, |
| 1546 | BFE_R, | 1563 | BFE_R, |
| 1547 | BFE_IMM, | 1564 | BFE_IMM, |
| @@ -1833,6 +1850,7 @@ private: | |||
| 1833 | INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), | 1850 | INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), |
| 1834 | INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), | 1851 | INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), |
| 1835 | INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), | 1852 | INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), |
| 1853 | INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), | ||
| 1836 | INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), | 1854 | INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), |
| 1837 | INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), | 1855 | INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), |
| 1838 | INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), | 1856 | INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 909ccb82c..0dbc4c02f 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -214,7 +214,8 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
| 214 | std::string source = "#version 430 core\n" | 214 | std::string source = "#version 430 core\n" |
| 215 | "#extension GL_ARB_separate_shader_objects : enable\n" | 215 | "#extension GL_ARB_separate_shader_objects : enable\n" |
| 216 | "#extension GL_NV_gpu_shader5 : enable\n" | 216 | "#extension GL_NV_gpu_shader5 : enable\n" |
| 217 | "#extension GL_NV_shader_thread_group : enable\n"; | 217 | "#extension GL_NV_shader_thread_group : enable\n" |
| 218 | "#extension GL_NV_shader_thread_shuffle : enable\n"; | ||
| 218 | if (entries.shader_viewport_layer_array) { | 219 | if (entries.shader_viewport_layer_array) { |
| 219 | source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; | 220 | source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; |
| 220 | } | 221 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 14834d86a..6c5402e33 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -1953,8 +1953,7 @@ private: | |||
| 1953 | Expression BallotThread(Operation operation) { | 1953 | Expression BallotThread(Operation operation) { |
| 1954 | const std::string value = VisitOperand(operation, 0).AsBool(); | 1954 | const std::string value = VisitOperand(operation, 0).AsBool(); |
| 1955 | if (!device.HasWarpIntrinsics()) { | 1955 | if (!device.HasWarpIntrinsics()) { |
| 1956 | LOG_ERROR(Render_OpenGL, | 1956 | LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); |
| 1957 | "Nvidia warp intrinsics are not available and its required by a shader"); | ||
| 1958 | // Stub on non-Nvidia devices by simulating all threads voting the same as the active | 1957 | // Stub on non-Nvidia devices by simulating all threads voting the same as the active |
| 1959 | // one. | 1958 | // one. |
| 1960 | return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; | 1959 | return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; |
| @@ -1965,8 +1964,7 @@ private: | |||
| 1965 | Expression Vote(Operation operation, const char* func) { | 1964 | Expression Vote(Operation operation, const char* func) { |
| 1966 | const std::string value = VisitOperand(operation, 0).AsBool(); | 1965 | const std::string value = VisitOperand(operation, 0).AsBool(); |
| 1967 | if (!device.HasWarpIntrinsics()) { | 1966 | if (!device.HasWarpIntrinsics()) { |
| 1968 | LOG_ERROR(Render_OpenGL, | 1967 | LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); |
| 1969 | "Nvidia vote intrinsics are not available and its required by a shader"); | ||
| 1970 | // Stub with a warp size of one. | 1968 | // Stub with a warp size of one. |
| 1971 | return {value, Type::Bool}; | 1969 | return {value, Type::Bool}; |
| 1972 | } | 1970 | } |
| @@ -1983,15 +1981,54 @@ private: | |||
| 1983 | 1981 | ||
| 1984 | Expression VoteEqual(Operation operation) { | 1982 | Expression VoteEqual(Operation operation) { |
| 1985 | if (!device.HasWarpIntrinsics()) { | 1983 | if (!device.HasWarpIntrinsics()) { |
| 1986 | LOG_ERROR(Render_OpenGL, | 1984 | LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); |
| 1987 | "Nvidia vote intrinsics are not available and its required by a shader"); | 1985 | // We must return true here since a stub for a theoretical warp size of 1. |
| 1988 | // We must return true here since a stub for a theoretical warp size of 1 will always | 1986 | // This will always return an equal result across all votes. |
| 1989 | // return an equal result for all its votes. | ||
| 1990 | return {"true", Type::Bool}; | 1987 | return {"true", Type::Bool}; |
| 1991 | } | 1988 | } |
| 1992 | return Vote(operation, "allThreadsEqualNV"); | 1989 | return Vote(operation, "allThreadsEqualNV"); |
| 1993 | } | 1990 | } |
| 1994 | 1991 | ||
| 1992 | template <const std::string_view& func> | ||
| 1993 | Expression Shuffle(Operation operation) { | ||
| 1994 | const std::string value = VisitOperand(operation, 0).AsFloat(); | ||
| 1995 | if (!device.HasWarpIntrinsics()) { | ||
| 1996 | LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader"); | ||
| 1997 | // On a "single-thread" device we are either on the same thread or out of bounds. Both | ||
| 1998 | // cases return the passed value. | ||
| 1999 | return {value, Type::Float}; | ||
| 2000 | } | ||
| 2001 | |||
| 2002 | const std::string index = VisitOperand(operation, 1).AsUint(); | ||
| 2003 | const std::string width = VisitOperand(operation, 2).AsUint(); | ||
| 2004 | return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float}; | ||
| 2005 | } | ||
| 2006 | |||
| 2007 | template <const std::string_view& func> | ||
| 2008 | Expression InRangeShuffle(Operation operation) { | ||
| 2009 | const std::string index = VisitOperand(operation, 0).AsUint(); | ||
| 2010 | const std::string width = VisitOperand(operation, 1).AsUint(); | ||
| 2011 | if (!device.HasWarpIntrinsics()) { | ||
| 2012 | // On a "single-thread" device we are only in bounds when the requested index is 0. | ||
| 2013 | return {fmt::format("({} == 0U)", index), Type::Bool}; | ||
| 2014 | } | ||
| 2015 | |||
| 2016 | const std::string in_range = code.GenerateTemporary(); | ||
| 2017 | code.AddLine("bool {};", in_range); | ||
| 2018 | code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range); | ||
| 2019 | return {in_range, Type::Bool}; | ||
| 2020 | } | ||
| 2021 | |||
| 2022 | struct Func final { | ||
| 2023 | Func() = delete; | ||
| 2024 | ~Func() = delete; | ||
| 2025 | |||
| 2026 | static constexpr std::string_view ShuffleIndexed = "shuffleNV"; | ||
| 2027 | static constexpr std::string_view ShuffleUp = "shuffleUpNV"; | ||
| 2028 | static constexpr std::string_view ShuffleDown = "shuffleDownNV"; | ||
| 2029 | static constexpr std::string_view ShuffleButterfly = "shuffleXorNV"; | ||
| 2030 | }; | ||
| 2031 | |||
| 1995 | static constexpr std::array operation_decompilers = { | 2032 | static constexpr std::array operation_decompilers = { |
| 1996 | &GLSLDecompiler::Assign, | 2033 | &GLSLDecompiler::Assign, |
| 1997 | 2034 | ||
| @@ -2154,6 +2191,16 @@ private: | |||
| 2154 | &GLSLDecompiler::VoteAll, | 2191 | &GLSLDecompiler::VoteAll, |
| 2155 | &GLSLDecompiler::VoteAny, | 2192 | &GLSLDecompiler::VoteAny, |
| 2156 | &GLSLDecompiler::VoteEqual, | 2193 | &GLSLDecompiler::VoteEqual, |
| 2194 | |||
| 2195 | &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>, | ||
| 2196 | &GLSLDecompiler::Shuffle<Func::ShuffleUp>, | ||
| 2197 | &GLSLDecompiler::Shuffle<Func::ShuffleDown>, | ||
| 2198 | &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>, | ||
| 2199 | |||
| 2200 | &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>, | ||
| 2201 | &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>, | ||
| 2202 | &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>, | ||
| 2203 | &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>, | ||
| 2157 | }; | 2204 | }; |
| 2158 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 2205 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 2159 | 2206 | ||
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index b9153934e..f7fbbb6e4 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -1127,6 +1127,46 @@ private: | |||
| 1127 | return {}; | 1127 | return {}; |
| 1128 | } | 1128 | } |
| 1129 | 1129 | ||
| 1130 | Id ShuffleIndexed(Operation) { | ||
| 1131 | UNIMPLEMENTED(); | ||
| 1132 | return {}; | ||
| 1133 | } | ||
| 1134 | |||
| 1135 | Id ShuffleUp(Operation) { | ||
| 1136 | UNIMPLEMENTED(); | ||
| 1137 | return {}; | ||
| 1138 | } | ||
| 1139 | |||
| 1140 | Id ShuffleDown(Operation) { | ||
| 1141 | UNIMPLEMENTED(); | ||
| 1142 | return {}; | ||
| 1143 | } | ||
| 1144 | |||
| 1145 | Id ShuffleButterfly(Operation) { | ||
| 1146 | UNIMPLEMENTED(); | ||
| 1147 | return {}; | ||
| 1148 | } | ||
| 1149 | |||
| 1150 | Id InRangeShuffleIndexed(Operation) { | ||
| 1151 | UNIMPLEMENTED(); | ||
| 1152 | return {}; | ||
| 1153 | } | ||
| 1154 | |||
| 1155 | Id InRangeShuffleUp(Operation) { | ||
| 1156 | UNIMPLEMENTED(); | ||
| 1157 | return {}; | ||
| 1158 | } | ||
| 1159 | |||
| 1160 | Id InRangeShuffleDown(Operation) { | ||
| 1161 | UNIMPLEMENTED(); | ||
| 1162 | return {}; | ||
| 1163 | } | ||
| 1164 | |||
| 1165 | Id InRangeShuffleButterfly(Operation) { | ||
| 1166 | UNIMPLEMENTED(); | ||
| 1167 | return {}; | ||
| 1168 | } | ||
| 1169 | |||
| 1130 | Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, | 1170 | Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, |
| 1131 | const std::string& name) { | 1171 | const std::string& name) { |
| 1132 | const Id id = OpVariable(type, storage); | 1172 | const Id id = OpVariable(type, storage); |
| @@ -1431,6 +1471,16 @@ private: | |||
| 1431 | &SPIRVDecompiler::VoteAll, | 1471 | &SPIRVDecompiler::VoteAll, |
| 1432 | &SPIRVDecompiler::VoteAny, | 1472 | &SPIRVDecompiler::VoteAny, |
| 1433 | &SPIRVDecompiler::VoteEqual, | 1473 | &SPIRVDecompiler::VoteEqual, |
| 1474 | |||
| 1475 | &SPIRVDecompiler::ShuffleIndexed, | ||
| 1476 | &SPIRVDecompiler::ShuffleUp, | ||
| 1477 | &SPIRVDecompiler::ShuffleDown, | ||
| 1478 | &SPIRVDecompiler::ShuffleButterfly, | ||
| 1479 | |||
| 1480 | &SPIRVDecompiler::InRangeShuffleIndexed, | ||
| 1481 | &SPIRVDecompiler::InRangeShuffleUp, | ||
| 1482 | &SPIRVDecompiler::InRangeShuffleDown, | ||
| 1483 | &SPIRVDecompiler::InRangeShuffleButterfly, | ||
| 1434 | }; | 1484 | }; |
| 1435 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 1485 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 1436 | 1486 | ||
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index 04ca74f46..a8e481b3c 100644 --- a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp | |||
| @@ -13,6 +13,7 @@ namespace VideoCommon::Shader { | |||
| 13 | using Tegra::Shader::Instruction; | 13 | using Tegra::Shader::Instruction; |
| 14 | using Tegra::Shader::OpCode; | 14 | using Tegra::Shader::OpCode; |
| 15 | using Tegra::Shader::Pred; | 15 | using Tegra::Shader::Pred; |
| 16 | using Tegra::Shader::ShuffleOperation; | ||
| 16 | using Tegra::Shader::VoteOperation; | 17 | using Tegra::Shader::VoteOperation; |
| 17 | 18 | ||
| 18 | namespace { | 19 | namespace { |
| @@ -44,6 +45,52 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { | |||
| 44 | SetPredicate(bb, instr.vote.dest_pred, vote); | 45 | SetPredicate(bb, instr.vote.dest_pred, vote); |
| 45 | break; | 46 | break; |
| 46 | } | 47 | } |
| 48 | case OpCode::Id::SHFL: { | ||
| 49 | Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) | ||
| 50 | : GetRegister(instr.gpr39); | ||
| 51 | Node width = [&] { | ||
| 52 | // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has | ||
| 53 | // been done reversing Nvidia's math. It won't work on all cases due to SHFL having | ||
| 54 | // different parameters that don't properly map to GLSL's interface, but it should work | ||
| 55 | // for cases emitted by Nvidia's compiler. | ||
| 56 | if (instr.shfl.operation == ShuffleOperation::Up) { | ||
| 57 | return Operation( | ||
| 58 | OperationCode::ILogicalShiftRight, | ||
| 59 | Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)), | ||
| 60 | Immediate(8)); | ||
| 61 | } else { | ||
| 62 | return Operation(OperationCode::ILogicalShiftRight, | ||
| 63 | Operation(OperationCode::IAdd, Immediate(0x201F), | ||
| 64 | Operation(OperationCode::INegate, std::move(mask))), | ||
| 65 | Immediate(8)); | ||
| 66 | } | ||
| 67 | }(); | ||
| 68 | |||
| 69 | const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> { | ||
| 70 | switch (instr.shfl.operation) { | ||
| 71 | case ShuffleOperation::Idx: | ||
| 72 | return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed}; | ||
| 73 | case ShuffleOperation::Up: | ||
| 74 | return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp}; | ||
| 75 | case ShuffleOperation::Down: | ||
| 76 | return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown}; | ||
| 77 | case ShuffleOperation::Bfly: | ||
| 78 | return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly}; | ||
| 79 | } | ||
| 80 | UNREACHABLE_MSG("Invalid SHFL operation: {}", | ||
| 81 | static_cast<u64>(instr.shfl.operation.Value())); | ||
| 82 | return {}; | ||
| 83 | }(); | ||
| 84 | |||
| 85 | // Setting the predicate before the register is intentional to avoid overwriting. | ||
| 86 | Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) | ||
| 87 | : GetRegister(instr.gpr20); | ||
| 88 | SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width)); | ||
| 89 | SetRegister( | ||
| 90 | bb, instr.gpr0, | ||
| 91 | Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width))); | ||
| 92 | break; | ||
| 93 | } | ||
| 47 | default: | 94 | default: |
| 48 | UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); | 95 | UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); |
| 49 | break; | 96 | break; |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 425111cc4..abf2cb1ab 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -181,6 +181,16 @@ enum class OperationCode { | |||
| 181 | VoteAny, /// (bool) -> bool | 181 | VoteAny, /// (bool) -> bool |
| 182 | VoteEqual, /// (bool) -> bool | 182 | VoteEqual, /// (bool) -> bool |
| 183 | 183 | ||
| 184 | ShuffleIndexed, /// (uint value, uint index, uint width) -> uint | ||
| 185 | ShuffleUp, /// (uint value, uint index, uint width) -> uint | ||
| 186 | ShuffleDown, /// (uint value, uint index, uint width) -> uint | ||
| 187 | ShuffleButterfly, /// (uint value, uint index, uint width) -> uint | ||
| 188 | |||
| 189 | InRangeShuffleIndexed, /// (uint index, uint width) -> bool | ||
| 190 | InRangeShuffleUp, /// (uint index, uint width) -> bool | ||
| 191 | InRangeShuffleDown, /// (uint index, uint width) -> bool | ||
| 192 | InRangeShuffleButterfly, /// (uint index, uint width) -> bool | ||
| 193 | |||
| 184 | Amount, | 194 | Amount, |
| 185 | }; | 195 | }; |
| 186 | 196 | ||