diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 10 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 19 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 76 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 49 | ||||
| -rw-r--r-- | src/video_core/shader/decode/warp.cpp | 79 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 12 |
8 files changed, 127 insertions, 125 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 78d6886fb..9fafed4a2 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -616,6 +616,14 @@ union Instruction { | |||
| 616 | } shfl; | 616 | } shfl; |
| 617 | 617 | ||
| 618 | union { | 618 | union { |
| 619 | BitField<44, 1, u64> ftz; | ||
| 620 | BitField<39, 2, u64> tab5cb8_2; | ||
| 621 | BitField<38, 1, u64> ndv; | ||
| 622 | BitField<47, 1, u64> cc; | ||
| 623 | BitField<28, 8, u64> swizzle; | ||
| 624 | } fswzadd; | ||
| 625 | |||
| 626 | union { | ||
| 619 | BitField<8, 8, Register> gpr; | 627 | BitField<8, 8, Register> gpr; |
| 620 | BitField<20, 24, s64> offset; | 628 | BitField<20, 24, s64> offset; |
| 621 | } gmem; | 629 | } gmem; |
| @@ -1592,6 +1600,7 @@ public: | |||
| 1592 | DEPBAR, | 1600 | DEPBAR, |
| 1593 | VOTE, | 1601 | VOTE, |
| 1594 | SHFL, | 1602 | SHFL, |
| 1603 | FSWZADD, | ||
| 1595 | BFE_C, | 1604 | BFE_C, |
| 1596 | BFE_R, | 1605 | BFE_R, |
| 1597 | BFE_IMM, | 1606 | BFE_IMM, |
| @@ -1890,6 +1899,7 @@ private: | |||
| 1890 | INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), | 1899 | INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), |
| 1891 | INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), | 1900 | INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), |
| 1892 | INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), | 1901 | INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), |
| 1902 | INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"), | ||
| 1893 | INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), | 1903 | INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), |
| 1894 | INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), | 1904 | INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), |
| 1895 | INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), | 1905 | INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index c65b24c69..b30d5be74 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -62,6 +62,7 @@ Device::Device() { | |||
| 62 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); | 62 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); |
| 63 | has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && | 63 | has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && |
| 64 | GLAD_GL_NV_shader_thread_shuffle; | 64 | GLAD_GL_NV_shader_thread_shuffle; |
| 65 | has_shader_ballot = GLAD_GL_ARB_shader_ballot; | ||
| 65 | has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; | 66 | has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; |
| 66 | has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); | 67 | has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); |
| 67 | has_variable_aoffi = TestVariableAoffi(); | 68 | has_variable_aoffi = TestVariableAoffi(); |
| @@ -79,6 +80,7 @@ Device::Device(std::nullptr_t) { | |||
| 79 | max_vertex_attributes = 16; | 80 | max_vertex_attributes = 16; |
| 80 | max_varyings = 15; | 81 | max_varyings = 15; |
| 81 | has_warp_intrinsics = true; | 82 | has_warp_intrinsics = true; |
| 83 | has_shader_ballot = true; | ||
| 82 | has_vertex_viewport_layer = true; | 84 | has_vertex_viewport_layer = true; |
| 83 | has_image_load_formatted = true; | 85 | has_image_load_formatted = true; |
| 84 | has_variable_aoffi = true; | 86 | has_variable_aoffi = true; |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index bf35bd0b6..6c86fe207 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -34,6 +34,10 @@ public: | |||
| 34 | return has_warp_intrinsics; | 34 | return has_warp_intrinsics; |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | bool HasShaderBallot() const { | ||
| 38 | return has_shader_ballot; | ||
| 39 | } | ||
| 40 | |||
| 37 | bool HasVertexViewportLayer() const { | 41 | bool HasVertexViewportLayer() const { |
| 38 | return has_vertex_viewport_layer; | 42 | return has_vertex_viewport_layer; |
| 39 | } | 43 | } |
| @@ -68,6 +72,7 @@ private: | |||
| 68 | u32 max_vertex_attributes{}; | 72 | u32 max_vertex_attributes{}; |
| 69 | u32 max_varyings{}; | 73 | u32 max_varyings{}; |
| 70 | bool has_warp_intrinsics{}; | 74 | bool has_warp_intrinsics{}; |
| 75 | bool has_shader_ballot{}; | ||
| 71 | bool has_vertex_viewport_layer{}; | 76 | bool has_vertex_viewport_layer{}; |
| 72 | bool has_image_load_formatted{}; | 77 | bool has_image_load_formatted{}; |
| 73 | bool has_variable_aoffi{}; | 78 | bool has_variable_aoffi{}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 35e5214a5..04a239a39 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -275,16 +275,25 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy | |||
| 275 | std::string source = fmt::format(R"(// {} | 275 | std::string source = fmt::format(R"(// {} |
| 276 | #version 430 core | 276 | #version 430 core |
| 277 | #extension GL_ARB_separate_shader_objects : enable | 277 | #extension GL_ARB_separate_shader_objects : enable |
| 278 | #extension GL_ARB_shader_viewport_layer_array : enable | ||
| 279 | #extension GL_EXT_shader_image_load_formatted : enable | ||
| 280 | #extension GL_NV_gpu_shader5 : enable | ||
| 281 | #extension GL_NV_shader_thread_group : enable | ||
| 282 | #extension GL_NV_shader_thread_shuffle : enable | ||
| 283 | )", | 278 | )", |
| 284 | GetShaderId(unique_identifier, program_type)); | 279 | GetShaderId(unique_identifier, program_type)); |
| 285 | if (is_compute) { | 280 | if (is_compute) { |
| 286 | source += "#extension GL_ARB_compute_variable_group_size : require\n"; | 281 | source += "#extension GL_ARB_compute_variable_group_size : require\n"; |
| 287 | } | 282 | } |
| 283 | if (device.HasShaderBallot()) { | ||
| 284 | source += "#extension GL_ARB_shader_ballot : require\n"; | ||
| 285 | } | ||
| 286 | if (device.HasVertexViewportLayer()) { | ||
| 287 | source += "#extension GL_ARB_shader_viewport_layer_array : require\n"; | ||
| 288 | } | ||
| 289 | if (device.HasImageLoadFormatted()) { | ||
| 290 | source += "#extension GL_EXT_shader_image_load_formatted : require\n"; | ||
| 291 | } | ||
| 292 | if (device.HasWarpIntrinsics()) { | ||
| 293 | source += "#extension GL_NV_gpu_shader5 : require\n" | ||
| 294 | "#extension GL_NV_shader_thread_group : require\n" | ||
| 295 | "#extension GL_NV_shader_thread_shuffle : require\n"; | ||
| 296 | } | ||
| 288 | source += '\n'; | 297 | source += '\n'; |
| 289 | 298 | ||
| 290 | if (!is_compute) { | 299 | if (!is_compute) { |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 0ce59a852..e56ed51de 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -1379,6 +1379,26 @@ private: | |||
| 1379 | return GenerateUnary(operation, "float", Type::Float, type); | 1379 | return GenerateUnary(operation, "float", Type::Float, type); |
| 1380 | } | 1380 | } |
| 1381 | 1381 | ||
| 1382 | Expression FSwizzleAdd(Operation operation) { | ||
| 1383 | const std::string op_a = VisitOperand(operation, 0).AsFloat(); | ||
| 1384 | const std::string op_b = VisitOperand(operation, 1).AsFloat(); | ||
| 1385 | |||
| 1386 | if (!device.HasShaderBallot()) { | ||
| 1387 | LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); | ||
| 1388 | return {fmt::format("{} + {}", op_a, op_b), Type::Float}; | ||
| 1389 | } | ||
| 1390 | |||
| 1391 | const std::string instr_mask = VisitOperand(operation, 2).AsUint(); | ||
| 1392 | const std::string mask = code.GenerateTemporary(); | ||
| 1393 | code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask, | ||
| 1394 | instr_mask); | ||
| 1395 | |||
| 1396 | const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask); | ||
| 1397 | const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask); | ||
| 1398 | return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b), | ||
| 1399 | Type::Float}; | ||
| 1400 | } | ||
| 1401 | |||
| 1382 | Expression ICastFloat(Operation operation) { | 1402 | Expression ICastFloat(Operation operation) { |
| 1383 | return GenerateUnary(operation, "int", Type::Int, Type::Float); | 1403 | return GenerateUnary(operation, "int", Type::Int, Type::Float); |
| 1384 | } | 1404 | } |
| @@ -1942,34 +1962,24 @@ private: | |||
| 1942 | return Vote(operation, "allThreadsEqualNV"); | 1962 | return Vote(operation, "allThreadsEqualNV"); |
| 1943 | } | 1963 | } |
| 1944 | 1964 | ||
| 1945 | template <const std::string_view& func> | 1965 | Expression ThreadId(Operation operation) { |
| 1946 | Expression Shuffle(Operation operation) { | 1966 | if (!device.HasShaderBallot()) { |
| 1947 | const std::string value = VisitOperand(operation, 0).AsFloat(); | 1967 | LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); |
| 1948 | if (!device.HasWarpIntrinsics()) { | 1968 | return {"0U", Type::Uint}; |
| 1949 | LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader"); | ||
| 1950 | // On a "single-thread" device we are either on the same thread or out of bounds. Both | ||
| 1951 | // cases return the passed value. | ||
| 1952 | return {value, Type::Float}; | ||
| 1953 | } | 1969 | } |
| 1954 | 1970 | return {"gl_SubGroupInvocationARB", Type::Uint}; | |
| 1955 | const std::string index = VisitOperand(operation, 1).AsUint(); | ||
| 1956 | const std::string width = VisitOperand(operation, 2).AsUint(); | ||
| 1957 | return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float}; | ||
| 1958 | } | 1971 | } |
| 1959 | 1972 | ||
| 1960 | template <const std::string_view& func> | 1973 | Expression ShuffleIndexed(Operation operation) { |
| 1961 | Expression InRangeShuffle(Operation operation) { | 1974 | std::string value = VisitOperand(operation, 0).AsFloat(); |
| 1962 | const std::string index = VisitOperand(operation, 0).AsUint(); | 1975 | |
| 1963 | const std::string width = VisitOperand(operation, 1).AsUint(); | 1976 | if (!device.HasShaderBallot()) { |
| 1964 | if (!device.HasWarpIntrinsics()) { | 1977 | LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); |
| 1965 | // On a "single-thread" device we are only in bounds when the requested index is 0. | 1978 | return {std::move(value), Type::Float}; |
| 1966 | return {fmt::format("({} == 0U)", index), Type::Bool}; | ||
| 1967 | } | 1979 | } |
| 1968 | 1980 | ||
| 1969 | const std::string in_range = code.GenerateTemporary(); | 1981 | const std::string index = VisitOperand(operation, 1).AsUint(); |
| 1970 | code.AddLine("bool {};", in_range); | 1982 | return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; |
| 1971 | code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range); | ||
| 1972 | return {in_range, Type::Bool}; | ||
| 1973 | } | 1983 | } |
| 1974 | 1984 | ||
| 1975 | struct Func final { | 1985 | struct Func final { |
| @@ -1981,11 +1991,6 @@ private: | |||
| 1981 | static constexpr std::string_view Or = "Or"; | 1991 | static constexpr std::string_view Or = "Or"; |
| 1982 | static constexpr std::string_view Xor = "Xor"; | 1992 | static constexpr std::string_view Xor = "Xor"; |
| 1983 | static constexpr std::string_view Exchange = "Exchange"; | 1993 | static constexpr std::string_view Exchange = "Exchange"; |
| 1984 | |||
| 1985 | static constexpr std::string_view ShuffleIndexed = "shuffleNV"; | ||
| 1986 | static constexpr std::string_view ShuffleUp = "shuffleUpNV"; | ||
| 1987 | static constexpr std::string_view ShuffleDown = "shuffleDownNV"; | ||
| 1988 | static constexpr std::string_view ShuffleButterfly = "shuffleXorNV"; | ||
| 1989 | }; | 1994 | }; |
| 1990 | 1995 | ||
| 1991 | static constexpr std::array operation_decompilers = { | 1996 | static constexpr std::array operation_decompilers = { |
| @@ -2016,6 +2021,7 @@ private: | |||
| 2016 | &GLSLDecompiler::FTrunc, | 2021 | &GLSLDecompiler::FTrunc, |
| 2017 | &GLSLDecompiler::FCastInteger<Type::Int>, | 2022 | &GLSLDecompiler::FCastInteger<Type::Int>, |
| 2018 | &GLSLDecompiler::FCastInteger<Type::Uint>, | 2023 | &GLSLDecompiler::FCastInteger<Type::Uint>, |
| 2024 | &GLSLDecompiler::FSwizzleAdd, | ||
| 2019 | 2025 | ||
| 2020 | &GLSLDecompiler::Add<Type::Int>, | 2026 | &GLSLDecompiler::Add<Type::Int>, |
| 2021 | &GLSLDecompiler::Mul<Type::Int>, | 2027 | &GLSLDecompiler::Mul<Type::Int>, |
| @@ -2151,15 +2157,8 @@ private: | |||
| 2151 | &GLSLDecompiler::VoteAny, | 2157 | &GLSLDecompiler::VoteAny, |
| 2152 | &GLSLDecompiler::VoteEqual, | 2158 | &GLSLDecompiler::VoteEqual, |
| 2153 | 2159 | ||
| 2154 | &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>, | 2160 | &GLSLDecompiler::ThreadId, |
| 2155 | &GLSLDecompiler::Shuffle<Func::ShuffleUp>, | 2161 | &GLSLDecompiler::ShuffleIndexed, |
| 2156 | &GLSLDecompiler::Shuffle<Func::ShuffleDown>, | ||
| 2157 | &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>, | ||
| 2158 | |||
| 2159 | &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>, | ||
| 2160 | &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>, | ||
| 2161 | &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>, | ||
| 2162 | &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>, | ||
| 2163 | }; | 2162 | }; |
| 2164 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 2163 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 2165 | 2164 | ||
| @@ -2492,6 +2491,9 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) { | |||
| 2492 | bvec2 is_nan2 = isnan(pair2); | 2491 | bvec2 is_nan2 = isnan(pair2); |
| 2493 | return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); | 2492 | return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); |
| 2494 | } | 2493 | } |
| 2494 | |||
| 2495 | const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); | ||
| 2496 | const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); | ||
| 2495 | )"; | 2497 | )"; |
| 2496 | } | 2498 | } |
| 2497 | 2499 | ||
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 42cf068b6..2850d5b59 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -783,6 +783,11 @@ private: | |||
| 783 | return {}; | 783 | return {}; |
| 784 | } | 784 | } |
| 785 | 785 | ||
| 786 | Id FSwizzleAdd(Operation operation) { | ||
| 787 | UNIMPLEMENTED(); | ||
| 788 | return {}; | ||
| 789 | } | ||
| 790 | |||
| 786 | Id HNegate(Operation operation) { | 791 | Id HNegate(Operation operation) { |
| 787 | UNIMPLEMENTED(); | 792 | UNIMPLEMENTED(); |
| 788 | return {}; | 793 | return {}; |
| @@ -1195,42 +1200,12 @@ private: | |||
| 1195 | return {}; | 1200 | return {}; |
| 1196 | } | 1201 | } |
| 1197 | 1202 | ||
| 1198 | Id ShuffleIndexed(Operation) { | 1203 | Id ThreadId(Operation) { |
| 1199 | UNIMPLEMENTED(); | ||
| 1200 | return {}; | ||
| 1201 | } | ||
| 1202 | |||
| 1203 | Id ShuffleUp(Operation) { | ||
| 1204 | UNIMPLEMENTED(); | ||
| 1205 | return {}; | ||
| 1206 | } | ||
| 1207 | |||
| 1208 | Id ShuffleDown(Operation) { | ||
| 1209 | UNIMPLEMENTED(); | ||
| 1210 | return {}; | ||
| 1211 | } | ||
| 1212 | |||
| 1213 | Id ShuffleButterfly(Operation) { | ||
| 1214 | UNIMPLEMENTED(); | ||
| 1215 | return {}; | ||
| 1216 | } | ||
| 1217 | |||
| 1218 | Id InRangeShuffleIndexed(Operation) { | ||
| 1219 | UNIMPLEMENTED(); | 1204 | UNIMPLEMENTED(); |
| 1220 | return {}; | 1205 | return {}; |
| 1221 | } | 1206 | } |
| 1222 | 1207 | ||
| 1223 | Id InRangeShuffleUp(Operation) { | 1208 | Id ShuffleIndexed(Operation) { |
| 1224 | UNIMPLEMENTED(); | ||
| 1225 | return {}; | ||
| 1226 | } | ||
| 1227 | |||
| 1228 | Id InRangeShuffleDown(Operation) { | ||
| 1229 | UNIMPLEMENTED(); | ||
| 1230 | return {}; | ||
| 1231 | } | ||
| 1232 | |||
| 1233 | Id InRangeShuffleButterfly(Operation) { | ||
| 1234 | UNIMPLEMENTED(); | 1209 | UNIMPLEMENTED(); |
| 1235 | return {}; | 1210 | return {}; |
| 1236 | } | 1211 | } |
| @@ -1393,6 +1368,7 @@ private: | |||
| 1393 | &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>, | 1368 | &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>, |
| 1394 | &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>, | 1369 | &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>, |
| 1395 | &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>, | 1370 | &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>, |
| 1371 | &SPIRVDecompiler::FSwizzleAdd, | ||
| 1396 | 1372 | ||
| 1397 | &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>, | 1373 | &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>, |
| 1398 | &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>, | 1374 | &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>, |
| @@ -1528,15 +1504,8 @@ private: | |||
| 1528 | &SPIRVDecompiler::VoteAny, | 1504 | &SPIRVDecompiler::VoteAny, |
| 1529 | &SPIRVDecompiler::VoteEqual, | 1505 | &SPIRVDecompiler::VoteEqual, |
| 1530 | 1506 | ||
| 1507 | &SPIRVDecompiler::ThreadId, | ||
| 1531 | &SPIRVDecompiler::ShuffleIndexed, | 1508 | &SPIRVDecompiler::ShuffleIndexed, |
| 1532 | &SPIRVDecompiler::ShuffleUp, | ||
| 1533 | &SPIRVDecompiler::ShuffleDown, | ||
| 1534 | &SPIRVDecompiler::ShuffleButterfly, | ||
| 1535 | |||
| 1536 | &SPIRVDecompiler::InRangeShuffleIndexed, | ||
| 1537 | &SPIRVDecompiler::InRangeShuffleUp, | ||
| 1538 | &SPIRVDecompiler::InRangeShuffleDown, | ||
| 1539 | &SPIRVDecompiler::InRangeShuffleButterfly, | ||
| 1540 | }; | 1509 | }; |
| 1541 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 1510 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 1542 | 1511 | ||
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index fa8a250cc..d98d0e1dd 100644 --- a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp | |||
| @@ -17,6 +17,7 @@ using Tegra::Shader::ShuffleOperation; | |||
| 17 | using Tegra::Shader::VoteOperation; | 17 | using Tegra::Shader::VoteOperation; |
| 18 | 18 | ||
| 19 | namespace { | 19 | namespace { |
| 20 | |||
| 20 | OperationCode GetOperationCode(VoteOperation vote_op) { | 21 | OperationCode GetOperationCode(VoteOperation vote_op) { |
| 21 | switch (vote_op) { | 22 | switch (vote_op) { |
| 22 | case VoteOperation::All: | 23 | case VoteOperation::All: |
| @@ -30,6 +31,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) { | |||
| 30 | return OperationCode::VoteAll; | 31 | return OperationCode::VoteAll; |
| 31 | } | 32 | } |
| 32 | } | 33 | } |
| 34 | |||
| 33 | } // Anonymous namespace | 35 | } // Anonymous namespace |
| 34 | 36 | ||
| 35 | u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { | 37 | u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { |
| @@ -46,50 +48,59 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { | |||
| 46 | break; | 48 | break; |
| 47 | } | 49 | } |
| 48 | case OpCode::Id::SHFL: { | 50 | case OpCode::Id::SHFL: { |
| 49 | Node width = [this, instr] { | 51 | Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) |
| 50 | Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) | 52 | : GetRegister(instr.gpr39); |
| 51 | : GetRegister(instr.gpr39); | 53 | Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) |
| 52 | 54 | : GetRegister(instr.gpr20); | |
| 53 | // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has | 55 | |
| 54 | // been done reversing Nvidia's math. It won't work on all cases due to SHFL having | 56 | Node thread_id = Operation(OperationCode::ThreadId); |
| 55 | // different parameters that don't properly map to GLSL's interface, but it should work | 57 | Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); |
| 56 | // for cases emitted by Nvidia's compiler. | 58 | Node seg_mask = BitfieldExtract(mask, 8, 16); |
| 57 | if (instr.shfl.operation == ShuffleOperation::Up) { | ||
| 58 | return Operation( | ||
| 59 | OperationCode::ILogicalShiftRight, | ||
| 60 | Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)), | ||
| 61 | Immediate(8)); | ||
| 62 | } else { | ||
| 63 | return Operation(OperationCode::ILogicalShiftRight, | ||
| 64 | Operation(OperationCode::IAdd, Immediate(0x201F), | ||
| 65 | Operation(OperationCode::INegate, std::move(mask))), | ||
| 66 | Immediate(8)); | ||
| 67 | } | ||
| 68 | }(); | ||
| 69 | 59 | ||
| 70 | const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> { | 60 | Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); |
| 61 | Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); | ||
| 62 | Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, | ||
| 63 | Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); | ||
| 64 | |||
| 65 | Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { | ||
| 71 | switch (instr.shfl.operation) { | 66 | switch (instr.shfl.operation) { |
| 72 | case ShuffleOperation::Idx: | 67 | case ShuffleOperation::Idx: |
| 73 | return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed}; | 68 | return Operation(OperationCode::IBitwiseOr, |
| 74 | case ShuffleOperation::Up: | 69 | Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), |
| 75 | return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp}; | 70 | min_thread_id); |
| 76 | case ShuffleOperation::Down: | 71 | case ShuffleOperation::Down: |
| 77 | return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown}; | 72 | return Operation(OperationCode::IAdd, thread_id, index); |
| 73 | case ShuffleOperation::Up: | ||
| 74 | return Operation(OperationCode::IAdd, thread_id, | ||
| 75 | Operation(OperationCode::INegate, index)); | ||
| 78 | case ShuffleOperation::Bfly: | 76 | case ShuffleOperation::Bfly: |
| 79 | return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly}; | 77 | return Operation(OperationCode::IBitwiseXor, thread_id, index); |
| 80 | } | 78 | } |
| 81 | UNREACHABLE_MSG("Invalid SHFL operation: {}", | 79 | UNREACHABLE(); |
| 82 | static_cast<u64>(instr.shfl.operation.Value())); | 80 | return Immediate(0U); |
| 83 | return {}; | ||
| 84 | }(); | 81 | }(); |
| 85 | 82 | ||
| 86 | // Setting the predicate before the register is intentional to avoid overwriting. | 83 | Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { |
| 87 | Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) | 84 | if (instr.shfl.operation == ShuffleOperation::Up) { |
| 88 | : GetRegister(instr.gpr20); | 85 | return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); |
| 89 | SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width)); | 86 | } else { |
| 87 | return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); | ||
| 88 | } | ||
| 89 | }(); | ||
| 90 | |||
| 91 | SetPredicate(bb, instr.shfl.pred48, in_bounds); | ||
| 90 | SetRegister( | 92 | SetRegister( |
| 91 | bb, instr.gpr0, | 93 | bb, instr.gpr0, |
| 92 | Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width))); | 94 | Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); |
| 95 | break; | ||
| 96 | } | ||
| 97 | case OpCode::Id::FSWZADD: { | ||
| 98 | UNIMPLEMENTED_IF(instr.fswzadd.ndv); | ||
| 99 | |||
| 100 | Node op_a = GetRegister(instr.gpr8); | ||
| 101 | Node op_b = GetRegister(instr.gpr20); | ||
| 102 | Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle)); | ||
| 103 | SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); | ||
| 93 | break; | 104 | break; |
| 94 | } | 105 | } |
| 95 | default: | 106 | default: |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 4300d9ff4..54217e6a4 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -47,6 +47,7 @@ enum class OperationCode { | |||
| 47 | FTrunc, /// (MetaArithmetic, float a) -> float | 47 | FTrunc, /// (MetaArithmetic, float a) -> float |
| 48 | FCastInteger, /// (MetaArithmetic, int a) -> float | 48 | FCastInteger, /// (MetaArithmetic, int a) -> float |
| 49 | FCastUInteger, /// (MetaArithmetic, uint a) -> float | 49 | FCastUInteger, /// (MetaArithmetic, uint a) -> float |
| 50 | FSwizzleAdd, /// (float a, float b, uint mask) -> float | ||
| 50 | 51 | ||
| 51 | IAdd, /// (MetaArithmetic, int a, int b) -> int | 52 | IAdd, /// (MetaArithmetic, int a, int b) -> int |
| 52 | IMul, /// (MetaArithmetic, int a, int b) -> int | 53 | IMul, /// (MetaArithmetic, int a, int b) -> int |
| @@ -181,15 +182,8 @@ enum class OperationCode { | |||
| 181 | VoteAny, /// (bool) -> bool | 182 | VoteAny, /// (bool) -> bool |
| 182 | VoteEqual, /// (bool) -> bool | 183 | VoteEqual, /// (bool) -> bool |
| 183 | 184 | ||
| 184 | ShuffleIndexed, /// (uint value, uint index, uint width) -> uint | 185 | ThreadId, /// () -> uint |
| 185 | ShuffleUp, /// (uint value, uint index, uint width) -> uint | 186 | ShuffleIndexed, /// (uint value, uint index) -> uint |
| 186 | ShuffleDown, /// (uint value, uint index, uint width) -> uint | ||
| 187 | ShuffleButterfly, /// (uint value, uint index, uint width) -> uint | ||
| 188 | |||
| 189 | InRangeShuffleIndexed, /// (uint index, uint width) -> bool | ||
| 190 | InRangeShuffleUp, /// (uint index, uint width) -> bool | ||
| 191 | InRangeShuffleDown, /// (uint index, uint width) -> bool | ||
| 192 | InRangeShuffleButterfly, /// (uint index, uint width) -> bool | ||
| 193 | 187 | ||
| 194 | Amount, | 188 | Amount, |
| 195 | }; | 189 | }; |