summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/video_core/engines/shader_bytecode.h10
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp19
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp76
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp49
-rw-r--r--src/video_core/shader/decode/warp.cpp79
-rw-r--r--src/video_core/shader/node.h12
8 files changed, 127 insertions, 125 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 78d6886fb..9fafed4a2 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -616,6 +616,14 @@ union Instruction {
616 } shfl; 616 } shfl;
617 617
618 union { 618 union {
619 BitField<44, 1, u64> ftz;
620 BitField<39, 2, u64> tab5cb8_2;
621 BitField<38, 1, u64> ndv;
622 BitField<47, 1, u64> cc;
623 BitField<28, 8, u64> swizzle;
624 } fswzadd;
625
626 union {
619 BitField<8, 8, Register> gpr; 627 BitField<8, 8, Register> gpr;
620 BitField<20, 24, s64> offset; 628 BitField<20, 24, s64> offset;
621 } gmem; 629 } gmem;
@@ -1592,6 +1600,7 @@ public:
1592 DEPBAR, 1600 DEPBAR,
1593 VOTE, 1601 VOTE,
1594 SHFL, 1602 SHFL,
1603 FSWZADD,
1595 BFE_C, 1604 BFE_C,
1596 BFE_R, 1605 BFE_R,
1597 BFE_IMM, 1606 BFE_IMM,
@@ -1890,6 +1899,7 @@ private:
1890 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), 1899 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
1891 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), 1900 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
1892 INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), 1901 INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
1902 INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"),
1893 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), 1903 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
1894 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), 1904 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
1895 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), 1905 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index c65b24c69..b30d5be74 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -62,6 +62,7 @@ Device::Device() {
62 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); 62 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
63 has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && 63 has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
64 GLAD_GL_NV_shader_thread_shuffle; 64 GLAD_GL_NV_shader_thread_shuffle;
65 has_shader_ballot = GLAD_GL_ARB_shader_ballot;
65 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; 66 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
66 has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); 67 has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
67 has_variable_aoffi = TestVariableAoffi(); 68 has_variable_aoffi = TestVariableAoffi();
@@ -79,6 +80,7 @@ Device::Device(std::nullptr_t) {
79 max_vertex_attributes = 16; 80 max_vertex_attributes = 16;
80 max_varyings = 15; 81 max_varyings = 15;
81 has_warp_intrinsics = true; 82 has_warp_intrinsics = true;
83 has_shader_ballot = true;
82 has_vertex_viewport_layer = true; 84 has_vertex_viewport_layer = true;
83 has_image_load_formatted = true; 85 has_image_load_formatted = true;
84 has_variable_aoffi = true; 86 has_variable_aoffi = true;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index bf35bd0b6..6c86fe207 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -34,6 +34,10 @@ public:
34 return has_warp_intrinsics; 34 return has_warp_intrinsics;
35 } 35 }
36 36
37 bool HasShaderBallot() const {
38 return has_shader_ballot;
39 }
40
37 bool HasVertexViewportLayer() const { 41 bool HasVertexViewportLayer() const {
38 return has_vertex_viewport_layer; 42 return has_vertex_viewport_layer;
39 } 43 }
@@ -68,6 +72,7 @@ private:
68 u32 max_vertex_attributes{}; 72 u32 max_vertex_attributes{};
69 u32 max_varyings{}; 73 u32 max_varyings{};
70 bool has_warp_intrinsics{}; 74 bool has_warp_intrinsics{};
75 bool has_shader_ballot{};
71 bool has_vertex_viewport_layer{}; 76 bool has_vertex_viewport_layer{};
72 bool has_image_load_formatted{}; 77 bool has_image_load_formatted{};
73 bool has_variable_aoffi{}; 78 bool has_variable_aoffi{};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 35e5214a5..04a239a39 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -275,16 +275,25 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy
275 std::string source = fmt::format(R"(// {} 275 std::string source = fmt::format(R"(// {}
276#version 430 core 276#version 430 core
277#extension GL_ARB_separate_shader_objects : enable 277#extension GL_ARB_separate_shader_objects : enable
278#extension GL_ARB_shader_viewport_layer_array : enable
279#extension GL_EXT_shader_image_load_formatted : enable
280#extension GL_NV_gpu_shader5 : enable
281#extension GL_NV_shader_thread_group : enable
282#extension GL_NV_shader_thread_shuffle : enable
283)", 278)",
284 GetShaderId(unique_identifier, program_type)); 279 GetShaderId(unique_identifier, program_type));
285 if (is_compute) { 280 if (is_compute) {
286 source += "#extension GL_ARB_compute_variable_group_size : require\n"; 281 source += "#extension GL_ARB_compute_variable_group_size : require\n";
287 } 282 }
283 if (device.HasShaderBallot()) {
284 source += "#extension GL_ARB_shader_ballot : require\n";
285 }
286 if (device.HasVertexViewportLayer()) {
287 source += "#extension GL_ARB_shader_viewport_layer_array : require\n";
288 }
289 if (device.HasImageLoadFormatted()) {
290 source += "#extension GL_EXT_shader_image_load_formatted : require\n";
291 }
292 if (device.HasWarpIntrinsics()) {
293 source += "#extension GL_NV_gpu_shader5 : require\n"
294 "#extension GL_NV_shader_thread_group : require\n"
295 "#extension GL_NV_shader_thread_shuffle : require\n";
296 }
288 source += '\n'; 297 source += '\n';
289 298
290 if (!is_compute) { 299 if (!is_compute) {
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 0ce59a852..e56ed51de 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1379,6 +1379,26 @@ private:
1379 return GenerateUnary(operation, "float", Type::Float, type); 1379 return GenerateUnary(operation, "float", Type::Float, type);
1380 } 1380 }
1381 1381
1382 Expression FSwizzleAdd(Operation operation) {
1383 const std::string op_a = VisitOperand(operation, 0).AsFloat();
1384 const std::string op_b = VisitOperand(operation, 1).AsFloat();
1385
1386 if (!device.HasShaderBallot()) {
1387 LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
1388 return {fmt::format("{} + {}", op_a, op_b), Type::Float};
1389 }
1390
1391 const std::string instr_mask = VisitOperand(operation, 2).AsUint();
1392 const std::string mask = code.GenerateTemporary();
1393 code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask,
1394 instr_mask);
1395
1396 const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask);
1397 const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask);
1398 return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b),
1399 Type::Float};
1400 }
1401
1382 Expression ICastFloat(Operation operation) { 1402 Expression ICastFloat(Operation operation) {
1383 return GenerateUnary(operation, "int", Type::Int, Type::Float); 1403 return GenerateUnary(operation, "int", Type::Int, Type::Float);
1384 } 1404 }
@@ -1942,34 +1962,24 @@ private:
1942 return Vote(operation, "allThreadsEqualNV"); 1962 return Vote(operation, "allThreadsEqualNV");
1943 } 1963 }
1944 1964
1945 template <const std::string_view& func> 1965 Expression ThreadId(Operation operation) {
1946 Expression Shuffle(Operation operation) { 1966 if (!device.HasShaderBallot()) {
1947 const std::string value = VisitOperand(operation, 0).AsFloat(); 1967 LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
1948 if (!device.HasWarpIntrinsics()) { 1968 return {"0U", Type::Uint};
1949 LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader");
1950 // On a "single-thread" device we are either on the same thread or out of bounds. Both
1951 // cases return the passed value.
1952 return {value, Type::Float};
1953 } 1969 }
1954 1970 return {"gl_SubGroupInvocationARB", Type::Uint};
1955 const std::string index = VisitOperand(operation, 1).AsUint();
1956 const std::string width = VisitOperand(operation, 2).AsUint();
1957 return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float};
1958 } 1971 }
1959 1972
1960 template <const std::string_view& func> 1973 Expression ShuffleIndexed(Operation operation) {
1961 Expression InRangeShuffle(Operation operation) { 1974 std::string value = VisitOperand(operation, 0).AsFloat();
1962 const std::string index = VisitOperand(operation, 0).AsUint(); 1975
1963 const std::string width = VisitOperand(operation, 1).AsUint(); 1976 if (!device.HasShaderBallot()) {
1964 if (!device.HasWarpIntrinsics()) { 1977 LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
1965 // On a "single-thread" device we are only in bounds when the requested index is 0. 1978 return {std::move(value), Type::Float};
1966 return {fmt::format("({} == 0U)", index), Type::Bool};
1967 } 1979 }
1968 1980
1969 const std::string in_range = code.GenerateTemporary(); 1981 const std::string index = VisitOperand(operation, 1).AsUint();
1970 code.AddLine("bool {};", in_range); 1982 return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
1971 code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range);
1972 return {in_range, Type::Bool};
1973 } 1983 }
1974 1984
1975 struct Func final { 1985 struct Func final {
@@ -1981,11 +1991,6 @@ private:
1981 static constexpr std::string_view Or = "Or"; 1991 static constexpr std::string_view Or = "Or";
1982 static constexpr std::string_view Xor = "Xor"; 1992 static constexpr std::string_view Xor = "Xor";
1983 static constexpr std::string_view Exchange = "Exchange"; 1993 static constexpr std::string_view Exchange = "Exchange";
1984
1985 static constexpr std::string_view ShuffleIndexed = "shuffleNV";
1986 static constexpr std::string_view ShuffleUp = "shuffleUpNV";
1987 static constexpr std::string_view ShuffleDown = "shuffleDownNV";
1988 static constexpr std::string_view ShuffleButterfly = "shuffleXorNV";
1989 }; 1994 };
1990 1995
1991 static constexpr std::array operation_decompilers = { 1996 static constexpr std::array operation_decompilers = {
@@ -2016,6 +2021,7 @@ private:
2016 &GLSLDecompiler::FTrunc, 2021 &GLSLDecompiler::FTrunc,
2017 &GLSLDecompiler::FCastInteger<Type::Int>, 2022 &GLSLDecompiler::FCastInteger<Type::Int>,
2018 &GLSLDecompiler::FCastInteger<Type::Uint>, 2023 &GLSLDecompiler::FCastInteger<Type::Uint>,
2024 &GLSLDecompiler::FSwizzleAdd,
2019 2025
2020 &GLSLDecompiler::Add<Type::Int>, 2026 &GLSLDecompiler::Add<Type::Int>,
2021 &GLSLDecompiler::Mul<Type::Int>, 2027 &GLSLDecompiler::Mul<Type::Int>,
@@ -2151,15 +2157,8 @@ private:
2151 &GLSLDecompiler::VoteAny, 2157 &GLSLDecompiler::VoteAny,
2152 &GLSLDecompiler::VoteEqual, 2158 &GLSLDecompiler::VoteEqual,
2153 2159
2154 &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>, 2160 &GLSLDecompiler::ThreadId,
2155 &GLSLDecompiler::Shuffle<Func::ShuffleUp>, 2161 &GLSLDecompiler::ShuffleIndexed,
2156 &GLSLDecompiler::Shuffle<Func::ShuffleDown>,
2157 &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>,
2158
2159 &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>,
2160 &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>,
2161 &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>,
2162 &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>,
2163 }; 2162 };
2164 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 2163 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
2165 2164
@@ -2492,6 +2491,9 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {
2492 bvec2 is_nan2 = isnan(pair2); 2491 bvec2 is_nan2 = isnan(pair2);
2493 return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); 2492 return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
2494} 2493}
2494
2495const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
2496const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
2495)"; 2497)";
2496} 2498}
2497 2499
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 42cf068b6..2850d5b59 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -783,6 +783,11 @@ private:
783 return {}; 783 return {};
784 } 784 }
785 785
786 Id FSwizzleAdd(Operation operation) {
787 UNIMPLEMENTED();
788 return {};
789 }
790
786 Id HNegate(Operation operation) { 791 Id HNegate(Operation operation) {
787 UNIMPLEMENTED(); 792 UNIMPLEMENTED();
788 return {}; 793 return {};
@@ -1195,42 +1200,12 @@ private:
1195 return {}; 1200 return {};
1196 } 1201 }
1197 1202
1198 Id ShuffleIndexed(Operation) { 1203 Id ThreadId(Operation) {
1199 UNIMPLEMENTED();
1200 return {};
1201 }
1202
1203 Id ShuffleUp(Operation) {
1204 UNIMPLEMENTED();
1205 return {};
1206 }
1207
1208 Id ShuffleDown(Operation) {
1209 UNIMPLEMENTED();
1210 return {};
1211 }
1212
1213 Id ShuffleButterfly(Operation) {
1214 UNIMPLEMENTED();
1215 return {};
1216 }
1217
1218 Id InRangeShuffleIndexed(Operation) {
1219 UNIMPLEMENTED(); 1204 UNIMPLEMENTED();
1220 return {}; 1205 return {};
1221 } 1206 }
1222 1207
1223 Id InRangeShuffleUp(Operation) { 1208 Id ShuffleIndexed(Operation) {
1224 UNIMPLEMENTED();
1225 return {};
1226 }
1227
1228 Id InRangeShuffleDown(Operation) {
1229 UNIMPLEMENTED();
1230 return {};
1231 }
1232
1233 Id InRangeShuffleButterfly(Operation) {
1234 UNIMPLEMENTED(); 1209 UNIMPLEMENTED();
1235 return {}; 1210 return {};
1236 } 1211 }
@@ -1393,6 +1368,7 @@ private:
1393 &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>, 1368 &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>,
1394 &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>, 1369 &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>,
1395 &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>, 1370 &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>,
1371 &SPIRVDecompiler::FSwizzleAdd,
1396 1372
1397 &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>, 1373 &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>,
1398 &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>, 1374 &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>,
@@ -1528,15 +1504,8 @@ private:
1528 &SPIRVDecompiler::VoteAny, 1504 &SPIRVDecompiler::VoteAny,
1529 &SPIRVDecompiler::VoteEqual, 1505 &SPIRVDecompiler::VoteEqual,
1530 1506
1507 &SPIRVDecompiler::ThreadId,
1531 &SPIRVDecompiler::ShuffleIndexed, 1508 &SPIRVDecompiler::ShuffleIndexed,
1532 &SPIRVDecompiler::ShuffleUp,
1533 &SPIRVDecompiler::ShuffleDown,
1534 &SPIRVDecompiler::ShuffleButterfly,
1535
1536 &SPIRVDecompiler::InRangeShuffleIndexed,
1537 &SPIRVDecompiler::InRangeShuffleUp,
1538 &SPIRVDecompiler::InRangeShuffleDown,
1539 &SPIRVDecompiler::InRangeShuffleButterfly,
1540 }; 1509 };
1541 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 1510 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1542 1511
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index fa8a250cc..d98d0e1dd 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -17,6 +17,7 @@ using Tegra::Shader::ShuffleOperation;
17using Tegra::Shader::VoteOperation; 17using Tegra::Shader::VoteOperation;
18 18
19namespace { 19namespace {
20
20OperationCode GetOperationCode(VoteOperation vote_op) { 21OperationCode GetOperationCode(VoteOperation vote_op) {
21 switch (vote_op) { 22 switch (vote_op) {
22 case VoteOperation::All: 23 case VoteOperation::All:
@@ -30,6 +31,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) {
30 return OperationCode::VoteAll; 31 return OperationCode::VoteAll;
31 } 32 }
32} 33}
34
33} // Anonymous namespace 35} // Anonymous namespace
34 36
35u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { 37u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
@@ -46,50 +48,59 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
46 break; 48 break;
47 } 49 }
48 case OpCode::Id::SHFL: { 50 case OpCode::Id::SHFL: {
49 Node width = [this, instr] { 51 Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
50 Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) 52 : GetRegister(instr.gpr39);
51 : GetRegister(instr.gpr39); 53 Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
52 54 : GetRegister(instr.gpr20);
53 // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has 55
54 // been done reversing Nvidia's math. It won't work on all cases due to SHFL having 56 Node thread_id = Operation(OperationCode::ThreadId);
55 // different parameters that don't properly map to GLSL's interface, but it should work 57 Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU));
56 // for cases emitted by Nvidia's compiler. 58 Node seg_mask = BitfieldExtract(mask, 8, 16);
57 if (instr.shfl.operation == ShuffleOperation::Up) {
58 return Operation(
59 OperationCode::ILogicalShiftRight,
60 Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)),
61 Immediate(8));
62 } else {
63 return Operation(OperationCode::ILogicalShiftRight,
64 Operation(OperationCode::IAdd, Immediate(0x201F),
65 Operation(OperationCode::INegate, std::move(mask))),
66 Immediate(8));
67 }
68 }();
69 59
70 const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> { 60 Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask);
61 Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask);
62 Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id,
63 Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask));
64
65 Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] {
71 switch (instr.shfl.operation) { 66 switch (instr.shfl.operation) {
72 case ShuffleOperation::Idx: 67 case ShuffleOperation::Idx:
73 return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed}; 68 return Operation(OperationCode::IBitwiseOr,
74 case ShuffleOperation::Up: 69 Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask),
75 return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp}; 70 min_thread_id);
76 case ShuffleOperation::Down: 71 case ShuffleOperation::Down:
77 return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown}; 72 return Operation(OperationCode::IAdd, thread_id, index);
73 case ShuffleOperation::Up:
74 return Operation(OperationCode::IAdd, thread_id,
75 Operation(OperationCode::INegate, index));
78 case ShuffleOperation::Bfly: 76 case ShuffleOperation::Bfly:
79 return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly}; 77 return Operation(OperationCode::IBitwiseXor, thread_id, index);
80 } 78 }
81 UNREACHABLE_MSG("Invalid SHFL operation: {}", 79 UNREACHABLE();
82 static_cast<u64>(instr.shfl.operation.Value())); 80 return Immediate(0U);
83 return {};
84 }(); 81 }();
85 82
86 // Setting the predicate before the register is intentional to avoid overwriting. 83 Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] {
87 Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) 84 if (instr.shfl.operation == ShuffleOperation::Up) {
88 : GetRegister(instr.gpr20); 85 return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id);
89 SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width)); 86 } else {
87 return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id);
88 }
89 }();
90
91 SetPredicate(bb, instr.shfl.pred48, in_bounds);
90 SetRegister( 92 SetRegister(
91 bb, instr.gpr0, 93 bb, instr.gpr0,
92 Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width))); 94 Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id));
95 break;
96 }
97 case OpCode::Id::FSWZADD: {
98 UNIMPLEMENTED_IF(instr.fswzadd.ndv);
99
100 Node op_a = GetRegister(instr.gpr8);
101 Node op_b = GetRegister(instr.gpr20);
102 Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle));
103 SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask));
93 break; 104 break;
94 } 105 }
95 default: 106 default:
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 4300d9ff4..54217e6a4 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -47,6 +47,7 @@ enum class OperationCode {
47 FTrunc, /// (MetaArithmetic, float a) -> float 47 FTrunc, /// (MetaArithmetic, float a) -> float
48 FCastInteger, /// (MetaArithmetic, int a) -> float 48 FCastInteger, /// (MetaArithmetic, int a) -> float
49 FCastUInteger, /// (MetaArithmetic, uint a) -> float 49 FCastUInteger, /// (MetaArithmetic, uint a) -> float
50 FSwizzleAdd, /// (float a, float b, uint mask) -> float
50 51
51 IAdd, /// (MetaArithmetic, int a, int b) -> int 52 IAdd, /// (MetaArithmetic, int a, int b) -> int
52 IMul, /// (MetaArithmetic, int a, int b) -> int 53 IMul, /// (MetaArithmetic, int a, int b) -> int
@@ -181,15 +182,8 @@ enum class OperationCode {
181 VoteAny, /// (bool) -> bool 182 VoteAny, /// (bool) -> bool
182 VoteEqual, /// (bool) -> bool 183 VoteEqual, /// (bool) -> bool
183 184
184 ShuffleIndexed, /// (uint value, uint index, uint width) -> uint 185 ThreadId, /// () -> uint
185 ShuffleUp, /// (uint value, uint index, uint width) -> uint 186 ShuffleIndexed, /// (uint value, uint index) -> uint
186 ShuffleDown, /// (uint value, uint index, uint width) -> uint
187 ShuffleButterfly, /// (uint value, uint index, uint width) -> uint
188
189 InRangeShuffleIndexed, /// (uint index, uint width) -> bool
190 InRangeShuffleUp, /// (uint index, uint width) -> bool
191 InRangeShuffleDown, /// (uint index, uint width) -> bool
192 InRangeShuffleButterfly, /// (uint index, uint width) -> bool
193 187
194 Amount, 188 Amount,
195}; 189};