summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Fernando Sahmkow2019-07-20 17:25:08 -0400
committerGravatar GitHub2019-07-20 17:25:08 -0400
commit0a67416971c15b205f61e5db4ec5955b49638774 (patch)
treeb4e2498af98cfa334b801b5b71263a0330579cc0
parentUpdate README.md (diff)
parentshader/half_set_predicate: Fix HSETP2 implementation (diff)
downloadyuzu-0a67416971c15b205f61e5db4ec5955b49638774.tar.gz
yuzu-0a67416971c15b205f61e5db4ec5955b49638774.tar.xz
yuzu-0a67416971c15b205f61e5db4ec5955b49638774.zip
Merge pull request #2693 from ReinUsesLisp/hsetp2
shader/half_set_predicate: Implement missing HSETP2 variants
Diffstat (limited to '')
-rw-r--r--src/video_core/engines/shader_bytecode.h26
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp16
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp17
-rw-r--r--src/video_core/shader/decode/half_set_predicate.cpp69
-rw-r--r--src/video_core/shader/node.h3
5 files changed, 70 insertions, 61 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 79d469b88..8520a0143 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -931,8 +931,6 @@ union Instruction {
931 } csetp; 931 } csetp;
932 932
933 union { 933 union {
934 BitField<35, 4, PredCondition> cond;
935 BitField<49, 1, u64> h_and;
936 BitField<6, 1, u64> ftz; 934 BitField<6, 1, u64> ftz;
937 BitField<45, 2, PredOperation> op; 935 BitField<45, 2, PredOperation> op;
938 BitField<3, 3, u64> pred3; 936 BitField<3, 3, u64> pred3;
@@ -940,9 +938,21 @@ union Instruction {
940 BitField<43, 1, u64> negate_a; 938 BitField<43, 1, u64> negate_a;
941 BitField<44, 1, u64> abs_a; 939 BitField<44, 1, u64> abs_a;
942 BitField<47, 2, HalfType> type_a; 940 BitField<47, 2, HalfType> type_a;
943 BitField<31, 1, u64> negate_b; 941 union {
944 BitField<30, 1, u64> abs_b; 942 BitField<35, 4, PredCondition> cond;
945 BitField<28, 2, HalfType> type_b; 943 BitField<49, 1, u64> h_and;
944 BitField<31, 1, u64> negate_b;
945 BitField<30, 1, u64> abs_b;
946 BitField<28, 2, HalfType> type_b;
947 } reg;
948 union {
949 BitField<56, 1, u64> negate_b;
950 BitField<54, 1, u64> abs_b;
951 } cbuf;
952 union {
953 BitField<49, 4, PredCondition> cond;
954 BitField<53, 1, u64> h_and;
955 } cbuf_and_imm;
946 BitField<42, 1, u64> neg_pred; 956 BitField<42, 1, u64> neg_pred;
947 BitField<39, 3, u64> pred39; 957 BitField<39, 3, u64> pred39;
948 } hsetp2; 958 } hsetp2;
@@ -1548,7 +1558,9 @@ public:
1548 HFMA2_RC, 1558 HFMA2_RC,
1549 HFMA2_RR, 1559 HFMA2_RR,
1550 HFMA2_IMM_R, 1560 HFMA2_IMM_R,
1561 HSETP2_C,
1551 HSETP2_R, 1562 HSETP2_R,
1563 HSETP2_IMM,
1552 HSET2_R, 1564 HSET2_R,
1553 POPC_C, 1565 POPC_C,
1554 POPC_R, 1566 POPC_R,
@@ -1831,7 +1843,9 @@ private:
1831 INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), 1843 INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"),
1832 INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), 1844 INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"),
1833 INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), 1845 INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"),
1834 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"), 1846 INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
1847 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
1848 INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
1835 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), 1849 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
1836 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), 1850 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
1837 INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), 1851 INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 119073776..e19d502bc 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -254,10 +254,6 @@ public:
254 } 254 }
255 255
256private: 256private:
257 using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation);
258 using OperationDecompilersArray =
259 std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
260
261 void DeclareVertex() { 257 void DeclareVertex() {
262 if (stage != ShaderStage::Vertex) 258 if (stage != ShaderStage::Vertex)
263 return; 259 return;
@@ -1400,14 +1396,10 @@ private:
1400 return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint)); 1396 return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint));
1401 } 1397 }
1402 1398
1403 std::string LogicalAll2(Operation operation) { 1399 std::string LogicalAnd2(Operation operation) {
1404 return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); 1400 return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
1405 } 1401 }
1406 1402
1407 std::string LogicalAny2(Operation operation) {
1408 return GenerateUnary(operation, "any", Type::Bool, Type::Bool2);
1409 }
1410
1411 template <bool with_nan> 1403 template <bool with_nan>
1412 std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) { 1404 std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) {
1413 const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, 1405 const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
@@ -1714,7 +1706,7 @@ private:
1714 return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; 1706 return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')';
1715 } 1707 }
1716 1708
1717 static constexpr OperationDecompilersArray operation_decompilers = { 1709 static constexpr std::array operation_decompilers = {
1718 &GLSLDecompiler::Assign, 1710 &GLSLDecompiler::Assign,
1719 1711
1720 &GLSLDecompiler::Select, 1712 &GLSLDecompiler::Select,
@@ -1798,8 +1790,7 @@ private:
1798 &GLSLDecompiler::LogicalXor, 1790 &GLSLDecompiler::LogicalXor,
1799 &GLSLDecompiler::LogicalNegate, 1791 &GLSLDecompiler::LogicalNegate,
1800 &GLSLDecompiler::LogicalPick2, 1792 &GLSLDecompiler::LogicalPick2,
1801 &GLSLDecompiler::LogicalAll2, 1793 &GLSLDecompiler::LogicalAnd2,
1802 &GLSLDecompiler::LogicalAny2,
1803 1794
1804 &GLSLDecompiler::LogicalLessThan<Type::Float>, 1795 &GLSLDecompiler::LogicalLessThan<Type::Float>,
1805 &GLSLDecompiler::LogicalEqual<Type::Float>, 1796 &GLSLDecompiler::LogicalEqual<Type::Float>,
@@ -1863,6 +1854,7 @@ private:
1863 &GLSLDecompiler::WorkGroupId<1>, 1854 &GLSLDecompiler::WorkGroupId<1>,
1864 &GLSLDecompiler::WorkGroupId<2>, 1855 &GLSLDecompiler::WorkGroupId<2>,
1865 }; 1856 };
1857 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1866 1858
1867 std::string GetRegister(u32 index) const { 1859 std::string GetRegister(u32 index) const {
1868 return GetDeclarationWithSuffix(index, "gpr"); 1860 return GetDeclarationWithSuffix(index, "gpr");
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 9b2d8e987..d267712c9 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -205,10 +205,6 @@ public:
205 } 205 }
206 206
207private: 207private:
208 using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation);
209 using OperationDecompilersArray =
210 std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
211
212 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); 208 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
213 209
214 void AllocateBindings() { 210 void AllocateBindings() {
@@ -804,12 +800,7 @@ private:
804 return {}; 800 return {};
805 } 801 }
806 802
807 Id LogicalAll2(Operation operation) { 803 Id LogicalAnd2(Operation operation) {
808 UNIMPLEMENTED();
809 return {};
810 }
811
812 Id LogicalAny2(Operation operation) {
813 UNIMPLEMENTED(); 804 UNIMPLEMENTED();
814 return {}; 805 return {};
815 } 806 }
@@ -1206,7 +1197,7 @@ private:
1206 return {}; 1197 return {};
1207 } 1198 }
1208 1199
1209 static constexpr OperationDecompilersArray operation_decompilers = { 1200 static constexpr std::array operation_decompilers = {
1210 &SPIRVDecompiler::Assign, 1201 &SPIRVDecompiler::Assign,
1211 1202
1212 &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, 1203 &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
@@ -1291,8 +1282,7 @@ private:
1291 &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, 1282 &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
1292 &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, 1283 &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
1293 &SPIRVDecompiler::LogicalPick2, 1284 &SPIRVDecompiler::LogicalPick2,
1294 &SPIRVDecompiler::LogicalAll2, 1285 &SPIRVDecompiler::LogicalAnd2,
1295 &SPIRVDecompiler::LogicalAny2,
1296 1286
1297 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, 1287 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
1298 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, 1288 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
@@ -1357,6 +1347,7 @@ private:
1357 &SPIRVDecompiler::WorkGroupId<1>, 1347 &SPIRVDecompiler::WorkGroupId<1>,
1358 &SPIRVDecompiler::WorkGroupId<2>, 1348 &SPIRVDecompiler::WorkGroupId<2>,
1359 }; 1349 };
1350 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1360 1351
1361 const VKDevice& device; 1352 const VKDevice& device;
1362 const ShaderIR& ir; 1353 const ShaderIR& ir;
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index d59d15bd8..ad180d6df 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -23,38 +23,51 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
23 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); 23 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
24 op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); 24 op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
25 25
26 Node op_b = [&]() { 26 Tegra::Shader::PredCondition cond{};
27 switch (opcode->get().GetId()) { 27 bool h_and{};
28 case OpCode::Id::HSETP2_R: 28 Node op_b{};
29 return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, 29 switch (opcode->get().GetId()) {
30 instr.hsetp2.negate_b); 30 case OpCode::Id::HSETP2_C:
31 default: 31 cond = instr.hsetp2.cbuf_and_imm.cond;
32 UNREACHABLE(); 32 h_and = instr.hsetp2.cbuf_and_imm.h_and;
33 return Immediate(0); 33 op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
34 } 34 instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
35 }(); 35 break;
36 op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b); 36 case OpCode::Id::HSETP2_IMM:
37 37 cond = instr.hsetp2.cbuf_and_imm.cond;
38 // We can't use the constant predicate as destination. 38 h_and = instr.hsetp2.cbuf_and_imm.h_and;
39 ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); 39 op_b = UnpackHalfImmediate(instr, true);
40 40 break;
41 const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); 41 case OpCode::Id::HSETP2_R:
42 cond = instr.hsetp2.reg.cond;
43 h_and = instr.hsetp2.reg.h_and;
44 op_b =
45 UnpackHalfFloat(GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.reg.abs_b,
46 instr.hsetp2.reg.negate_b),
47 instr.hsetp2.reg.type_b);
48 break;
49 default:
50 UNREACHABLE();
51 op_b = Immediate(0);
52 }
42 53
43 const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); 54 const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
44 const OperationCode pair_combiner = 55 const Node pred39 = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);
45 instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;
46
47 const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b);
48 const Node first_pred = Operation(pair_combiner, comparison);
49 56
50 // Set the primary predicate to the result of Predicate OP SecondPredicate 57 const auto Write = [&](u64 dest, Node src) {
51 const Node value = Operation(combiner, first_pred, second_pred); 58 SetPredicate(bb, dest, Operation(combiner, std::move(src), pred39));
52 SetPredicate(bb, instr.hsetp2.pred3, value); 59 };
53 60
54 if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) { 61 const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
55 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled 62 const u64 first = instr.hsetp2.pred0;
56 const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred); 63 const u64 second = instr.hsetp2.pred3;
57 SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred)); 64 if (h_and) {
65 const Node joined = Operation(OperationCode::LogicalAnd2, comparison);
66 Write(first, joined);
67 Write(second, Operation(OperationCode::LogicalNegate, joined));
68 } else {
69 Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0u)));
70 Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1u)));
58 } 71 }
59 72
60 return pc; 73 return pc;
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 7427ed896..715184d67 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -101,8 +101,7 @@ enum class OperationCode {
101 LogicalXor, /// (bool a, bool b) -> bool 101 LogicalXor, /// (bool a, bool b) -> bool
102 LogicalNegate, /// (bool a) -> bool 102 LogicalNegate, /// (bool a) -> bool
103 LogicalPick2, /// (bool2 pair, uint index) -> bool 103 LogicalPick2, /// (bool2 pair, uint index) -> bool
104 LogicalAll2, /// (bool2 a) -> bool 104 LogicalAnd2, /// (bool2 a) -> bool
105 LogicalAny2, /// (bool2 a) -> bool
106 105
107 LogicalFLessThan, /// (float a, float b) -> bool 106 LogicalFLessThan, /// (float a, float b) -> bool
108 LogicalFEqual, /// (float a, float b) -> bool 107 LogicalFEqual, /// (float a, float b) -> bool