summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/engines/shader_bytecode.h | 8
-rw-r--r-- src/video_core/shader/decode/half_set.cpp | 88
2 files changed, 75 insertions, 21 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index e7cb87589..d374b73cf 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -661,6 +661,10 @@ union Instruction {
661 constexpr Instruction(u64 value) : value{value} {} 661 constexpr Instruction(u64 value) : value{value} {}
662 constexpr Instruction(const Instruction& instr) : value(instr.value) {} 662 constexpr Instruction(const Instruction& instr) : value(instr.value) {}
663 663
664 constexpr bool Bit(u64 offset) const {
665 return ((value >> offset) & 1) != 0;
666 }
667
664 BitField<0, 8, Register> gpr0; 668 BitField<0, 8, Register> gpr0;
665 BitField<8, 8, Register> gpr8; 669 BitField<8, 8, Register> gpr8;
666 union { 670 union {
@@ -1874,7 +1878,9 @@ public:
1874 HSETP2_C, 1878 HSETP2_C,
1875 HSETP2_R, 1879 HSETP2_R,
1876 HSETP2_IMM, 1880 HSETP2_IMM,
1881 HSET2_C,
1877 HSET2_R, 1882 HSET2_R,
1883 HSET2_IMM,
1878 POPC_C, 1884 POPC_C,
1879 POPC_R, 1885 POPC_R,
1880 POPC_IMM, 1886 POPC_IMM,
@@ -2194,7 +2200,9 @@ private:
2194 INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), 2200 INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
2195 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), 2201 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
2196 INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), 2202 INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
2203 INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"),
2197 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), 2204 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
2205 INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
2198 INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), 2206 INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
2199 INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), 2207 INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
2200 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), 2208 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 848e46874..b2e88fa20 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -13,55 +13,101 @@
13 13
14 namespace VideoCommon::Shader { 14 namespace VideoCommon::Shader {
15 15
16 using std::move;
16 using Tegra::Shader::Instruction; 17 using Tegra::Shader::Instruction;
17 using Tegra::Shader::OpCode; 18 using Tegra::Shader::OpCode;
19 using Tegra::Shader::PredCondition;
18 20
19 u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { 21 u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
20 const Instruction instr = {program_code[pc]}; 22 const Instruction instr = {program_code[pc]};
21 const auto opcode = OpCode::Decode(instr); 23 const auto opcode = OpCode::Decode(instr);
22 24
23 if (instr.hset2.ftz == 0) { 25 PredCondition cond;
24 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); 26 bool bf;
27 bool ftz;
28 bool neg_a;
29 bool abs_a;
30 bool neg_b;
31 bool abs_b;
32 switch (opcode->get().GetId()) {
33 case OpCode::Id::HSET2_C:
34 case OpCode::Id::HSET2_IMM:
35 cond = instr.hsetp2.cbuf_and_imm.cond;
36 bf = instr.Bit(53);
37 ftz = instr.Bit(54);
38 neg_a = instr.Bit(43);
39 abs_a = instr.Bit(44);
40 neg_b = instr.Bit(56);
41 abs_b = instr.Bit(54);
42 break;
43 case OpCode::Id::HSET2_R:
44 cond = instr.hsetp2.reg.cond;
45 bf = instr.Bit(49);
46 ftz = instr.Bit(50);
47 neg_a = instr.Bit(43);
48 abs_a = instr.Bit(44);
49 neg_b = instr.Bit(31);
50 abs_b = instr.Bit(30);
51 break;
52 default:
53 UNREACHABLE();
25 } 54 }
26 55
27 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); 56 Node op_b = [this, instr, opcode] {
28 op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
29
30 Node op_b = [&]() {
31 switch (opcode->get().GetId()) { 57 switch (opcode->get().GetId()) {
58 case OpCode::Id::HSET2_C:
59 // Inform as unimplemented as this is not tested.
60 UNIMPLEMENTED_MSG("HSET2_C is not implemented");
61 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
32 case OpCode::Id::HSET2_R: 62 case OpCode::Id::HSET2_R:
33 return GetRegister(instr.gpr20); 63 return GetRegister(instr.gpr20);
64 case OpCode::Id::HSET2_IMM:
65 return UnpackHalfImmediate(instr, true);
34 default: 66 default:
35 UNREACHABLE(); 67 UNREACHABLE();
36 return Immediate(0); 68 return Node{};
37 } 69 }
38 }(); 70 }();
39 op_b = UnpackHalfFloat(op_b, instr.hset2.type_b);
40 op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b);
41 71
42 const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); 72 if (!ftz) {
73 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
74 }
75
76 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
77 op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a);
78
79 switch (opcode->get().GetId()) {
80 case OpCode::Id::HSET2_R:
81 op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b);
82 [[fallthrough]];
83 case OpCode::Id::HSET2_C:
84 op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b);
85 break;
86 default:
87 break;
88 }
43 89
44 const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b); 90 Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
91
92 Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b);
45 93
46 const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); 94 const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
47 95
48 // HSET2 operates on each half float in the pack. 96 // HSET2 operates on each half float in the pack.
49 std::array<Node, 2> values; 97 std::array<Node, 2> values;
50 for (u32 i = 0; i < 2; ++i) { 98 for (u32 i = 0; i < 2; ++i) {
51 const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff; 99 const u32 raw_value = bf ? 0x3c00 : 0xffff;
52 const Node true_value = Immediate(raw_value << (i * 16)); 100 Node true_value = Immediate(raw_value << (i * 16));
53 const Node false_value = Immediate(0); 101 Node false_value = Immediate(0);
54
55 const Node comparison =
56 Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
57 const Node predicate = Operation(combiner, comparison, second_pred);
58 102
103 Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
104 Node predicate = Operation(combiner, comparison, second_pred);
59 values[i] = 105 values[i] =
60 Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value); 106 Operation(OperationCode::Select, predicate, move(true_value), move(false_value));
61 } 107 }
62 108
63 const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]); 109 Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]);
64 SetRegister(bb, instr.gpr0, value); 110 SetRegister(bb, instr.gpr0, move(value));
65 111
66 return pc; 112 return pc;
67 } 113 }