diff options
| author | 2020-06-26 23:14:56 -0400 | |
|---|---|---|
| committer | 2020-06-26 23:14:56 -0400 | |
| commit | efef7b151734207b4cc9487147d9bed4328c13cc (patch) | |
| tree | 08c28074ff08227f05a43b7914805e600c824243 | |
| parent | Merge pull request #4178 from VolcaEM/patch-6 (diff) | |
| parent | shader/half_set: Implement HSET2_IMM (diff) | |
| download | yuzu-efef7b151734207b4cc9487147d9bed4328c13cc.tar.gz yuzu-efef7b151734207b4cc9487147d9bed4328c13cc.tar.xz yuzu-efef7b151734207b4cc9487147d9bed4328c13cc.zip | |
Merge pull request #4147 from ReinUsesLisp/hset2-imm
shader/half_set: Implement HSET2_IMM
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 8 | ||||
| -rw-r--r-- | src/video_core/shader/decode/half_set.cpp | 88 |
2 files changed, 75 insertions, 21 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index e7cb87589..d374b73cf 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
| @@ -661,6 +661,10 @@ union Instruction { | |||
| 661 | constexpr Instruction(u64 value) : value{value} {} | 661 | constexpr Instruction(u64 value) : value{value} {} |
| 662 | constexpr Instruction(const Instruction& instr) : value(instr.value) {} | 662 | constexpr Instruction(const Instruction& instr) : value(instr.value) {} |
| 663 | 663 | ||
| 664 | constexpr bool Bit(u64 offset) const { | ||
| 665 | return ((value >> offset) & 1) != 0; | ||
| 666 | } | ||
| 667 | |||
| 664 | BitField<0, 8, Register> gpr0; | 668 | BitField<0, 8, Register> gpr0; |
| 665 | BitField<8, 8, Register> gpr8; | 669 | BitField<8, 8, Register> gpr8; |
| 666 | union { | 670 | union { |
| @@ -1874,7 +1878,9 @@ public: | |||
| 1874 | HSETP2_C, | 1878 | HSETP2_C, |
| 1875 | HSETP2_R, | 1879 | HSETP2_R, |
| 1876 | HSETP2_IMM, | 1880 | HSETP2_IMM, |
| 1881 | HSET2_C, | ||
| 1877 | HSET2_R, | 1882 | HSET2_R, |
| 1883 | HSET2_IMM, | ||
| 1878 | POPC_C, | 1884 | POPC_C, |
| 1879 | POPC_R, | 1885 | POPC_R, |
| 1880 | POPC_IMM, | 1886 | POPC_IMM, |
| @@ -2194,7 +2200,9 @@ private: | |||
| 2194 | INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), | 2200 | INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), |
| 2195 | INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), | 2201 | INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), |
| 2196 | INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), | 2202 | INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), |
| 2203 | INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"), | ||
| 2197 | INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), | 2204 | INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), |
| 2205 | INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"), | ||
| 2198 | INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), | 2206 | INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), |
| 2199 | INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), | 2207 | INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), |
| 2200 | INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), | 2208 | INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), |
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 848e46874..b2e88fa20 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
| @@ -13,55 +13,101 @@ | |||
| 13 | 13 | ||
| 14 | namespace VideoCommon::Shader { | 14 | namespace VideoCommon::Shader { |
| 15 | 15 | ||
| 16 | using std::move; | ||
| 16 | using Tegra::Shader::Instruction; | 17 | using Tegra::Shader::Instruction; |
| 17 | using Tegra::Shader::OpCode; | 18 | using Tegra::Shader::OpCode; |
| 19 | using Tegra::Shader::PredCondition; | ||
| 18 | 20 | ||
| 19 | u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { | 21 | u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { |
| 20 | const Instruction instr = {program_code[pc]}; | 22 | const Instruction instr = {program_code[pc]}; |
| 21 | const auto opcode = OpCode::Decode(instr); | 23 | const auto opcode = OpCode::Decode(instr); |
| 22 | 24 | ||
| 23 | if (instr.hset2.ftz == 0) { | 25 | PredCondition cond; |
| 24 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | 26 | bool bf; |
| 27 | bool ftz; | ||
| 28 | bool neg_a; | ||
| 29 | bool abs_a; | ||
| 30 | bool neg_b; | ||
| 31 | bool abs_b; | ||
| 32 | switch (opcode->get().GetId()) { | ||
| 33 | case OpCode::Id::HSET2_C: | ||
| 34 | case OpCode::Id::HSET2_IMM: | ||
| 35 | cond = instr.hsetp2.cbuf_and_imm.cond; | ||
| 36 | bf = instr.Bit(53); | ||
| 37 | ftz = instr.Bit(54); | ||
| 38 | neg_a = instr.Bit(43); | ||
| 39 | abs_a = instr.Bit(44); | ||
| 40 | neg_b = instr.Bit(56); | ||
| 41 | abs_b = instr.Bit(54); | ||
| 42 | break; | ||
| 43 | case OpCode::Id::HSET2_R: | ||
| 44 | cond = instr.hsetp2.reg.cond; | ||
| 45 | bf = instr.Bit(49); | ||
| 46 | ftz = instr.Bit(50); | ||
| 47 | neg_a = instr.Bit(43); | ||
| 48 | abs_a = instr.Bit(44); | ||
| 49 | neg_b = instr.Bit(31); | ||
| 50 | abs_b = instr.Bit(30); | ||
| 51 | break; | ||
| 52 | default: | ||
| 53 | UNREACHABLE(); | ||
| 25 | } | 54 | } |
| 26 | 55 | ||
| 27 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); | 56 | Node op_b = [this, instr, opcode] { |
| 28 | op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a); | ||
| 29 | |||
| 30 | Node op_b = [&]() { | ||
| 31 | switch (opcode->get().GetId()) { | 57 | switch (opcode->get().GetId()) { |
| 58 | case OpCode::Id::HSET2_C: | ||
| 59 | // Inform as unimplemented as this is not tested. | ||
| 60 | UNIMPLEMENTED_MSG("HSET2_C is not implemented"); | ||
| 61 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 32 | case OpCode::Id::HSET2_R: | 62 | case OpCode::Id::HSET2_R: |
| 33 | return GetRegister(instr.gpr20); | 63 | return GetRegister(instr.gpr20); |
| 64 | case OpCode::Id::HSET2_IMM: | ||
| 65 | return UnpackHalfImmediate(instr, true); | ||
| 34 | default: | 66 | default: |
| 35 | UNREACHABLE(); | 67 | UNREACHABLE(); |
| 36 | return Immediate(0); | 68 | return Node{}; |
| 37 | } | 69 | } |
| 38 | }(); | 70 | }(); |
| 39 | op_b = UnpackHalfFloat(op_b, instr.hset2.type_b); | ||
| 40 | op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b); | ||
| 41 | 71 | ||
| 42 | const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); | 72 | if (!ftz) { |
| 73 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 74 | } | ||
| 75 | |||
| 76 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); | ||
| 77 | op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a); | ||
| 78 | |||
| 79 | switch (opcode->get().GetId()) { | ||
| 80 | case OpCode::Id::HSET2_R: | ||
| 81 | op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b); | ||
| 82 | [[fallthrough]]; | ||
| 83 | case OpCode::Id::HSET2_C: | ||
| 84 | op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b); | ||
| 85 | break; | ||
| 86 | default: | ||
| 87 | break; | ||
| 88 | } | ||
| 43 | 89 | ||
| 44 | const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b); | 90 | Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); |
| 91 | |||
| 92 | Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b); | ||
| 45 | 93 | ||
| 46 | const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); | 94 | const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); |
| 47 | 95 | ||
| 48 | // HSET2 operates on each half float in the pack. | 96 | // HSET2 operates on each half float in the pack. |
| 49 | std::array<Node, 2> values; | 97 | std::array<Node, 2> values; |
| 50 | for (u32 i = 0; i < 2; ++i) { | 98 | for (u32 i = 0; i < 2; ++i) { |
| 51 | const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff; | 99 | const u32 raw_value = bf ? 0x3c00 : 0xffff; |
| 52 | const Node true_value = Immediate(raw_value << (i * 16)); | 100 | Node true_value = Immediate(raw_value << (i * 16)); |
| 53 | const Node false_value = Immediate(0); | 101 | Node false_value = Immediate(0); |
| 54 | |||
| 55 | const Node comparison = | ||
| 56 | Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); | ||
| 57 | const Node predicate = Operation(combiner, comparison, second_pred); | ||
| 58 | 102 | ||
| 103 | Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); | ||
| 104 | Node predicate = Operation(combiner, comparison, second_pred); | ||
| 59 | values[i] = | 105 | values[i] = |
| 60 | Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value); | 106 | Operation(OperationCode::Select, predicate, move(true_value), move(false_value)); |
| 61 | } | 107 | } |
| 62 | 108 | ||
| 63 | const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]); | 109 | Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]); |
| 64 | SetRegister(bb, instr.gpr0, value); | 110 | SetRegister(bb, instr.gpr0, move(value)); |
| 65 | 111 | ||
| 66 | return pc; | 112 | return pc; |
| 67 | } | 113 | } |