Merge pull request #3734 from ReinUsesLisp/half-float-mods

decode/arithmetic_half: Fix HADD2 and HMUL2 absolute and negation bits
author: bunnei 2020-04-25 00:41:43 -0400
committer: GitHub 2020-04-25 00:41:43 -0400
commit: 4e37825dab85f9f3895f0f545a60becc049064e5 (patch)
tree: fcf63a6f6abab52711d312f286f9de5e410e45e9
parent: Merge pull request #3780 from lioncash/process (diff)
parent: decode/arithmetic_half: Fix HADD2 and HMUL2 absolute and negation bits (diff)
download: yuzu-4e37825dab85f9f3895f0f545a60becc049064e5.tar.gz
yuzu-4e37825dab85f9f3895f0f545a60becc049064e5.tar.xz
yuzu-4e37825dab85f9f3895f0f545a60becc049064e5.zip
2 files changed, 37 insertions, 16 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 73d98fe8c..cde3a26b9 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -818,11 +818,9 @@ union Instruction {
        BitField<32, 1, u64> saturate;
        BitField<49, 2, HalfMerge> merge;
-        BitField<43, 1, u64> negate_a;
        BitField<44, 1, u64> abs_a;
        BitField<47, 2, HalfType> type_a;
-        BitField<31, 1, u64> negate_b;
        BitField<30, 1, u64> abs_b;
        BitField<28, 2, HalfType> type_b;
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index ee7d9a29d..a276aee44 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -19,22 +19,46 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
-    if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
-        opcode->get().GetId() == OpCode::Id::HADD2_R) {
+    bool negate_a = false;
+    bool negate_b = false;
+    bool absolute_a = false;
+    bool absolute_b = false;
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::HADD2_R:
        if (instr.alu_half.ftz == 0) {
            LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
        }
+        negate_a = ((instr.value >> 43) & 1) != 0;
+        negate_b = ((instr.value >> 31) & 1) != 0;
+        absolute_a = ((instr.value >> 44) & 1) != 0;
+        absolute_b = ((instr.value >> 30) & 1) != 0;
+        break;
+    case OpCode::Id::HADD2_C:
+        if (instr.alu_half.ftz == 0) {
+            LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+        }
+        negate_a = ((instr.value >> 43) & 1) != 0;
+        negate_b = ((instr.value >> 56) & 1) != 0;
+        absolute_a = ((instr.value >> 44) & 1) != 0;
+        absolute_b = ((instr.value >> 54) & 1) != 0;
+        break;
+    case OpCode::Id::HMUL2_R:
+        negate_a = ((instr.value >> 43) & 1) != 0;
+        absolute_a = ((instr.value >> 44) & 1) != 0;
+        absolute_b = ((instr.value >> 30) & 1) != 0;
+        break;
+    case OpCode::Id::HMUL2_C:
+        negate_b = ((instr.value >> 31) & 1) != 0;
+        absolute_a = ((instr.value >> 44) & 1) != 0;
+        absolute_b = ((instr.value >> 54) & 1) != 0;
+        break;
    }
-    const bool negate_a =
-        opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
-    const bool negate_b =
-        opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;
    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
-    op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a);
+    op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a);
-    auto [type_b, op_b] = [&]() -> std::tuple<HalfType, Node> {
+    auto [type_b, op_b] = [this, instr, opcode]() -> std::pair<HalfType, Node> {
        switch (opcode->get().GetId()) {
        case OpCode::Id::HADD2_C:
        case OpCode::Id::HMUL2_C:
@@ -48,17 +72,16 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
        }
    }();
    op_b = UnpackHalfFloat(op_b, type_b);
-    // redeclaration to avoid a bug in clang with reusing local bindings in lambdas
-    Node op_b_alt = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
+    op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b);
-    Node value = [&]() {
+    Node value = [this, opcode, op_a, op_b = op_b] {
        switch (opcode->get().GetId()) {
        case OpCode::Id::HADD2_C:
        case OpCode::Id::HADD2_R:
-            return Operation(OperationCode::HAdd, PRECISE, op_a, op_b_alt);
+            return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
        case OpCode::Id::HMUL2_C:
        case OpCode::Id::HMUL2_R:
-            return Operation(OperationCode::HMul, PRECISE, op_a, op_b_alt);
+            return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
        default:
            UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
            return Immediate(0);
author	bunnei	2020-04-25 00:41:43 -0400
committer	GitHub	2020-04-25 00:41:43 -0400
commit	4e37825dab85f9f3895f0f545a60becc049064e5 (patch)
tree	fcf63a6f6abab52711d312f286f9de5e410e45e9
parent	Merge pull request #3780 from lioncash/process (diff)
parent	decode/arithmetic_half: Fix HADD2 and HMUL2 absolute and negation bits (diff)
download	yuzu-4e37825dab85f9f3895f0f545a60becc049064e5.tar.gz yuzu-4e37825dab85f9f3895f0f545a60becc049064e5.tar.xz yuzu-4e37825dab85f9f3895f0f545a60becc049064e5.zip