summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: ReinUsesLisp, 2020-04-20 03:24:00 -0300
committer: ReinUsesLisp, 2020-04-23 18:29:38 -0300
commit: dbaebd8582c33807ca25acbf36cbd90587c9cfa9 (patch)
tree: e64142da235b23d85320ad53741cfb2b0cd51f5f /src
parent: Merge pull request #3719 from jbeich/clang (diff)
download: yuzu-dbaebd8582c33807ca25acbf36cbd90587c9cfa9.tar.gz
yuzu-dbaebd8582c33807ca25acbf36cbd90587c9cfa9.tar.xz
yuzu-dbaebd8582c33807ca25acbf36cbd90587c9cfa9.zip
decode/arithmetic_half: Fix HADD2 and HMUL2 absolute and negation bits
The encoding for the negation and absolute value bits was wrong. Extracting is now done manually. Similar instructions having different encodings is the rule, not the exception. To keep sanity and readability, I preferred to extract the desired bits manually, per opcode. This is implemented against nxas (https://github.com/ReinUsesLisp/nxas/blob/8dbc38995711cc12206aa370145a3a02665fd989/table.h#L68), which is itself tested against nvdisasm, Nvidia's official disassembler.
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/engines/shader_bytecode.h | 2
-rw-r--r-- src/video_core/shader/decode/arithmetic_half.cpp | 51
2 files changed, 37 insertions, 16 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 7231597d4..44dbb8088 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -817,11 +817,9 @@ union Instruction {
817 BitField<32, 1, u64> saturate; 817 BitField<32, 1, u64> saturate;
818 BitField<49, 2, HalfMerge> merge; 818 BitField<49, 2, HalfMerge> merge;
819 819
820 BitField<43, 1, u64> negate_a;
821 BitField<44, 1, u64> abs_a; 820 BitField<44, 1, u64> abs_a;
822 BitField<47, 2, HalfType> type_a; 821 BitField<47, 2, HalfType> type_a;
823 822
824 BitField<31, 1, u64> negate_b;
825 BitField<30, 1, u64> abs_b; 823 BitField<30, 1, u64> abs_b;
826 BitField<28, 2, HalfType> type_b; 824 BitField<28, 2, HalfType> type_b;
827 825
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index ee7d9a29d..a276aee44 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -19,22 +19,46 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
19 const Instruction instr = {program_code[pc]}; 19 const Instruction instr = {program_code[pc]};
20 const auto opcode = OpCode::Decode(instr); 20 const auto opcode = OpCode::Decode(instr);
21 21
22 if (opcode->get().GetId() == OpCode::Id::HADD2_C || 22 bool negate_a = false;
23 opcode->get().GetId() == OpCode::Id::HADD2_R) { 23 bool negate_b = false;
24 bool absolute_a = false;
25 bool absolute_b = false;
26
27 switch (opcode->get().GetId()) {
28 case OpCode::Id::HADD2_R:
24 if (instr.alu_half.ftz == 0) { 29 if (instr.alu_half.ftz == 0) {
25 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); 30 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
26 } 31 }
32 negate_a = ((instr.value >> 43) & 1) != 0;
33 negate_b = ((instr.value >> 31) & 1) != 0;
34 absolute_a = ((instr.value >> 44) & 1) != 0;
35 absolute_b = ((instr.value >> 30) & 1) != 0;
36 break;
37 case OpCode::Id::HADD2_C:
38 if (instr.alu_half.ftz == 0) {
39 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
40 }
41 negate_a = ((instr.value >> 43) & 1) != 0;
42 negate_b = ((instr.value >> 56) & 1) != 0;
43 absolute_a = ((instr.value >> 44) & 1) != 0;
44 absolute_b = ((instr.value >> 54) & 1) != 0;
45 break;
46 case OpCode::Id::HMUL2_R:
47 negate_a = ((instr.value >> 43) & 1) != 0;
48 absolute_a = ((instr.value >> 44) & 1) != 0;
49 absolute_b = ((instr.value >> 30) & 1) != 0;
50 break;
51 case OpCode::Id::HMUL2_C:
52 negate_b = ((instr.value >> 31) & 1) != 0;
53 absolute_a = ((instr.value >> 44) & 1) != 0;
54 absolute_b = ((instr.value >> 54) & 1) != 0;
55 break;
27 } 56 }
28 57
29 const bool negate_a =
30 opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
31 const bool negate_b =
32 opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;
33
34 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); 58 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
35 op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a); 59 op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a);
36 60
37 auto [type_b, op_b] = [&]() -> std::tuple<HalfType, Node> { 61 auto [type_b, op_b] = [this, instr, opcode]() -> std::pair<HalfType, Node> {
38 switch (opcode->get().GetId()) { 62 switch (opcode->get().GetId()) {
39 case OpCode::Id::HADD2_C: 63 case OpCode::Id::HADD2_C:
40 case OpCode::Id::HMUL2_C: 64 case OpCode::Id::HMUL2_C:
@@ -48,17 +72,16 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
48 } 72 }
49 }(); 73 }();
50 op_b = UnpackHalfFloat(op_b, type_b); 74 op_b = UnpackHalfFloat(op_b, type_b);
51 // redeclaration to avoid a bug in clang with reusing local bindings in lambdas 75 op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b);
52 Node op_b_alt = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
53 76
54 Node value = [&]() { 77 Node value = [this, opcode, op_a, op_b = op_b] {
55 switch (opcode->get().GetId()) { 78 switch (opcode->get().GetId()) {
56 case OpCode::Id::HADD2_C: 79 case OpCode::Id::HADD2_C:
57 case OpCode::Id::HADD2_R: 80 case OpCode::Id::HADD2_R:
58 return Operation(OperationCode::HAdd, PRECISE, op_a, op_b_alt); 81 return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
59 case OpCode::Id::HMUL2_C: 82 case OpCode::Id::HMUL2_C:
60 case OpCode::Id::HMUL2_R: 83 case OpCode::Id::HMUL2_R:
61 return Operation(OperationCode::HMul, PRECISE, op_a, op_b_alt); 84 return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
62 default: 85 default:
63 UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); 86 UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
64 return Immediate(0); 87 return Immediate(0);