summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/shader/decode/arithmetic_half.cpp 22
-rw-r--r-- src/video_core/shader/decode/hfma2.cpp 9
2 files changed, 16 insertions, 15 deletions
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index 9467f9417..2098c1170 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -9,6 +9,7 @@
9 9
10namespace VideoCommon::Shader { 10namespace VideoCommon::Shader {
11 11
12using Tegra::Shader::HalfType;
12using Tegra::Shader::Instruction; 13using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 14using Tegra::Shader::OpCode;
14 15
@@ -22,7 +23,6 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
22 LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); 23 LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
23 } 24 }
24 } 25 }
25 UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");
26 26
27 const bool negate_a = 27 const bool negate_a =
28 opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; 28 opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
@@ -32,35 +32,37 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
32 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); 32 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
33 op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a); 33 op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a);
34 34
35 Node op_b = [&]() { 35 auto [type_b, op_b] = [&]() -> std::tuple<HalfType, Node> {
36 switch (opcode->get().GetId()) { 36 switch (opcode->get().GetId()) {
37 case OpCode::Id::HADD2_C: 37 case OpCode::Id::HADD2_C:
38 case OpCode::Id::HMUL2_C: 38 case OpCode::Id::HMUL2_C:
39 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); 39 return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
40 case OpCode::Id::HADD2_R: 40 case OpCode::Id::HADD2_R:
41 case OpCode::Id::HMUL2_R: 41 case OpCode::Id::HMUL2_R:
42 return GetRegister(instr.gpr20); 42 return {instr.alu_half.type_b, GetRegister(instr.gpr20)};
43 default: 43 default:
44 UNREACHABLE(); 44 UNREACHABLE();
45 return Immediate(0); 45 return {HalfType::F32, Immediate(0)};
46 } 46 }
47 }(); 47 }();
48 op_b = UnpackHalfFloat(op_b, instr.alu_half.type_b); 48 op_b = UnpackHalfFloat(op_b, type_b);
49 op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); 49 // redeclaration to avoid a bug in clang with reusing local bindings in lambdas
50 Node op_b_alt = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
50 51
51 Node value = [&]() { 52 Node value = [&]() {
52 switch (opcode->get().GetId()) { 53 switch (opcode->get().GetId()) {
53 case OpCode::Id::HADD2_C: 54 case OpCode::Id::HADD2_C:
54 case OpCode::Id::HADD2_R: 55 case OpCode::Id::HADD2_R:
55 return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); 56 return Operation(OperationCode::HAdd, PRECISE, op_a, op_b_alt);
56 case OpCode::Id::HMUL2_C: 57 case OpCode::Id::HMUL2_C:
57 case OpCode::Id::HMUL2_R: 58 case OpCode::Id::HMUL2_R:
58 return Operation(OperationCode::HMul, PRECISE, op_a, op_b); 59 return Operation(OperationCode::HMul, PRECISE, op_a, op_b_alt);
59 default: 60 default:
60 UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); 61 UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
61 return Immediate(0); 62 return Immediate(0);
62 } 63 }
63 }(); 64 }();
65 value = GetSaturatedHalfFloat(value, instr.alu_half.saturate);
64 value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); 66 value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);
65 67
66 SetRegister(bb, instr.gpr0, value); 68 SetRegister(bb, instr.gpr0, value);
@@ -68,4 +70,4 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
68 return pc; 70 return pc;
69} 71}
70 72
71} // namespace VideoCommon::Shader \ No newline at end of file 73} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index 5c1becce5..a425f9eb7 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -34,15 +34,14 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
34 case OpCode::Id::HFMA2_CR: 34 case OpCode::Id::HFMA2_CR:
35 neg_b = instr.hfma2.negate_b; 35 neg_b = instr.hfma2.negate_b;
36 neg_c = instr.hfma2.negate_c; 36 neg_c = instr.hfma2.negate_c;
37 return {instr.hfma2.saturate, instr.hfma2.type_b, 37 return {instr.hfma2.saturate, HalfType::F32,
38 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), 38 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
39 instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; 39 instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
40 case OpCode::Id::HFMA2_RC: 40 case OpCode::Id::HFMA2_RC:
41 neg_b = instr.hfma2.negate_b; 41 neg_b = instr.hfma2.negate_b;
42 neg_c = instr.hfma2.negate_c; 42 neg_c = instr.hfma2.negate_c;
43 return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), 43 return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
44 instr.hfma2.type_b, 44 HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
45 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
46 case OpCode::Id::HFMA2_RR: 45 case OpCode::Id::HFMA2_RR:
47 neg_b = instr.hfma2.rr.negate_b; 46 neg_b = instr.hfma2.rr.negate_b;
48 neg_c = instr.hfma2.rr.negate_c; 47 neg_c = instr.hfma2.rr.negate_c;
@@ -56,13 +55,13 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
56 return {false, identity, Immediate(0), identity, Immediate(0)}; 55 return {false, identity, Immediate(0), identity, Immediate(0)};
57 } 56 }
58 }(); 57 }();
59 UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");
60 58
61 const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); 59 const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
62 op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); 60 op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
63 op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); 61 op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);
64 62
65 Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); 63 Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
64 value = GetSaturatedHalfFloat(value, saturate);
66 value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); 65 value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
67 66
68 SetRegister(bb, instr.gpr0, value); 67 SetRegister(bb, instr.gpr0, value);
@@ -70,4 +69,4 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
70 return pc; 69 return pc;
71} 70}
72 71
73} // namespace VideoCommon::Shader \ No newline at end of file 72} // namespace VideoCommon::Shader