diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/shader/decode/arithmetic_half.cpp | 22 | ||||
| -rw-r--r-- | src/video_core/shader/decode/hfma2.cpp | 9 |
2 files changed, 16 insertions, 15 deletions
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index 9467f9417..2098c1170 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | 9 | ||
| 10 | namespace VideoCommon::Shader { | 10 | namespace VideoCommon::Shader { |
| 11 | 11 | ||
| 12 | using Tegra::Shader::HalfType; | ||
| 12 | using Tegra::Shader::Instruction; | 13 | using Tegra::Shader::Instruction; |
| 13 | using Tegra::Shader::OpCode; | 14 | using Tegra::Shader::OpCode; |
| 14 | 15 | ||
| @@ -22,7 +23,6 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { | |||
| 22 | LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); | 23 | LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); |
| 23 | } | 24 | } |
| 24 | } | 25 | } |
| 25 | UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented"); | ||
| 26 | 26 | ||
| 27 | const bool negate_a = | 27 | const bool negate_a = |
| 28 | opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; | 28 | opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; |
| @@ -32,35 +32,37 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { | |||
| 32 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); | 32 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); |
| 33 | op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a); | 33 | op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a); |
| 34 | 34 | ||
| 35 | Node op_b = [&]() { | 35 | auto [type_b, op_b] = [&]() -> std::tuple<HalfType, Node> { |
| 36 | switch (opcode->get().GetId()) { | 36 | switch (opcode->get().GetId()) { |
| 37 | case OpCode::Id::HADD2_C: | 37 | case OpCode::Id::HADD2_C: |
| 38 | case OpCode::Id::HMUL2_C: | 38 | case OpCode::Id::HMUL2_C: |
| 39 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | 39 | return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; |
| 40 | case OpCode::Id::HADD2_R: | 40 | case OpCode::Id::HADD2_R: |
| 41 | case OpCode::Id::HMUL2_R: | 41 | case OpCode::Id::HMUL2_R: |
| 42 | return GetRegister(instr.gpr20); | 42 | return {instr.alu_half.type_b, GetRegister(instr.gpr20)}; |
| 43 | default: | 43 | default: |
| 44 | UNREACHABLE(); | 44 | UNREACHABLE(); |
| 45 | return Immediate(0); | 45 | return {HalfType::F32, Immediate(0)}; |
| 46 | } | 46 | } |
| 47 | }(); | 47 | }(); |
| 48 | op_b = UnpackHalfFloat(op_b, instr.alu_half.type_b); | 48 | op_b = UnpackHalfFloat(op_b, type_b); |
| 49 | op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); | 49 | // redeclaration to avoid a bug in clang with reusing local bindings in lambdas |
| 50 | Node op_b_alt = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); | ||
| 50 | 51 | ||
| 51 | Node value = [&]() { | 52 | Node value = [&]() { |
| 52 | switch (opcode->get().GetId()) { | 53 | switch (opcode->get().GetId()) { |
| 53 | case OpCode::Id::HADD2_C: | 54 | case OpCode::Id::HADD2_C: |
| 54 | case OpCode::Id::HADD2_R: | 55 | case OpCode::Id::HADD2_R: |
| 55 | return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); | 56 | return Operation(OperationCode::HAdd, PRECISE, op_a, op_b_alt); |
| 56 | case OpCode::Id::HMUL2_C: | 57 | case OpCode::Id::HMUL2_C: |
| 57 | case OpCode::Id::HMUL2_R: | 58 | case OpCode::Id::HMUL2_R: |
| 58 | return Operation(OperationCode::HMul, PRECISE, op_a, op_b); | 59 | return Operation(OperationCode::HMul, PRECISE, op_a, op_b_alt); |
| 59 | default: | 60 | default: |
| 60 | UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); | 61 | UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); |
| 61 | return Immediate(0); | 62 | return Immediate(0); |
| 62 | } | 63 | } |
| 63 | }(); | 64 | }(); |
| 65 | value = GetSaturatedHalfFloat(value, instr.alu_half.saturate); | ||
| 64 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); | 66 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); |
| 65 | 67 | ||
| 66 | SetRegister(bb, instr.gpr0, value); | 68 | SetRegister(bb, instr.gpr0, value); |
| @@ -68,4 +70,4 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { | |||
| 68 | return pc; | 70 | return pc; |
| 69 | } | 71 | } |
| 70 | 72 | ||
| 71 | } // namespace VideoCommon::Shader \ No newline at end of file | 73 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index 5c1becce5..a425f9eb7 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp | |||
| @@ -34,15 +34,14 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { | |||
| 34 | case OpCode::Id::HFMA2_CR: | 34 | case OpCode::Id::HFMA2_CR: |
| 35 | neg_b = instr.hfma2.negate_b; | 35 | neg_b = instr.hfma2.negate_b; |
| 36 | neg_c = instr.hfma2.negate_c; | 36 | neg_c = instr.hfma2.negate_c; |
| 37 | return {instr.hfma2.saturate, instr.hfma2.type_b, | 37 | return {instr.hfma2.saturate, HalfType::F32, |
| 38 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | 38 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), |
| 39 | instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; | 39 | instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; |
| 40 | case OpCode::Id::HFMA2_RC: | 40 | case OpCode::Id::HFMA2_RC: |
| 41 | neg_b = instr.hfma2.negate_b; | 41 | neg_b = instr.hfma2.negate_b; |
| 42 | neg_c = instr.hfma2.negate_c; | 42 | neg_c = instr.hfma2.negate_c; |
| 43 | return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), | 43 | return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), |
| 44 | instr.hfma2.type_b, | 44 | HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; |
| 45 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 46 | case OpCode::Id::HFMA2_RR: | 45 | case OpCode::Id::HFMA2_RR: |
| 47 | neg_b = instr.hfma2.rr.negate_b; | 46 | neg_b = instr.hfma2.rr.negate_b; |
| 48 | neg_c = instr.hfma2.rr.negate_c; | 47 | neg_c = instr.hfma2.rr.negate_c; |
| @@ -56,13 +55,13 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { | |||
| 56 | return {false, identity, Immediate(0), identity, Immediate(0)}; | 55 | return {false, identity, Immediate(0), identity, Immediate(0)}; |
| 57 | } | 56 | } |
| 58 | }(); | 57 | }(); |
| 59 | UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented"); | ||
| 60 | 58 | ||
| 61 | const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); | 59 | const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); |
| 62 | op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); | 60 | op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); |
| 63 | op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); | 61 | op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); |
| 64 | 62 | ||
| 65 | Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); | 63 | Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); |
| 64 | value = GetSaturatedHalfFloat(value, saturate); | ||
| 66 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); | 65 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); |
| 67 | 66 | ||
| 68 | SetRegister(bb, instr.gpr0, value); | 67 | SetRegister(bb, instr.gpr0, value); |
| @@ -70,4 +69,4 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { | |||
| 70 | return pc; | 69 | return pc; |
| 71 | } | 70 | } |
| 72 | 71 | ||
| 73 | } // namespace VideoCommon::Shader \ No newline at end of file | 72 | } // namespace VideoCommon::Shader |