diff options
| author | 2018-12-23 02:26:35 -0300 | |
|---|---|---|
| committer | 2019-01-15 17:54:52 -0300 | |
| commit | dd91650aaf217196a2b1ced17df24bd74349843d (patch) | |
| tree | 537f6098b72cd7fa7ccccc0c44f294faf95e96e9 | |
| parent | glsl_decompiler: Remove HNegate inlining (diff) | |
| download | yuzu-dd91650aaf217196a2b1ced17df24bd74349843d.tar.gz yuzu-dd91650aaf217196a2b1ced17df24bd74349843d.tar.xz yuzu-dd91650aaf217196a2b1ced17df24bd74349843d.zip | |
shader_decode: Implement HFMA2
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 1 | ||||
| -rw-r--r-- | src/video_core/shader/decode/hfma2.cpp | 54 | ||||
| -rw-r--r-- | src/video_core/shader/glsl_decompiler.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 1 |
4 files changed, 60 insertions, 5 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index c4987b682..9cb23f375 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -648,6 +648,7 @@ union Instruction { | |||
| 648 | BitField<37, 2, HalfPrecision> precision; | 648 | BitField<37, 2, HalfPrecision> precision; |
| 649 | BitField<32, 1, u64> saturate; | 649 | BitField<32, 1, u64> saturate; |
| 650 | 650 | ||
| 651 | BitField<31, 1, u64> negate_b; | ||
| 651 | BitField<30, 1, u64> negate_c; | 652 | BitField<30, 1, u64> negate_c; |
| 652 | BitField<35, 2, HalfType> type_c; | 653 | BitField<35, 2, HalfType> type_c; |
| 653 | } rr; | 654 | } rr; |
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index 5ce08481e..bf7491804 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp | |||
| @@ -2,6 +2,8 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <tuple> | ||
| 6 | |||
| 5 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 6 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 7 | #include "video_core/engines/shader_bytecode.h" | 9 | #include "video_core/engines/shader_bytecode.h" |
| @@ -9,6 +11,8 @@ | |||
| 9 | 11 | ||
| 10 | namespace VideoCommon::Shader { | 12 | namespace VideoCommon::Shader { |
| 11 | 13 | ||
| 14 | using Tegra::Shader::HalfPrecision; | ||
| 15 | using Tegra::Shader::HalfType; | ||
| 12 | using Tegra::Shader::Instruction; | 16 | using Tegra::Shader::Instruction; |
| 13 | using Tegra::Shader::OpCode; | 17 | using Tegra::Shader::OpCode; |
| 14 | 18 | ||
| @@ -16,7 +20,55 @@ u32 ShaderIR::DecodeHfma2(BasicBlock& bb, u32 pc) { | |||
| 16 | const Instruction instr = {program_code[pc]}; | 20 | const Instruction instr = {program_code[pc]}; |
| 17 | const auto opcode = OpCode::Decode(instr); | 21 | const auto opcode = OpCode::Decode(instr); |
| 18 | 22 | ||
| 19 | UNIMPLEMENTED(); | 23 | if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { |
| 24 | UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None); | ||
| 25 | } else { | ||
| 26 | UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None); | ||
| 27 | } | ||
| 28 | |||
| 29 | constexpr auto identity = HalfType::H0_H1; | ||
| 30 | |||
| 31 | const HalfType type_a = instr.hfma2.type_a; | ||
| 32 | const Node op_a = GetRegister(instr.gpr8); | ||
| 33 | |||
| 34 | bool neg_b{}, neg_c{}; | ||
| 35 | auto [saturate, type_b, op_b, type_c, | ||
| 36 | op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> { | ||
| 37 | switch (opcode->get().GetId()) { | ||
| 38 | case OpCode::Id::HFMA2_CR: | ||
| 39 | neg_b = instr.hfma2.negate_b; | ||
| 40 | neg_c = instr.hfma2.negate_c; | ||
| 41 | return {instr.hfma2.saturate, instr.hfma2.type_b, | ||
| 42 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), instr.hfma2.type_reg39, | ||
| 43 | GetRegister(instr.gpr39)}; | ||
| 44 | case OpCode::Id::HFMA2_RC: | ||
| 45 | neg_b = instr.hfma2.negate_b; | ||
| 46 | neg_c = instr.hfma2.negate_c; | ||
| 47 | return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), | ||
| 48 | instr.hfma2.type_b, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; | ||
| 49 | case OpCode::Id::HFMA2_RR: | ||
| 50 | neg_b = instr.hfma2.rr.negate_b; | ||
| 51 | neg_c = instr.hfma2.rr.negate_c; | ||
| 52 | return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20), | ||
| 53 | instr.hfma2.rr.type_c, GetRegister(instr.gpr39)}; | ||
| 54 | case OpCode::Id::HFMA2_IMM_R: | ||
| 55 | neg_c = instr.hfma2.negate_c; | ||
| 56 | return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true), | ||
| 57 | instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; | ||
| 58 | default: | ||
| 59 | return {false, identity, Immediate(0), identity, Immediate(0)}; | ||
| 60 | } | ||
| 61 | }(); | ||
| 62 | UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented"); | ||
| 63 | |||
| 64 | op_b = GetOperandAbsNegHalf(op_b, false, neg_b); | ||
| 65 | op_c = GetOperandAbsNegHalf(op_c, false, neg_c); | ||
| 66 | |||
| 67 | MetaHalfArithmetic meta{true, {type_a, type_b, type_c}}; | ||
| 68 | Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c); | ||
| 69 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); | ||
| 70 | |||
| 71 | SetRegister(bb, instr.gpr0, value); | ||
| 20 | 72 | ||
| 21 | return pc; | 73 | return pc; |
| 22 | } | 74 | } |
diff --git a/src/video_core/shader/glsl_decompiler.cpp b/src/video_core/shader/glsl_decompiler.cpp index abc9a556d..c364a43ce 100644 --- a/src/video_core/shader/glsl_decompiler.cpp +++ b/src/video_core/shader/glsl_decompiler.cpp | |||
| @@ -762,9 +762,9 @@ private: | |||
| 762 | return GenerateBinaryInfix(operation, "/", type, type, type); | 762 | return GenerateBinaryInfix(operation, "/", type, type, type); |
| 763 | } | 763 | } |
| 764 | 764 | ||
| 765 | std::string FFma(Operation operation) { | 765 | template <Type type> |
| 766 | return GenerateTernary(operation, "fma", Type::Float, Type::Float, Type::Float, | 766 | std::string Fma(Operation operation) { |
| 767 | Type::Float); | 767 | return GenerateTernary(operation, "fma", type, type, type, type); |
| 768 | } | 768 | } |
| 769 | 769 | ||
| 770 | template <Type type> | 770 | template <Type type> |
| @@ -1231,7 +1231,7 @@ private: | |||
| 1231 | &Add<Type::Float>, | 1231 | &Add<Type::Float>, |
| 1232 | &Mul<Type::Float>, | 1232 | &Mul<Type::Float>, |
| 1233 | &Div<Type::Float>, | 1233 | &Div<Type::Float>, |
| 1234 | &FFma, | 1234 | &Fma<Type::Float>, |
| 1235 | &Negate<Type::Float>, | 1235 | &Negate<Type::Float>, |
| 1236 | &Absolute<Type::Float>, | 1236 | &Absolute<Type::Float>, |
| 1237 | &FClamp, | 1237 | &FClamp, |
| @@ -1289,6 +1289,7 @@ private: | |||
| 1289 | 1289 | ||
| 1290 | &Add<Type::HalfFloat>, | 1290 | &Add<Type::HalfFloat>, |
| 1291 | &Mul<Type::HalfFloat>, | 1291 | &Mul<Type::HalfFloat>, |
| 1292 | &Fma<Type::HalfFloat>, | ||
| 1292 | &Absolute<Type::HalfFloat>, | 1293 | &Absolute<Type::HalfFloat>, |
| 1293 | &HNegate, | 1294 | &HNegate, |
| 1294 | &HMergeF32, | 1295 | &HMergeF32, |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ccdf316ac..928e3e7d5 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -110,6 +110,7 @@ enum class OperationCode { | |||
| 110 | 110 | ||
| 111 | HAdd, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 | 111 | HAdd, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 |
| 112 | HMul, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 | 112 | HMul, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 |
| 113 | HFma, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 | ||
| 113 | HAbsolute, /// (f16vec2 a) -> f16vec2 | 114 | HAbsolute, /// (f16vec2 a) -> f16vec2 |
| 114 | HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 | 115 | HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 |
| 115 | HMergeF32, /// (f16vec2 src) -> float | 116 | HMergeF32, /// (f16vec2 src) -> float |