summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ReinUsesLisp, 2018-12-23 02:26:35 -0300
committer: ReinUsesLisp, 2019-01-15 17:54:52 -0300
commit: dd91650aaf217196a2b1ced17df24bd74349843d (patch)
tree: 537f6098b72cd7fa7ccccc0c44f294faf95e96e9
parent: glsl_decompiler: Remove HNegate inlining (diff)
download: yuzu-dd91650aaf217196a2b1ced17df24bd74349843d.tar.gz
          yuzu-dd91650aaf217196a2b1ced17df24bd74349843d.tar.xz
          yuzu-dd91650aaf217196a2b1ced17df24bd74349843d.zip
shader_decode: Implement HFMA2
-rw-r--r-- src/video_core/engines/shader_bytecode.h 1
-rw-r--r-- src/video_core/shader/decode/hfma2.cpp 54
-rw-r--r-- src/video_core/shader/glsl_decompiler.cpp 9
-rw-r--r-- src/video_core/shader/shader_ir.h 1
4 files changed, 60 insertions, 5 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c4987b682..9cb23f375 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -648,6 +648,7 @@ union Instruction {
648 BitField<37, 2, HalfPrecision> precision; 648 BitField<37, 2, HalfPrecision> precision;
649 BitField<32, 1, u64> saturate; 649 BitField<32, 1, u64> saturate;
650 650
651 BitField<31, 1, u64> negate_b;
651 BitField<30, 1, u64> negate_c; 652 BitField<30, 1, u64> negate_c;
652 BitField<35, 2, HalfType> type_c; 653 BitField<35, 2, HalfType> type_c;
653 } rr; 654 } rr;
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index 5ce08481e..bf7491804 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -2,6 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <tuple>
6
5#include "common/assert.h" 7#include "common/assert.h"
6#include "common/common_types.h" 8#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h" 9#include "video_core/engines/shader_bytecode.h"
@@ -9,6 +11,8 @@
9 11
10namespace VideoCommon::Shader { 12namespace VideoCommon::Shader {
11 13
14using Tegra::Shader::HalfPrecision;
15using Tegra::Shader::HalfType;
12using Tegra::Shader::Instruction; 16using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode; 17using Tegra::Shader::OpCode;
14 18
@@ -16,7 +20,55 @@ u32 ShaderIR::DecodeHfma2(BasicBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]}; 20 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr); 21 const auto opcode = OpCode::Decode(instr);
18 22
19 UNIMPLEMENTED(); 23 if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
24 UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None);
25 } else {
26 UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None);
27 }
28
29 constexpr auto identity = HalfType::H0_H1;
30
31 const HalfType type_a = instr.hfma2.type_a;
32 const Node op_a = GetRegister(instr.gpr8);
33
34 bool neg_b{}, neg_c{};
35 auto [saturate, type_b, op_b, type_c,
36 op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
37 switch (opcode->get().GetId()) {
38 case OpCode::Id::HFMA2_CR:
39 neg_b = instr.hfma2.negate_b;
40 neg_c = instr.hfma2.negate_c;
41 return {instr.hfma2.saturate, instr.hfma2.type_b,
42 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), instr.hfma2.type_reg39,
43 GetRegister(instr.gpr39)};
44 case OpCode::Id::HFMA2_RC:
45 neg_b = instr.hfma2.negate_b;
46 neg_c = instr.hfma2.negate_c;
47 return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
48 instr.hfma2.type_b, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
49 case OpCode::Id::HFMA2_RR:
50 neg_b = instr.hfma2.rr.negate_b;
51 neg_c = instr.hfma2.rr.negate_c;
52 return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
53 instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
54 case OpCode::Id::HFMA2_IMM_R:
55 neg_c = instr.hfma2.negate_c;
56 return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
57 instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
58 default:
59 return {false, identity, Immediate(0), identity, Immediate(0)};
60 }
61 }();
62 UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");
63
64 op_b = GetOperandAbsNegHalf(op_b, false, neg_b);
65 op_c = GetOperandAbsNegHalf(op_c, false, neg_c);
66
67 MetaHalfArithmetic meta{true, {type_a, type_b, type_c}};
68 Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c);
69 value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
70
71 SetRegister(bb, instr.gpr0, value);
20 72
21 return pc; 73 return pc;
22} 74}
diff --git a/src/video_core/shader/glsl_decompiler.cpp b/src/video_core/shader/glsl_decompiler.cpp
index abc9a556d..c364a43ce 100644
--- a/src/video_core/shader/glsl_decompiler.cpp
+++ b/src/video_core/shader/glsl_decompiler.cpp
@@ -762,9 +762,9 @@ private:
762 return GenerateBinaryInfix(operation, "/", type, type, type); 762 return GenerateBinaryInfix(operation, "/", type, type, type);
763 } 763 }
764 764
765 std::string FFma(Operation operation) { 765 template <Type type>
766 return GenerateTernary(operation, "fma", Type::Float, Type::Float, Type::Float, 766 std::string Fma(Operation operation) {
767 Type::Float); 767 return GenerateTernary(operation, "fma", type, type, type, type);
768 } 768 }
769 769
770 template <Type type> 770 template <Type type>
@@ -1231,7 +1231,7 @@ private:
1231 &Add<Type::Float>, 1231 &Add<Type::Float>,
1232 &Mul<Type::Float>, 1232 &Mul<Type::Float>,
1233 &Div<Type::Float>, 1233 &Div<Type::Float>,
1234 &FFma, 1234 &Fma<Type::Float>,
1235 &Negate<Type::Float>, 1235 &Negate<Type::Float>,
1236 &Absolute<Type::Float>, 1236 &Absolute<Type::Float>,
1237 &FClamp, 1237 &FClamp,
@@ -1289,6 +1289,7 @@ private:
1289 1289
1290 &Add<Type::HalfFloat>, 1290 &Add<Type::HalfFloat>,
1291 &Mul<Type::HalfFloat>, 1291 &Mul<Type::HalfFloat>,
1292 &Fma<Type::HalfFloat>,
1292 &Absolute<Type::HalfFloat>, 1293 &Absolute<Type::HalfFloat>,
1293 &HNegate, 1294 &HNegate,
1294 &HMergeF32, 1295 &HMergeF32,
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index ccdf316ac..928e3e7d5 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -110,6 +110,7 @@ enum class OperationCode {
110 110
111 HAdd, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 111 HAdd, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
112 HMul, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 112 HMul, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
113 HFma, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
113 HAbsolute, /// (f16vec2 a) -> f16vec2 114 HAbsolute, /// (f16vec2 a) -> f16vec2
114 HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 115 HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2
115 HMergeF32, /// (f16vec2 src) -> float 116 HMergeF32, /// (f16vec2 src) -> float