shader_decode: Implement HFMA2

author: ReinUsesLisp 2018-12-23 02:26:35 -0300
committer: ReinUsesLisp 2019-01-15 17:54:52 -0300
commit: dd91650aaf217196a2b1ced17df24bd74349843d (patch)
tree: 537f6098b72cd7fa7ccccc0c44f294faf95e96e9
parent: glsl_decompiler: Remove HNegate inlining (diff)
download: yuzu-dd91650aaf217196a2b1ced17df24bd74349843d.tar.gz
yuzu-dd91650aaf217196a2b1ced17df24bd74349843d.tar.xz
yuzu-dd91650aaf217196a2b1ced17df24bd74349843d.zip
4 files changed, 60 insertions, 5 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c4987b682..9cb23f375 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -648,6 +648,7 @@ union Instruction {
            BitField<37, 2, HalfPrecision> precision;
            BitField<32, 1, u64> saturate;
+            BitField<31, 1, u64> negate_b;
            BitField<30, 1, u64> negate_c;
            BitField<35, 2, HalfType> type_c;
        } rr;
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index 5ce08481e..bf7491804 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
+#include <tuple>
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
@@ -9,6 +11,8 @@
 namespace VideoCommon::Shader {
+using Tegra::Shader::HalfPrecision;
+using Tegra::Shader::HalfType;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
@@ -16,7 +20,55 @@ u32 ShaderIR::DecodeHfma2(BasicBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
-    UNIMPLEMENTED();
+    if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
+        UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None);
+    } else {
+        UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None);
+    }
+    constexpr auto identity = HalfType::H0_H1;
+    const HalfType type_a = instr.hfma2.type_a;
+    const Node op_a = GetRegister(instr.gpr8);
+    bool neg_b{}, neg_c{};
+    auto [saturate, type_b, op_b, type_c,
+          op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::HFMA2_CR:
+            neg_b = instr.hfma2.negate_b;
+            neg_c = instr.hfma2.negate_c;
+            return {instr.hfma2.saturate, instr.hfma2.type_b,
+                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), instr.hfma2.type_reg39,
+                    GetRegister(instr.gpr39)};
+        case OpCode::Id::HFMA2_RC:
+            neg_b = instr.hfma2.negate_b;
+            neg_c = instr.hfma2.negate_c;
+            return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
+                    instr.hfma2.type_b, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
+        case OpCode::Id::HFMA2_RR:
+            neg_b = instr.hfma2.rr.negate_b;
+            neg_c = instr.hfma2.rr.negate_c;
+            return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
+                    instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
+        case OpCode::Id::HFMA2_IMM_R:
+            neg_c = instr.hfma2.negate_c;
+            return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
+                    instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
+        default:
+            return {false, identity, Immediate(0), identity, Immediate(0)};
+        }
+    }();
+    UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");
+    op_b = GetOperandAbsNegHalf(op_b, false, neg_b);
+    op_c = GetOperandAbsNegHalf(op_c, false, neg_c);
+    MetaHalfArithmetic meta{true, {type_a, type_b, type_c}};
+    Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c);
+    value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
+    SetRegister(bb, instr.gpr0, value);
    return pc;
 }
diff --git a/src/video_core/shader/glsl_decompiler.cpp b/src/video_core/shader/glsl_decompiler.cpp
index abc9a556d..c364a43ce 100644
--- a/src/video_core/shader/glsl_decompiler.cpp
+++ b/src/video_core/shader/glsl_decompiler.cpp
@@ -762,9 +762,9 @@ private:
        return GenerateBinaryInfix(operation, "/", type, type, type);
    }
-    std::string FFma(Operation operation) {
-        return GenerateTernary(operation, "fma", Type::Float, Type::Float, Type::Float,
-                               Type::Float);
+    template <Type type>
+    std::string Fma(Operation operation) {
+        return GenerateTernary(operation, "fma", type, type, type, type);
    }
    template <Type type>
@@ -1231,7 +1231,7 @@ private:
        &Add<Type::Float>,
        &Mul<Type::Float>,
        &Div<Type::Float>,
-        &FFma,
+        &Fma<Type::Float>,
        &Negate<Type::Float>,
        &Absolute<Type::Float>,
        &FClamp,
@@ -1289,6 +1289,7 @@ private:
        &Add<Type::HalfFloat>,
        &Mul<Type::HalfFloat>,
+        &Fma<Type::HalfFloat>,
        &Absolute<Type::HalfFloat>,
        &HNegate,
        &HMergeF32,
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index ccdf316ac..928e3e7d5 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -110,6 +110,7 @@ enum class OperationCode {
    HAdd,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
    HMul,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
+    HFma,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
    HAbsolute, /// (f16vec2 a) -> f16vec2
    HNegate,   /// (f16vec2 a, bool first, bool second) -> f16vec2
    HMergeF32, /// (f16vec2 src) -> float
author	ReinUsesLisp	2018-12-23 02:26:35 -0300
committer	ReinUsesLisp	2019-01-15 17:54:52 -0300
commit	dd91650aaf217196a2b1ced17df24bd74349843d (patch)
tree	537f6098b72cd7fa7ccccc0c44f294faf95e96e9
parent	glsl_decompiler: Remove HNegate inlining (diff)
download	yuzu-dd91650aaf217196a2b1ced17df24bd74349843d.tar.gz yuzu-dd91650aaf217196a2b1ced17df24bd74349843d.tar.xz yuzu-dd91650aaf217196a2b1ced17df24bd74349843d.zip

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index c4987b682..9cb23f375 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h
@@ -648,6 +648,7 @@ union Instruction {
648	BitField<37, 2, HalfPrecision> precision;	648	BitField<37, 2, HalfPrecision> precision;
649	BitField<32, 1, u64> saturate;	649	BitField<32, 1, u64> saturate;
650		650
		651	BitField<31, 1, u64> negate_b;
651	BitField<30, 1, u64> negate_c;	652	BitField<30, 1, u64> negate_c;
652	BitField<35, 2, HalfType> type_c;	653	BitField<35, 2, HalfType> type_c;
653	} rr;	654	} rr;


diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index 5ce08481e..bf7491804 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp
@@ -2,6 +2,8 @@
2	// Licensed under GPLv2 or any later version	2	// Licensed under GPLv2 or any later version
3	// Refer to the license.txt file included.	3	// Refer to the license.txt file included.
4		4
		5	#include <tuple>
		6
5	#include "common/assert.h"	7	#include "common/assert.h"
6	#include "common/common_types.h"	8	#include "common/common_types.h"
7	#include "video_core/engines/shader_bytecode.h"	9	#include "video_core/engines/shader_bytecode.h"
@@ -9,6 +11,8 @@
9		11
10	namespace VideoCommon::Shader {	12	namespace VideoCommon::Shader {
11		13
		14	using Tegra::Shader::HalfPrecision;
		15	using Tegra::Shader::HalfType;
12	using Tegra::Shader::Instruction;	16	using Tegra::Shader::Instruction;
13	using Tegra::Shader::OpCode;	17	using Tegra::Shader::OpCode;
14		18
@@ -16,7 +20,55 @@ u32 ShaderIR::DecodeHfma2(BasicBlock& bb, u32 pc) {
16	const Instruction instr = {program_code[pc]};	20	const Instruction instr = {program_code[pc]};
17	const auto opcode = OpCode::Decode(instr);	21	const auto opcode = OpCode::Decode(instr);
18		22
19	UNIMPLEMENTED();	23	if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
		24	UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None);
		25	} else {
		26	UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None);
		27	}
		28
		29	constexpr auto identity = HalfType::H0_H1;
		30
		31	const HalfType type_a = instr.hfma2.type_a;
		32	const Node op_a = GetRegister(instr.gpr8);
		33
		34	bool neg_b{}, neg_c{};
		35	auto [saturate, type_b, op_b, type_c,
		36	op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
		37	switch (opcode->get().GetId()) {
		38	case OpCode::Id::HFMA2_CR:
		39	neg_b = instr.hfma2.negate_b;
		40	neg_c = instr.hfma2.negate_c;
		41	return {instr.hfma2.saturate, instr.hfma2.type_b,
		42	GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), instr.hfma2.type_reg39,
		43	GetRegister(instr.gpr39)};
		44	case OpCode::Id::HFMA2_RC:
		45	neg_b = instr.hfma2.negate_b;
		46	neg_c = instr.hfma2.negate_c;
		47	return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
		48	instr.hfma2.type_b, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
		49	case OpCode::Id::HFMA2_RR:
		50	neg_b = instr.hfma2.rr.negate_b;
		51	neg_c = instr.hfma2.rr.negate_c;
		52	return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
		53	instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
		54	case OpCode::Id::HFMA2_IMM_R:
		55	neg_c = instr.hfma2.negate_c;
		56	return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
		57	instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
		58	default:
		59	return {false, identity, Immediate(0), identity, Immediate(0)};
		60	}
		61	}();
		62	UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");
		63
		64	op_b = GetOperandAbsNegHalf(op_b, false, neg_b);
		65	op_c = GetOperandAbsNegHalf(op_c, false, neg_c);
		66
		67	MetaHalfArithmetic meta{true, {type_a, type_b, type_c}};
		68	Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c);
		69	value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
		70
		71	SetRegister(bb, instr.gpr0, value);
20		72
21	return pc;	73	return pc;
22	}	74	}


diff --git a/src/video_core/shader/glsl_decompiler.cpp b/src/video_core/shader/glsl_decompiler.cpp index abc9a556d..c364a43ce 100644 --- a/src/video_core/shader/glsl_decompiler.cpp +++ b/src/video_core/shader/glsl_decompiler.cpp
@@ -762,9 +762,9 @@ private:
762	return GenerateBinaryInfix(operation, "/", type, type, type);	762	return GenerateBinaryInfix(operation, "/", type, type, type);
763	}	763	}
764		764
765	std::string FFma(Operation operation) {	765	template <Type type>
766	return GenerateTernary(operation, "fma", Type::Float, Type::Float, Type::Float,	766	std::string Fma(Operation operation) {
767	Type::Float);	767	return GenerateTernary(operation, "fma", type, type, type, type);
768	}	768	}
769		769
770	template <Type type>	770	template <Type type>
@@ -1231,7 +1231,7 @@ private:
1231	&Add<Type::Float>,	1231	&Add<Type::Float>,
1232	&Mul<Type::Float>,	1232	&Mul<Type::Float>,
1233	&Div<Type::Float>,	1233	&Div<Type::Float>,
1234	&FFma,	1234	&Fma<Type::Float>,
1235	&Negate<Type::Float>,	1235	&Negate<Type::Float>,
1236	&Absolute<Type::Float>,	1236	&Absolute<Type::Float>,
1237	&FClamp,	1237	&FClamp,
@@ -1289,6 +1289,7 @@ private:
1289		1289
1290	&Add<Type::HalfFloat>,	1290	&Add<Type::HalfFloat>,
1291	&Mul<Type::HalfFloat>,	1291	&Mul<Type::HalfFloat>,
		1292	&Fma<Type::HalfFloat>,
1292	&Absolute<Type::HalfFloat>,	1293	&Absolute<Type::HalfFloat>,
1293	&HNegate,	1294	&HNegate,
1294	&HMergeF32,	1295	&HMergeF32,


diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ccdf316ac..928e3e7d5 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h
@@ -110,6 +110,7 @@ enum class OperationCode {
110		110
111	HAdd, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2	111	HAdd, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
112	HMul, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2	112	HMul, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
		113	HFma, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
113	HAbsolute, /// (f16vec2 a) -> f16vec2	114	HAbsolute, /// (f16vec2 a) -> f16vec2
114	HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2	115	HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2
115	HMergeF32, /// (f16vec2 src) -> float	116	HMergeF32, /// (f16vec2 src) -> float