shader/video: Partially implement VMNMX

Implements the common usages of VMNMX. Inputs whose size is not 32 bits are not supported, and sign mismatches between the sources aren't supported either. VMNMX works as follows: it takes Ra and Rb and applies a maximum or minimum to them (which one is selected by .MX), taking the input sign into account. This result can then be saturated. After the intermediate result is calculated, it applies another operation to it using Rc. These operations are merges, accumulations or another min/max pass. This instruction makes it possible to implement, for instance, GCN's min3 and max3 instructions with a more flexible approach.
author: ReinUsesLisp 2020-03-28 18:49:50 -0300
committer: ReinUsesLisp 2020-04-12 00:34:42 -0300
commit: 76f178ba6e7cc2925ffada341d1e14fb159e93c7 (patch)
tree: fec59062ca81807bd8ac4dc8e0216fe2f5c27384 /src
parent: shader_bytecode: Fix I2I_IMM encoding (diff)
download: yuzu-76f178ba6e7cc2925ffada341d1e14fb159e93c7.tar.gz
yuzu-76f178ba6e7cc2925ffada341d1e14fb159e93c7.tar.xz
yuzu-76f178ba6e7cc2925ffada341d1e14fb159e93c7.zip
3 files changed, 116 insertions, 0 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index fcb2d7935..7400e1aa9 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -302,6 +302,23 @@ enum class VmadShr : u64 {
    Shr15 = 2,
 };
+enum class VmnmxType : u64 {
+    Bits8,
+    Bits16,
+    Bits32,
+};
+enum class VmnmxOperation : u64 {
+    Mrg_16H = 0,
+    Mrg_16L = 1,
+    Mrg_8B0 = 2,
+    Mrg_8B2 = 3,
+    Acc = 4,
+    Min = 5,
+    Max = 6,
+    Nop = 7,
+};
 enum class XmadMode : u64 {
    None = 0,
    CLo = 1,
@@ -1663,6 +1680,42 @@ union Instruction {
    } vmad;
    union {
+        BitField<54, 1, u64> is_dest_signed;
+        BitField<48, 1, u64> is_src_a_signed;
+        BitField<49, 1, u64> is_src_b_signed;
+        BitField<37, 2, u64> src_format_a;
+        BitField<29, 2, u64> src_format_b;
+        BitField<56, 1, u64> mx;
+        BitField<55, 1, u64> sat;
+        BitField<36, 2, u64> selector_a;
+        BitField<28, 2, u64> selector_b;
+        BitField<50, 1, u64> is_op_b_register;
+        BitField<51, 3, VmnmxOperation> operation;
+        VmnmxType SourceFormatA() const {
+            switch (src_format_a) {
+            case 0b11:
+                return VmnmxType::Bits32;
+            case 0b10:
+                return VmnmxType::Bits16;
+            default:
+                return VmnmxType::Bits8;
+            }
+        }
+        VmnmxType SourceFormatB() const {
+            switch (src_format_b) {
+            case 0b11:
+                return VmnmxType::Bits32;
+            case 0b10:
+                return VmnmxType::Bits16;
+            default:
+                return VmnmxType::Bits8;
+            }
+        }
+    } vmnmx;
+    union {
        BitField<20, 16, u64> imm20_16;
        BitField<35, 1, u64> high_b_rr; // used on RR
        BitField<36, 1, u64> product_shift_left;
@@ -1773,6 +1826,7 @@ public:
        MEMBAR,
        VMAD,
        VSETP,
+        VMNMX,
        FFMA_IMM, // Fused Multiply and Add
        FFMA_CR,
        FFMA_RC,
@@ -2078,6 +2132,7 @@ private:
            INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
            INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
            INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
+            INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"),
            INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
            INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
            INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index b047cf870..64ba60ea2 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -10,16 +10,24 @@
 namespace VideoCommon::Shader {
+using std::move;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;
 using Tegra::Shader::VideoType;
 using Tegra::Shader::VmadShr;
+using Tegra::Shader::VmnmxOperation;
+using Tegra::Shader::VmnmxType;
 u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
+    if (opcode->get().GetId() == OpCode::Id::VMNMX) {
+        DecodeVMNMX(bb, instr);
+        return pc;
+    }
    const Node op_a =
        GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
                        instr.video.type_a, instr.video.byte_height_a);
@@ -109,4 +117,54 @@ Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
    }
 }
+void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) {
+    UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register);
+    UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32);
+    UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32);
+    UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed);
+    UNIMPLEMENTED_IF(instr.vmnmx.sat);
+    UNIMPLEMENTED_IF(instr.generates_cc);
+    Node op_a = GetRegister(instr.gpr8);
+    Node op_b = GetRegister(instr.gpr20);
+    Node op_c = GetRegister(instr.gpr39);
+    const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed
+    const bool is_oper2_signed = instr.vmnmx.is_dest_signed;
+    const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin;
+    Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b));
+    switch (instr.vmnmx.operation) {
+    case VmnmxOperation::Mrg_16H:
+        value = BitfieldInsert(move(op_c), move(value), 16, 16);
+        break;
+    case VmnmxOperation::Mrg_16L:
+        value = BitfieldInsert(move(op_c), move(value), 0, 16);
+        break;
+    case VmnmxOperation::Mrg_8B0:
+        value = BitfieldInsert(move(op_c), move(value), 0, 8);
+        break;
+    case VmnmxOperation::Mrg_8B2:
+        value = BitfieldInsert(move(op_c), move(value), 16, 8);
+        break;
+    case VmnmxOperation::Acc:
+        value = Operation(OperationCode::IAdd, move(value), move(op_c));
+        break;
+    case VmnmxOperation::Min:
+        value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c));
+        break;
+    case VmnmxOperation::Max:
+        value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c));
+        break;
+    case VmnmxOperation::Nop:
+        break;
+    default:
+        UNREACHABLE();
+        break;
+    }
+    SetRegister(bb, instr.gpr0, move(value));
+}
 } // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 80fc9b82c..0f1ebef1b 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -350,6 +350,9 @@ private:
    /// Marks the usage of a input or output attribute.
    void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);
+    /// Decodes VMNMX instruction and inserts its code into the passed basic block.
+    void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr);
    void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
                                  const Node4& components);
author	ReinUsesLisp	2020-03-28 18:49:50 -0300
committer	ReinUsesLisp	2020-04-12 00:34:42 -0300
commit	76f178ba6e7cc2925ffada341d1e14fb159e93c7 (patch)
tree	fec59062ca81807bd8ac4dc8e0216fe2f5c27384 /src
parent	shader_bytecode: Fix I2I_IMM encoding (diff)
download	yuzu-76f178ba6e7cc2925ffada341d1e14fb159e93c7.tar.gz yuzu-76f178ba6e7cc2925ffada341d1e14fb159e93c7.tar.xz yuzu-76f178ba6e7cc2925ffada341d1e14fb159e93c7.zip

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index fcb2d7935..7400e1aa9 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h
@@ -302,6 +302,23 @@ enum class VmadShr : u64 {
302	Shr15 = 2,	302	Shr15 = 2,
303	};	303	};
304		304
		305	enum class VmnmxType : u64 {
		306	Bits8,
		307	Bits16,
		308	Bits32,
		309	};
		310
		311	enum class VmnmxOperation : u64 {
		312	Mrg_16H = 0,
		313	Mrg_16L = 1,
		314	Mrg_8B0 = 2,
		315	Mrg_8B2 = 3,
		316	Acc = 4,
		317	Min = 5,
		318	Max = 6,
		319	Nop = 7,
		320	};
		321
305	enum class XmadMode : u64 {	322	enum class XmadMode : u64 {
306	None = 0,	323	None = 0,
307	CLo = 1,	324	CLo = 1,
@@ -1663,6 +1680,42 @@ union Instruction {
1663	} vmad;	1680	} vmad;
1664		1681
1665	union {	1682	union {
		1683	BitField<54, 1, u64> is_dest_signed;
		1684	BitField<48, 1, u64> is_src_a_signed;
		1685	BitField<49, 1, u64> is_src_b_signed;
		1686	BitField<37, 2, u64> src_format_a;
		1687	BitField<29, 2, u64> src_format_b;
		1688	BitField<56, 1, u64> mx;
		1689	BitField<55, 1, u64> sat;
		1690	BitField<36, 2, u64> selector_a;
		1691	BitField<28, 2, u64> selector_b;
		1692	BitField<50, 1, u64> is_op_b_register;
		1693	BitField<51, 3, VmnmxOperation> operation;
		1694
		1695	VmnmxType SourceFormatA() const {
		1696	switch (src_format_a) {
		1697	case 0b11:
		1698	return VmnmxType::Bits32;
		1699	case 0b10:
		1700	return VmnmxType::Bits16;
		1701	default:
		1702	return VmnmxType::Bits8;
		1703	}
		1704	}
		1705
		1706	VmnmxType SourceFormatB() const {
		1707	switch (src_format_b) {
		1708	case 0b11:
		1709	return VmnmxType::Bits32;
		1710	case 0b10:
		1711	return VmnmxType::Bits16;
		1712	default:
		1713	return VmnmxType::Bits8;
		1714	}
		1715	}
		1716	} vmnmx;
		1717
		1718	union {
1666	BitField<20, 16, u64> imm20_16;	1719	BitField<20, 16, u64> imm20_16;
1667	BitField<35, 1, u64> high_b_rr; // used on RR	1720	BitField<35, 1, u64> high_b_rr; // used on RR
1668	BitField<36, 1, u64> product_shift_left;	1721	BitField<36, 1, u64> product_shift_left;
@@ -1773,6 +1826,7 @@ public:
1773	MEMBAR,	1826	MEMBAR,
1774	VMAD,	1827	VMAD,
1775	VSETP,	1828	VSETP,
		1829	VMNMX,
1776	FFMA_IMM, // Fused Multiply and Add	1830	FFMA_IMM, // Fused Multiply and Add
1777	FFMA_CR,	1831	FFMA_CR,
1778	FFMA_RC,	1832	FFMA_RC,
@@ -2078,6 +2132,7 @@ private:
2078	INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),	2132	INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
2079	INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),	2133	INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
2080	INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),	2134	INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
		2135	INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"),
2081	INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),	2136	INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
2082	INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),	2137	INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
2083	INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),	2138	INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),


diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index b047cf870..64ba60ea2 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp
@@ -10,16 +10,24 @@
10		10
11	namespace VideoCommon::Shader {	11	namespace VideoCommon::Shader {
12		12
		13	using std::move;
13	using Tegra::Shader::Instruction;	14	using Tegra::Shader::Instruction;
14	using Tegra::Shader::OpCode;	15	using Tegra::Shader::OpCode;
15	using Tegra::Shader::Pred;	16	using Tegra::Shader::Pred;
16	using Tegra::Shader::VideoType;	17	using Tegra::Shader::VideoType;
17	using Tegra::Shader::VmadShr;	18	using Tegra::Shader::VmadShr;
		19	using Tegra::Shader::VmnmxOperation;
		20	using Tegra::Shader::VmnmxType;
18		21
19	u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {	22	u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
20	const Instruction instr = {program_code[pc]};	23	const Instruction instr = {program_code[pc]};
21	const auto opcode = OpCode::Decode(instr);	24	const auto opcode = OpCode::Decode(instr);
22		25
		26	if (opcode->get().GetId() == OpCode::Id::VMNMX) {
		27	DecodeVMNMX(bb, instr);
		28	return pc;
		29	}
		30
23	const Node op_a =	31	const Node op_a =
24	GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,	32	GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
25	instr.video.type_a, instr.video.byte_height_a);	33	instr.video.type_a, instr.video.byte_height_a);
@@ -109,4 +117,54 @@ Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
109	}	117	}
110	}	118	}
111		119
		120	void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) {
		121	UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register);
		122	UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32);
		123	UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32);
		124	UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed);
		125	UNIMPLEMENTED_IF(instr.vmnmx.sat);
		126	UNIMPLEMENTED_IF(instr.generates_cc);
		127
		128	Node op_a = GetRegister(instr.gpr8);
		129	Node op_b = GetRegister(instr.gpr20);
		130	Node op_c = GetRegister(instr.gpr39);
		131
		132	const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed
		133	const bool is_oper2_signed = instr.vmnmx.is_dest_signed;
		134
		135	const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin;
		136	Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b));
		137
		138	switch (instr.vmnmx.operation) {
		139	case VmnmxOperation::Mrg_16H:
		140	value = BitfieldInsert(move(op_c), move(value), 16, 16);
		141	break;
		142	case VmnmxOperation::Mrg_16L:
		143	value = BitfieldInsert(move(op_c), move(value), 0, 16);
		144	break;
		145	case VmnmxOperation::Mrg_8B0:
		146	value = BitfieldInsert(move(op_c), move(value), 0, 8);
		147	break;
		148	case VmnmxOperation::Mrg_8B2:
		149	value = BitfieldInsert(move(op_c), move(value), 16, 8);
		150	break;
		151	case VmnmxOperation::Acc:
		152	value = Operation(OperationCode::IAdd, move(value), move(op_c));
		153	break;
		154	case VmnmxOperation::Min:
		155	value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c));
		156	break;
		157	case VmnmxOperation::Max:
		158	value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c));
		159	break;
		160	case VmnmxOperation::Nop:
		161	break;
		162	default:
		163	UNREACHABLE();
		164	break;
		165	}
		166
		167	SetRegister(bb, instr.gpr0, move(value));
		168	}
		169
112	} // namespace VideoCommon::Shader	170	} // namespace VideoCommon::Shader


diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 80fc9b82c..0f1ebef1b 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h
@@ -350,6 +350,9 @@ private:
350	/// Marks the usage of a input or output attribute.	350	/// Marks the usage of a input or output attribute.
351	void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);	351	void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);
352		352
		353	/// Decodes VMNMX instruction and inserts its code into the passed basic block.
		354	void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr);
		355
353	void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,	356	void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
354	const Node4& components);	357	const Node4& components);
355		358