Merge pull request #1477 from ReinUsesLisp/vmad

gl_shader_decompiler: Implement VMAD
author: bunnei 2018-10-11 16:51:09 -0400
committer: GitHub 2018-10-11 16:51:09 -0400
commit: 83ac3e63959382c6341febfe997813dbd332876d (patch)
tree: 25649d373703fcc143a2cfb5688d3698a2d9e1cf /src/video_core
parent: Merge pull request #1458 from FernandoS27/fix-render-target-block-settings (diff)
parent: gl_shader_decompiler: Implement VMAD (diff)
download: yuzu-83ac3e63959382c6341febfe997813dbd332876d.tar.gz
yuzu-83ac3e63959382c6341febfe997813dbd332876d.tar.xz
yuzu-83ac3e63959382c6341febfe997813dbd332876d.zip
2 files changed, 118 insertions, 0 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 550ab1148..9a59b65b3 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -214,6 +214,18 @@ enum class IMinMaxExchange : u64 {
    XHi = 3,
 };
+enum class VmadType : u64 {
+    Size16_Low = 0,
+    Size16_High = 1,
+    Size32 = 2,
+    Invalid = 3,
+};
+enum class VmadShr : u64 {
+    Shr7 = 1,
+    Shr15 = 2,
+};
 enum class XmadMode : u64 {
    None = 0,
    CLo = 1,
@@ -452,6 +464,7 @@ union Instruction {
    BitField<48, 16, u64> opcode;
    union {
+        BitField<20, 16, u64> imm20_16;
        BitField<20, 19, u64> imm20_19;
        BitField<20, 32, s64> imm20_32;
        BitField<45, 1, u64> negate_b;
@@ -493,6 +506,10 @@ union Instruction {
            }
        } lop3;
+        u16 GetImm20_16() const {
+            return static_cast<u16>(imm20_16);
+        }
        u32 GetImm20_19() const {
            u32 imm{static_cast<u32>(imm20_19)};
            imm <<= 12;
@@ -1017,6 +1034,23 @@ union Instruction {
    } isberd;
    union {
+        BitField<48, 1, u64> signed_a;
+        BitField<38, 1, u64> is_byte_chunk_a;
+        BitField<36, 2, VmadType> type_a;
+        BitField<36, 2, u64> byte_height_a;
+        BitField<49, 1, u64> signed_b;
+        BitField<50, 1, u64> use_register_b;
+        BitField<30, 1, u64> is_byte_chunk_b;
+        BitField<28, 2, VmadType> type_b;
+        BitField<28, 2, u64> byte_height_b;
+        BitField<51, 2, VmadShr> shr;
+        BitField<55, 1, u64> saturate; // Saturates the result (a * b + c)
+        BitField<47, 1, u64> cc;
+    } vmad;
+    union {
        BitField<20, 16, u64> imm20_16;
        BitField<36, 1, u64> product_shift_left;
        BitField<37, 1, u64> merge_37;
@@ -1083,6 +1117,7 @@ public:
        IPA,
        OUT_R, // Emit vertex/primitive
        ISBERD,
+        VMAD,
        FFMA_IMM, // Fused Multiply and Add
        FFMA_CR,
        FFMA_RC,
@@ -1320,6 +1355,7 @@ private:
            INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
            INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
            INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
+            INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"),
            INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
            INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
            INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index c82a0dcfa..8dfb49507 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2953,6 +2953,88 @@ private:
                LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
                break;
            }
+            case OpCode::Id::VMAD: {
+                const bool signed_a = instr.vmad.signed_a == 1;
+                const bool signed_b = instr.vmad.signed_b == 1;
+                const bool result_signed = signed_a || signed_b;
+                boost::optional<std::string> forced_result;
+                auto Unpack = [&](const std::string& op, bool is_chunk, bool is_signed,
+                                  Tegra::Shader::VmadType type, u64 byte_height) {
+                    const std::string value = [&]() {
+                        if (!is_chunk) {
+                            const auto offset = static_cast<u32>(byte_height * 8);
+                            return "((" + op + " >> " + std::to_string(offset) + ") & 0xff)";
+                        }
+                        const std::string zero = "0";
+                        switch (type) {
+                        case Tegra::Shader::VmadType::Size16_Low:
+                            return '(' + op + " & 0xffff)";
+                        case Tegra::Shader::VmadType::Size16_High:
+                            return '(' + op + " >> 16)";
+                        case Tegra::Shader::VmadType::Size32:
+                            // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when
+                            // this type is used (1 * 1 + 0 == 0x5b800000). Until a better
+                            // explanation is found: assert.
+                            UNREACHABLE_MSG("Unimplemented");
+                            return zero;
+                        case Tegra::Shader::VmadType::Invalid:
+                            // Note(Rodrigo): This flag is invalid according to nvdisasm. From my
+                            // testing (even though it's invalid) this makes the whole instruction
+                            // assign zero to target register.
+                            forced_result = boost::make_optional(zero);
+                            return zero;
+                        default:
+                            UNREACHABLE();
+                            return zero;
+                        }
+                    }();
+                    if (is_signed) {
+                        return "int(" + value + ')';
+                    }
+                    return value;
+                };
+                const std::string op_a = Unpack(regs.GetRegisterAsInteger(instr.gpr8, 0, false),
+                                                instr.vmad.is_byte_chunk_a != 0, signed_a,
+                                                instr.vmad.type_a, instr.vmad.byte_height_a);
+                std::string op_b;
+                if (instr.vmad.use_register_b) {
+                    op_b = Unpack(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
+                                  instr.vmad.is_byte_chunk_b != 0, signed_b, instr.vmad.type_b,
+                                  instr.vmad.byte_height_b);
+                } else {
+                    op_b = '(' +
+                           std::to_string(signed_b ? static_cast<s16>(instr.alu.GetImm20_16())
+                                                   : instr.alu.GetImm20_16()) +
+                           ')';
+                }
+                const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39, 0, result_signed);
+                std::string result;
+                if (forced_result) {
+                    result = *forced_result;
+                } else {
+                    result = '(' + op_a + " * " + op_b + " + " + op_c + ')';
+                    switch (instr.vmad.shr) {
+                    case Tegra::Shader::VmadShr::Shr7:
+                        result = '(' + result + " >> 7)";
+                        break;
+                    case Tegra::Shader::VmadShr::Shr15:
+                        result = '(' + result + " >> 15)";
+                        break;
+                    }
+                }
+                regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1,
+                                          instr.vmad.saturate == 1, 0, Register::Size::Word,
+                                          instr.vmad.cc);
+                break;
+            }
            default: {
                LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName());
                UNREACHABLE();
author	bunnei	2018-10-11 16:51:09 -0400
committer	GitHub	2018-10-11 16:51:09 -0400
commit	83ac3e63959382c6341febfe997813dbd332876d (patch)
tree	25649d373703fcc143a2cfb5688d3698a2d9e1cf /src/video_core
parent	Merge pull request #1458 from FernandoS27/fix-render-target-block-settings (diff)
parent	gl_shader_decompiler: Implement VMAD (diff)
download	yuzu-83ac3e63959382c6341febfe997813dbd332876d.tar.gz yuzu-83ac3e63959382c6341febfe997813dbd332876d.tar.xz yuzu-83ac3e63959382c6341febfe997813dbd332876d.zip

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 550ab1148..9a59b65b3 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h
@@ -214,6 +214,18 @@ enum class IMinMaxExchange : u64 {
214	XHi = 3,	214	XHi = 3,
215	};	215	};
216		216
		217	enum class VmadType : u64 {
		218	Size16_Low = 0,
		219	Size16_High = 1,
		220	Size32 = 2,
		221	Invalid = 3,
		222	};
		223
		224	enum class VmadShr : u64 {
		225	Shr7 = 1,
		226	Shr15 = 2,
		227	};
		228
217	enum class XmadMode : u64 {	229	enum class XmadMode : u64 {
218	None = 0,	230	None = 0,
219	CLo = 1,	231	CLo = 1,
@@ -452,6 +464,7 @@ union Instruction {
452	BitField<48, 16, u64> opcode;	464	BitField<48, 16, u64> opcode;
453		465
454	union {	466	union {
		467	BitField<20, 16, u64> imm20_16;
455	BitField<20, 19, u64> imm20_19;	468	BitField<20, 19, u64> imm20_19;
456	BitField<20, 32, s64> imm20_32;	469	BitField<20, 32, s64> imm20_32;
457	BitField<45, 1, u64> negate_b;	470	BitField<45, 1, u64> negate_b;
@@ -493,6 +506,10 @@ union Instruction {
493	}	506	}
494	} lop3;	507	} lop3;
495		508
		509	u16 GetImm20_16() const {
		510	return static_cast<u16>(imm20_16);
		511	}
		512
496	u32 GetImm20_19() const {	513	u32 GetImm20_19() const {
497	u32 imm{static_cast<u32>(imm20_19)};	514	u32 imm{static_cast<u32>(imm20_19)};
498	imm <<= 12;	515	imm <<= 12;
@@ -1017,6 +1034,23 @@ union Instruction {
1017	} isberd;	1034	} isberd;
1018		1035
1019	union {	1036	union {
		1037	BitField<48, 1, u64> signed_a;
		1038	BitField<38, 1, u64> is_byte_chunk_a;
		1039	BitField<36, 2, VmadType> type_a;
		1040	BitField<36, 2, u64> byte_height_a;
		1041
		1042	BitField<49, 1, u64> signed_b;
		1043	BitField<50, 1, u64> use_register_b;
		1044	BitField<30, 1, u64> is_byte_chunk_b;
		1045	BitField<28, 2, VmadType> type_b;
		1046	BitField<28, 2, u64> byte_height_b;
		1047
		1048	BitField<51, 2, VmadShr> shr;
		1049	BitField<55, 1, u64> saturate; // Saturates the result (a * b + c)
		1050	BitField<47, 1, u64> cc;
		1051	} vmad;
		1052
		1053	union {
1020	BitField<20, 16, u64> imm20_16;	1054	BitField<20, 16, u64> imm20_16;
1021	BitField<36, 1, u64> product_shift_left;	1055	BitField<36, 1, u64> product_shift_left;
1022	BitField<37, 1, u64> merge_37;	1056	BitField<37, 1, u64> merge_37;
@@ -1083,6 +1117,7 @@ public:
1083	IPA,	1117	IPA,
1084	OUT_R, // Emit vertex/primitive	1118	OUT_R, // Emit vertex/primitive
1085	ISBERD,	1119	ISBERD,
		1120	VMAD,
1086	FFMA_IMM, // Fused Multiply and Add	1121	FFMA_IMM, // Fused Multiply and Add
1087	FFMA_CR,	1122	FFMA_CR,
1088	FFMA_RC,	1123	FFMA_RC,
@@ -1320,6 +1355,7 @@ private:
1320	INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),	1355	INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
1321	INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),	1356	INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
1322	INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),	1357	INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
		1358	INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"),
1323	INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),	1359	INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
1324	INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),	1360	INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
1325	INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),	1361	INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),


diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index c82a0dcfa..8dfb49507 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2953,6 +2953,88 @@ private:
2953	LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");	2953	LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
2954	break;	2954	break;
2955	}	2955	}
		2956	case OpCode::Id::VMAD: {
		2957	const bool signed_a = instr.vmad.signed_a == 1;
		2958	const bool signed_b = instr.vmad.signed_b == 1;
		2959	const bool result_signed = signed_a || signed_b;
		2960	boost::optional<std::string> forced_result;
		2961
		2962	auto Unpack = [&](const std::string& op, bool is_chunk, bool is_signed,
		2963	Tegra::Shader::VmadType type, u64 byte_height) {
		2964	const std::string value = [&]() {
		2965	if (!is_chunk) {
		2966	const auto offset = static_cast<u32>(byte_height * 8);
		2967	return "((" + op + " >> " + std::to_string(offset) + ") & 0xff)";
		2968	}
		2969	const std::string zero = "0";
		2970
		2971	switch (type) {
		2972	case Tegra::Shader::VmadType::Size16_Low:
		2973	return '(' + op + " & 0xffff)";
		2974	case Tegra::Shader::VmadType::Size16_High:
		2975	return '(' + op + " >> 16)";
		2976	case Tegra::Shader::VmadType::Size32:
		2977	// TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when
		2978	// this type is used (1 * 1 + 0 == 0x5b800000). Until a better
		2979	// explanation is found: assert.
		2980	UNREACHABLE_MSG("Unimplemented");
		2981	return zero;
		2982	case Tegra::Shader::VmadType::Invalid:
		2983	// Note(Rodrigo): This flag is invalid according to nvdisasm. From my
		2984	// testing (even though it's invalid) this makes the whole instruction
		2985	// assign zero to target register.
		2986	forced_result = boost::make_optional(zero);
		2987	return zero;
		2988	default:
		2989	UNREACHABLE();
		2990	return zero;
		2991	}
		2992	}();
		2993
		2994	if (is_signed) {
		2995	return "int(" + value + ')';
		2996	}
		2997	return value;
		2998	};
		2999
		3000	const std::string op_a = Unpack(regs.GetRegisterAsInteger(instr.gpr8, 0, false),
		3001	instr.vmad.is_byte_chunk_a != 0, signed_a,
		3002	instr.vmad.type_a, instr.vmad.byte_height_a);
		3003
		3004	std::string op_b;
		3005	if (instr.vmad.use_register_b) {
		3006	op_b = Unpack(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
		3007	instr.vmad.is_byte_chunk_b != 0, signed_b, instr.vmad.type_b,
		3008	instr.vmad.byte_height_b);
		3009	} else {
		3010	op_b = '(' +
		3011	std::to_string(signed_b ? static_cast<s16>(instr.alu.GetImm20_16())
		3012	: instr.alu.GetImm20_16()) +
		3013	')';
		3014	}
		3015
		3016	const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39, 0, result_signed);
		3017
		3018	std::string result;
		3019	if (forced_result) {
		3020	result = *forced_result;
		3021	} else {
		3022	result = '(' + op_a + " * " + op_b + " + " + op_c + ')';
		3023
		3024	switch (instr.vmad.shr) {
		3025	case Tegra::Shader::VmadShr::Shr7:
		3026	result = '(' + result + " >> 7)";
		3027	break;
		3028	case Tegra::Shader::VmadShr::Shr15:
		3029	result = '(' + result + " >> 15)";
		3030	break;
		3031	}
		3032	}
		3033	regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1,
		3034	instr.vmad.saturate == 1, 0, Register::Size::Word,
		3035	instr.vmad.cc);
		3036	break;
		3037	}
2956	default: {	3038	default: {
2957	LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName());	3039	LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName());
2958	UNREACHABLE();	3040	UNREACHABLE();