summaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
author: bunnei, 2018-10-11 16:51:09 -0400
committer: GitHub, 2018-10-11 16:51:09 -0400
commit: 83ac3e63959382c6341febfe997813dbd332876d (patch)
tree: 25649d373703fcc143a2cfb5688d3698a2d9e1cf /src/video_core
parent: Merge pull request #1458 from FernandoS27/fix-render-target-block-settings (diff)
parent: gl_shader_decompiler: Implement VMAD (diff)
download: yuzu-83ac3e63959382c6341febfe997813dbd332876d.tar.gz
yuzu-83ac3e63959382c6341febfe997813dbd332876d.tar.xz
yuzu-83ac3e63959382c6341febfe997813dbd332876d.zip
Merge pull request #1477 from ReinUsesLisp/vmad
gl_shader_decompiler: Implement VMAD
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/engines/shader_bytecode.h | 36
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 82
2 files changed, 118 insertions, 0 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 550ab1148..9a59b65b3 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -214,6 +214,18 @@ enum class IMinMaxExchange : u64 {
214 XHi = 3, 214 XHi = 3,
215}; 215};
216 216
// Selects which portion of a VMAD source operand is used when the operand is
// unpacked through the type path of the decompiler's Unpack helper (the path
// taken when the corresponding is_byte_chunk flag is non-zero).
217enum class VmadType : u64 {
218 Size16_Low = 0, // decompiled as (op & 0xffff)
219 Size16_High = 1, // decompiled as (op >> 16)
220 Size32 = 2, // not implemented: the decompiler hits UNREACHABLE_MSG and yields "0"
221 Invalid = 3, // the decompiler forces the whole VMAD result to zero
222};
223
// Right shift applied by the decompiler to the VMAD result (a * b + c).
// Only these two encodings are handled; any other value of the field leaves
// the result unshifted because the decompiler's switch has no other cases.
224enum class VmadShr : u64 {
225 Shr7 = 1, // result >> 7
226 Shr15 = 2, // result >> 15
227};
228
217enum class XmadMode : u64 { 229enum class XmadMode : u64 {
218 None = 0, 230 None = 0,
219 CLo = 1, 231 CLo = 1,
@@ -452,6 +464,7 @@ union Instruction {
452 BitField<48, 16, u64> opcode; 464 BitField<48, 16, u64> opcode;
453 465
454 union { 466 union {
467 BitField<20, 16, u64> imm20_16;
455 BitField<20, 19, u64> imm20_19; 468 BitField<20, 19, u64> imm20_19;
456 BitField<20, 32, s64> imm20_32; 469 BitField<20, 32, s64> imm20_32;
457 BitField<45, 1, u64> negate_b; 470 BitField<45, 1, u64> negate_b;
@@ -493,6 +506,10 @@ union Instruction {
493 } 506 }
494 } lop3; 507 } lop3;
495 508
// Returns the raw imm20_16 bit field converted to u16. No shift or sign
// extension is applied here; a caller that needs a signed value must
// reinterpret the result itself (the VMAD decompiler case casts it to s16).
509 u16 GetImm20_16() const {
510 return static_cast<u16>(imm20_16);
511 }
512
496 u32 GetImm20_19() const { 513 u32 GetImm20_19() const {
497 u32 imm{static_cast<u32>(imm20_19)}; 514 u32 imm{static_cast<u32>(imm20_19)};
498 imm <<= 12; 515 imm <<= 12;
@@ -1017,6 +1034,23 @@ union Instruction {
1017 } isberd; 1034 } isberd;
1018 1035
// Bit fields of the VMAD instruction (result = a * b + c, see the decompiler).
// Operand a and operand b have parallel sets of fields. For each operand the
// type_* and byte_height_* fields are declared with identical BitField
// parameters, i.e. they are two views of the same bits; which view is used
// depends on the matching is_byte_chunk_* flag.
1019 union { 1036 union {
1037 BitField<48, 1, u64> signed_a; // 1: the decompiler wraps operand a in int(...)
// Decompiler behaviour: 0 selects a single byte via byte_height_a,
// non-zero selects a part of the operand via type_a.
1038 BitField<38, 1, u64> is_byte_chunk_a;
1039 BitField<36, 2, VmadType> type_a;
1040 BitField<36, 2, u64> byte_height_a; // byte index; the decompiler shifts by byte_height * 8
1041
1042 BitField<49, 1, u64> signed_b; // 1: operand b is treated as signed
1043 BitField<50, 1, u64> use_register_b; // non-zero: b is read from gpr20, otherwise from the 16-bit immediate
// Same meaning as is_byte_chunk_a, but for operand b (only consulted
// by the decompiler when use_register_b is set).
1044 BitField<30, 1, u64> is_byte_chunk_b;
1045 BitField<28, 2, VmadType> type_b;
1046 BitField<28, 2, u64> byte_height_b;
1047
1048 BitField<51, 2, VmadShr> shr; // optional right shift of the result (>> 7 or >> 15)
1049 BitField<55, 1, u64> saturate; // Saturates the result (a * b + c)
// Forwarded as the last argument of SetRegisterToInteger by the decompiler;
// presumably requests a condition-code update - TODO confirm.
1050 BitField<47, 1, u64> cc;
1051 } vmad;
1052
1053 union {
1020 BitField<20, 16, u64> imm20_16; 1054 BitField<20, 16, u64> imm20_16;
1021 BitField<36, 1, u64> product_shift_left; 1055 BitField<36, 1, u64> product_shift_left;
1022 BitField<37, 1, u64> merge_37; 1056 BitField<37, 1, u64> merge_37;
@@ -1083,6 +1117,7 @@ public:
1083 IPA, 1117 IPA,
1084 OUT_R, // Emit vertex/primitive 1118 OUT_R, // Emit vertex/primitive
1085 ISBERD, 1119 ISBERD,
1120 VMAD,
1086 FFMA_IMM, // Fused Multiply and Add 1121 FFMA_IMM, // Fused Multiply and Add
1087 FFMA_CR, 1122 FFMA_CR,
1088 FFMA_RC, 1123 FFMA_RC,
@@ -1320,6 +1355,7 @@ private:
1320 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), 1355 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
1321 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), 1356 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
1322 INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), 1357 INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
1358 INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"),
1323 INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), 1359 INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
1324 INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), 1360 INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
1325 INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), 1361 INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index c82a0dcfa..8dfb49507 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2953,6 +2953,88 @@ private:
2953 LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed"); 2953 LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
2954 break; 2954 break;
2955 } 2955 }
// VMAD: builds the expression string for (a * b + c), where a and b are
// sub-word selections of their source operands, optionally followed by a
// right shift, and stores it into gpr0 (optionally saturated).
2956 case OpCode::Id::VMAD: {
2957 const bool signed_a = instr.vmad.signed_a == 1;
2958 const bool signed_b = instr.vmad.signed_b == 1;
// The result (and operand c) is treated as signed if either input is signed.
2959 const bool result_signed = signed_a || signed_b;
// Set by Unpack when an operand uses VmadType::Invalid; when present it
// replaces the computed expression entirely.
2960 boost::optional<std::string> forced_result;
2961
// Unpack returns an expression string that extracts the selected part of
// `op`:
//  - is_chunk == false: one byte, chosen by byte_height (shift by
//    byte_height * 8, mask 0xff);
//  - is_chunk == true: the part chosen by `type` (low/high 16 bits).
// When is_signed is set the expression is wrapped in int(...).
// NOTE(review): the int(...) wrap is applied after masking/shifting an
// unsigned value, so it does not appear to sign-extend the extracted
// byte/half-word - confirm this matches the intended hardware behaviour.
2962 auto Unpack = [&](const std::string& op, bool is_chunk, bool is_signed,
2963 Tegra::Shader::VmadType type, u64 byte_height) {
2964 const std::string value = [&]() {
2965 if (!is_chunk) {
2966 const auto offset = static_cast<u32>(byte_height * 8);
2967 return "((" + op + " >> " + std::to_string(offset) + ") & 0xff)";
2968 }
2969 const std::string zero = "0";
2970
2971 switch (type) {
2972 case Tegra::Shader::VmadType::Size16_Low:
2973 return '(' + op + " & 0xffff)";
2974 case Tegra::Shader::VmadType::Size16_High:
2975 return '(' + op + " >> 16)";
2976 case Tegra::Shader::VmadType::Size32:
2977 // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when
2978 // this type is used (1 * 1 + 0 == 0x5b800000). Until a better
2979 // explanation is found: assert.
2980 UNREACHABLE_MSG("Unimplemented");
2981 return zero;
2982 case Tegra::Shader::VmadType::Invalid:
2983 // Note(Rodrigo): This flag is invalid according to nvdisasm. From my
2984 // testing (even though it's invalid) this makes the whole instruction
2985 // assign zero to target register.
// Side effect: captured by reference, consumed after both operands are unpacked.
2986 forced_result = boost::make_optional(zero);
2987 return zero;
2988 default:
2989 UNREACHABLE();
2990 return zero;
2991 }
2992 }();
2993
2994 if (is_signed) {
2995 return "int(" + value + ')';
2996 }
2997 return value;
2998 };
2999
// Operand a always comes from gpr8, fetched as an unsigned integer so the
// shifts/masks in Unpack operate on the raw bits.
3000 const std::string op_a = Unpack(regs.GetRegisterAsInteger(instr.gpr8, 0, false),
3001 instr.vmad.is_byte_chunk_a != 0, signed_a,
3002 instr.vmad.type_a, instr.vmad.byte_height_a);
3003
// Operand b is either gpr20 (unpacked like operand a) or the 16-bit
// immediate, which is emitted as a literal without going through Unpack.
3004 std::string op_b;
3005 if (instr.vmad.use_register_b) {
3006 op_b = Unpack(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
3007 instr.vmad.is_byte_chunk_b != 0, signed_b, instr.vmad.type_b,
3008 instr.vmad.byte_height_b);
3009 } else {
// For a signed b the immediate is reinterpreted as s16 first, so
// values with bit 15 set are printed as negative numbers.
3010 op_b = '(' +
3011 std::to_string(signed_b ? static_cast<s16>(instr.alu.GetImm20_16())
3012 : instr.alu.GetImm20_16()) +
3013 ')';
3014 }
3015
// Operand c is the full gpr39 register, read with the result's signedness.
3016 const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39, 0, result_signed);
3017
3018 std::string result;
3019 if (forced_result) {
3020 result = *forced_result;
3021 } else {
3022 result = '(' + op_a + " * " + op_b + " + " + op_c + ')';
3023
// Only Shr7 and Shr15 are handled; any other shr encoding leaves the
// expression unshifted.
3024 switch (instr.vmad.shr) {
3025 case Tegra::Shader::VmadShr::Shr7:
3026 result = '(' + result + " >> 7)";
3027 break;
3028 case Tegra::Shader::VmadShr::Shr15:
3029 result = '(' + result + " >> 15)";
3030 break;
3031 }
3032 }
// Writes the expression to gpr0, forwarding the saturate and cc flags.
// NOTE(review): the meaning of the literal 1/1/1/0 arguments is defined by
// SetRegisterToInteger, which is not visible here - verify against it.
3033 regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1,
3034 instr.vmad.saturate == 1, 0, Register::Size::Word,
3035 instr.vmad.cc);
3036 break;
3037 }
2956 default: { 3038 default: {
2957 LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName()); 3039 LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName());
2958 UNREACHABLE(); 3040 UNREACHABLE();