diff options
Diffstat (limited to '')
16 files changed, 1039 insertions, 178 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 49d1f4bfb..bd1f96c07 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp | |||
| @@ -56,6 +56,7 @@ Program::Program(Environment& env, const Flow::CFG& cfg) { | |||
| 56 | Optimization::Invoke(Optimization::IdentityRemovalPass, function); | 56 | Optimization::Invoke(Optimization::IdentityRemovalPass, function); |
| 57 | // Optimization::Invoke(Optimization::VerificationPass, function); | 57 | // Optimization::Invoke(Optimization::VerificationPass, function); |
| 58 | } | 58 | } |
| 59 | //*/ | ||
| 59 | } | 60 | } |
| 60 | 61 | ||
| 61 | std::string DumpProgram(const Program& program) { | 62 | std::string DumpProgram(const Program& program) { |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h new file mode 100644 index 000000000..3da37a2bb --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h | |||
| @@ -0,0 +1,56 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/exception.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | |||
| 14 | enum class FpRounding : u64 { | ||
| 15 | RN, | ||
| 16 | RM, | ||
| 17 | RP, | ||
| 18 | RZ, | ||
| 19 | }; | ||
| 20 | |||
| 21 | enum class FmzMode : u64 { | ||
| 22 | None, | ||
| 23 | FTZ, | ||
| 24 | FMZ, | ||
| 25 | INVALIDFMZ3, | ||
| 26 | }; | ||
| 27 | |||
| 28 | inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) { | ||
| 29 | switch (fp_rounding) { | ||
| 30 | case FpRounding::RN: | ||
| 31 | return IR::FpRounding::RN; | ||
| 32 | case FpRounding::RM: | ||
| 33 | return IR::FpRounding::RM; | ||
| 34 | case FpRounding::RP: | ||
| 35 | return IR::FpRounding::RP; | ||
| 36 | case FpRounding::RZ: | ||
| 37 | return IR::FpRounding::RZ; | ||
| 38 | } | ||
| 39 | throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding); | ||
| 40 | } | ||
| 41 | |||
| 42 | inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) { | ||
| 43 | switch (fmz_mode) { | ||
| 44 | case FmzMode::None: | ||
| 45 | return IR::FmzMode::None; | ||
| 46 | case FmzMode::FTZ: | ||
| 47 | return IR::FmzMode::FTZ; | ||
| 48 | case FmzMode::FMZ: | ||
| 49 | return IR::FmzMode::FMZ; | ||
| 50 | case FmzMode::INVALIDFMZ3: | ||
| 51 | break; | ||
| 52 | } | ||
| 53 | throw NotImplementedException("Invalid FMZ mode {}", fmz_mode); | ||
| 54 | } | ||
| 55 | |||
| 56 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp new file mode 100644 index 000000000..d2c44b9cc --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | |||
| 13 | void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding, | ||
| 14 | const IR::U32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) { | ||
| 15 | union { | ||
| 16 | u64 raw; | ||
| 17 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 18 | BitField<8, 8, IR::Reg> src_a; | ||
| 19 | } const fadd{insn}; | ||
| 20 | |||
| 21 | if (sat) { | ||
| 22 | throw NotImplementedException("FADD SAT"); | ||
| 23 | } | ||
| 24 | if (cc) { | ||
| 25 | throw NotImplementedException("FADD CC"); | ||
| 26 | } | ||
| 27 | const IR::U32 op_a{v.ir.FPAbsNeg(v.X(fadd.src_a), abs_a, neg_a)}; | ||
| 28 | const IR::U32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; | ||
| 29 | IR::FpControl control{ | ||
| 30 | .no_contraction{true}, | ||
| 31 | .rounding{CastFpRounding(fp_rounding)}, | ||
| 32 | .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, | ||
| 33 | }; | ||
| 34 | v.X(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); | ||
| 35 | } | ||
| 36 | |||
| 37 | void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { | ||
| 38 | union { | ||
| 39 | u64 raw; | ||
| 40 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 41 | BitField<44, 1, u64> ftz; | ||
| 42 | BitField<45, 1, u64> neg_b; | ||
| 43 | BitField<46, 1, u64> abs_a; | ||
| 44 | BitField<47, 1, u64> cc; | ||
| 45 | BitField<48, 1, u64> neg_a; | ||
| 46 | BitField<49, 1, u64> abs_b; | ||
| 47 | BitField<50, 1, u64> sat; | ||
| 48 | } const fadd{insn}; | ||
| 49 | |||
| 50 | FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b, | ||
| 51 | fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0); | ||
| 52 | } | ||
| 53 | } // Anonymous namespace | ||
| 54 | |||
| 55 | void TranslatorVisitor::FADD_reg(u64 insn) { | ||
| 56 | FADD(*this, insn, GetReg20(insn)); | ||
| 57 | } | ||
| 58 | |||
| 59 | void TranslatorVisitor::FADD_cbuf(u64) { | ||
| 60 | throw NotImplementedException("FADD (cbuf)"); | ||
| 61 | } | ||
| 62 | |||
| 63 | void TranslatorVisitor::FADD_imm(u64) { | ||
| 64 | throw NotImplementedException("FADD (imm)"); | ||
| 65 | } | ||
| 66 | |||
| 67 | void TranslatorVisitor::FADD32I(u64) { | ||
| 68 | throw NotImplementedException("FADD32I"); | ||
| 69 | } | ||
| 70 | |||
| 71 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..30ca052ec --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp | |||
| @@ -0,0 +1,73 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, bool neg_a, | ||
| 13 | bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<8, 8, IR::Reg> src_a; | ||
| 18 | } const ffma{insn}; | ||
| 19 | |||
| 20 | if (sat) { | ||
| 21 | throw NotImplementedException("FFMA SAT"); | ||
| 22 | } | ||
| 23 | if (cc) { | ||
| 24 | throw NotImplementedException("FFMA CC"); | ||
| 25 | } | ||
| 26 | const IR::U32 op_a{v.ir.FPAbsNeg(v.X(ffma.src_a), false, neg_a)}; | ||
| 27 | const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; | ||
| 28 | const IR::U32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; | ||
| 29 | const IR::FpControl fp_control{ | ||
| 30 | .no_contraction{true}, | ||
| 31 | .rounding{CastFpRounding(fp_rounding)}, | ||
| 32 | .fmz_mode{CastFmzMode(fmz_mode)}, | ||
| 33 | }; | ||
| 34 | v.X(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control)); | ||
| 35 | } | ||
| 36 | |||
| 37 | void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c) { | ||
| 38 | union { | ||
| 39 | u64 raw; | ||
| 40 | BitField<47, 1, u64> cc; | ||
| 41 | BitField<48, 1, u64> neg_b; | ||
| 42 | BitField<49, 1, u64> neg_c; | ||
| 43 | BitField<50, 1, u64> sat; | ||
| 44 | BitField<51, 2, FpRounding> fp_rounding; | ||
| 45 | BitField<53, 2, FmzMode> fmz_mode; | ||
| 46 | } const ffma{insn}; | ||
| 47 | |||
| 48 | FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0, | ||
| 49 | ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding); | ||
| 50 | } | ||
| 51 | } // Anonymous namespace | ||
| 52 | |||
| 53 | void TranslatorVisitor::FFMA_reg(u64 insn) { | ||
| 54 | FFMA(*this, insn, GetReg20(insn), GetReg39(insn)); | ||
| 55 | } | ||
| 56 | |||
| 57 | void TranslatorVisitor::FFMA_rc(u64) { | ||
| 58 | throw NotImplementedException("FFMA (rc)"); | ||
| 59 | } | ||
| 60 | |||
| 61 | void TranslatorVisitor::FFMA_cr(u64 insn) { | ||
| 62 | FFMA(*this, insn, GetCbuf(insn), GetReg39(insn)); | ||
| 63 | } | ||
| 64 | |||
| 65 | void TranslatorVisitor::FFMA_imm(u64) { | ||
| 66 | throw NotImplementedException("FFMA (imm)"); | ||
| 67 | } | ||
| 68 | |||
| 69 | void TranslatorVisitor::FFMA32I(u64) { | ||
| 70 | throw NotImplementedException("FFMA32I"); | ||
| 71 | } | ||
| 72 | |||
| 73 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp new file mode 100644 index 000000000..743a1e2f0 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp | |||
| @@ -0,0 +1,108 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | enum class Scale : u64 { | ||
| 14 | None, | ||
| 15 | D2, | ||
| 16 | D4, | ||
| 17 | D8, | ||
| 18 | M8, | ||
| 19 | M4, | ||
| 20 | M2, | ||
| 21 | INVALIDSCALE37, | ||
| 22 | }; | ||
| 23 | |||
| 24 | float ScaleFactor(Scale scale) { | ||
| 25 | switch (scale) { | ||
| 26 | case Scale::None: | ||
| 27 | return 1.0f; | ||
| 28 | case Scale::D2: | ||
| 29 | return 1.0f / 2.0f; | ||
| 30 | case Scale::D4: | ||
| 31 | return 1.0f / 4.0f; | ||
| 32 | case Scale::D8: | ||
| 33 | return 1.0f / 8.0f; | ||
| 34 | case Scale::M8: | ||
| 35 | return 8.0f; | ||
| 36 | case Scale::M4: | ||
| 37 | return 4.0f; | ||
| 38 | case Scale::M2: | ||
| 39 | return 2.0f; | ||
| 40 | case Scale::INVALIDSCALE37: | ||
| 41 | break; | ||
| 42 | } | ||
| 43 | throw NotImplementedException("Invalid FMUL scale {}", scale); | ||
| 44 | } | ||
| 45 | |||
| 46 | void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode, | ||
| 47 | FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) { | ||
| 48 | union { | ||
| 49 | u64 raw; | ||
| 50 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 51 | BitField<8, 8, IR::Reg> src_a; | ||
| 52 | } const fmul{insn}; | ||
| 53 | |||
| 54 | if (cc) { | ||
| 55 | throw NotImplementedException("FMUL CC"); | ||
| 56 | } | ||
| 57 | if (sat) { | ||
| 58 | throw NotImplementedException("FMUL SAT"); | ||
| 59 | } | ||
| 60 | IR::U32 op_a{v.X(fmul.src_a)}; | ||
| 61 | if (scale != Scale::None) { | ||
| 62 | if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { | ||
| 63 | throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers"); | ||
| 64 | } | ||
| 65 | op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale))); | ||
| 66 | } | ||
| 67 | const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; | ||
| 68 | const IR::FpControl fp_control{ | ||
| 69 | .no_contraction{true}, | ||
| 70 | .rounding{CastFpRounding(fp_rounding)}, | ||
| 71 | .fmz_mode{CastFmzMode(fmz_mode)}, | ||
| 72 | }; | ||
| 73 | v.X(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control)); | ||
| 74 | } | ||
| 75 | |||
| 76 | void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { | ||
| 77 | union { | ||
| 78 | u64 raw; | ||
| 79 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 80 | BitField<41, 3, Scale> scale; | ||
| 81 | BitField<44, 2, FmzMode> fmz; | ||
| 82 | BitField<47, 1, u64> cc; | ||
| 83 | BitField<48, 1, u64> neg_b; | ||
| 84 | BitField<50, 1, u64> sat; | ||
| 85 | } fmul{insn}; | ||
| 86 | |||
| 87 | FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, | ||
| 88 | fmul.neg_b != 0); | ||
| 89 | } | ||
| 90 | } // Anonymous namespace | ||
| 91 | |||
| 92 | void TranslatorVisitor::FMUL_reg(u64 insn) { | ||
| 93 | return FMUL(*this, insn, GetReg20(insn)); | ||
| 94 | } | ||
| 95 | |||
| 96 | void TranslatorVisitor::FMUL_cbuf(u64) { | ||
| 97 | throw NotImplementedException("FMUL (cbuf)"); | ||
| 98 | } | ||
| 99 | |||
| 100 | void TranslatorVisitor::FMUL_imm(u64) { | ||
| 101 | throw NotImplementedException("FMUL (imm)"); | ||
| 102 | } | ||
| 103 | |||
| 104 | void TranslatorVisitor::FMUL32I(u64) { | ||
| 105 | throw NotImplementedException("FMUL32I"); | ||
| 106 | } | ||
| 107 | |||
| 108 | } // namespace Shader::Maxwell \ No newline at end of file | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 7bc7ce9f2..548c7f611 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp | |||
| @@ -16,6 +16,22 @@ void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { | |||
| 16 | ir.SetReg(dest_reg, value); | 16 | ir.SetReg(dest_reg, value); |
| 17 | } | 17 | } |
| 18 | 18 | ||
| 19 | IR::U32 TranslatorVisitor::GetReg20(u64 insn) { | ||
| 20 | union { | ||
| 21 | u64 raw; | ||
| 22 | BitField<20, 8, IR::Reg> index; | ||
| 23 | } const reg{insn}; | ||
| 24 | return X(reg.index); | ||
| 25 | } | ||
| 26 | |||
| 27 | IR::U32 TranslatorVisitor::GetReg39(u64 insn) { | ||
| 28 | union { | ||
| 29 | u64 raw; | ||
| 30 | BitField<39, 8, IR::Reg> index; | ||
| 31 | } const reg{insn}; | ||
| 32 | return X(reg.index); | ||
| 33 | } | ||
| 34 | |||
| 19 | IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { | 35 | IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { |
| 20 | union { | 36 | union { |
| 21 | u64 raw; | 37 | u64 raw; |
| @@ -33,7 +49,7 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { | |||
| 33 | return ir.GetCbuf(binding, byte_offset); | 49 | return ir.GetCbuf(binding, byte_offset); |
| 34 | } | 50 | } |
| 35 | 51 | ||
| 36 | IR::U32 TranslatorVisitor::GetImm(u64 insn) { | 52 | IR::U32 TranslatorVisitor::GetImm20(u64 insn) { |
| 37 | union { | 53 | union { |
| 38 | u64 raw; | 54 | u64 raw; |
| 39 | BitField<20, 19, u64> value; | 55 | BitField<20, 19, u64> value; |
| @@ -44,6 +60,14 @@ IR::U32 TranslatorVisitor::GetImm(u64 insn) { | |||
| 44 | return ir.Imm32(value); | 60 | return ir.Imm32(value); |
| 45 | } | 61 | } |
| 46 | 62 | ||
| 63 | IR::U32 TranslatorVisitor::GetImm32(u64 insn) { | ||
| 64 | union { | ||
| 65 | u64 raw; | ||
| 66 | BitField<20, 32, u64> value; | ||
| 67 | } const imm{insn}; | ||
| 68 | return ir.Imm32(static_cast<u32>(imm.value)); | ||
| 69 | } | ||
| 70 | |||
| 47 | void TranslatorVisitor::SetZFlag(const IR::U1& value) { | 71 | void TranslatorVisitor::SetZFlag(const IR::U1& value) { |
| 48 | ir.SetZFlag(value); | 72 | ir.SetZFlag(value); |
| 49 | } | 73 | } |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 8be7d6ff1..ef6d977fe 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h | |||
| @@ -46,7 +46,7 @@ public: | |||
| 46 | void DADD_reg(u64 insn); | 46 | void DADD_reg(u64 insn); |
| 47 | void DADD_cbuf(u64 insn); | 47 | void DADD_cbuf(u64 insn); |
| 48 | void DADD_imm(u64 insn); | 48 | void DADD_imm(u64 insn); |
| 49 | void DEPBAR(u64 insn); | 49 | void DEPBAR(); |
| 50 | void DFMA_reg(u64 insn); | 50 | void DFMA_reg(u64 insn); |
| 51 | void DFMA_rc(u64 insn); | 51 | void DFMA_rc(u64 insn); |
| 52 | void DFMA_cr(u64 insn); | 52 | void DFMA_cr(u64 insn); |
| @@ -298,9 +298,14 @@ public: | |||
| 298 | [[nodiscard]] IR::U32 X(IR::Reg reg); | 298 | [[nodiscard]] IR::U32 X(IR::Reg reg); |
| 299 | void X(IR::Reg dest_reg, const IR::U32& value); | 299 | void X(IR::Reg dest_reg, const IR::U32& value); |
| 300 | 300 | ||
| 301 | [[nodiscard]] IR::U32 GetReg20(u64 insn); | ||
| 302 | [[nodiscard]] IR::U32 GetReg39(u64 insn); | ||
| 303 | |||
| 301 | [[nodiscard]] IR::U32 GetCbuf(u64 insn); | 304 | [[nodiscard]] IR::U32 GetCbuf(u64 insn); |
| 302 | 305 | ||
| 303 | [[nodiscard]] IR::U32 GetImm(u64 insn); | 306 | [[nodiscard]] IR::U32 GetImm20(u64 insn); |
| 307 | |||
| 308 | [[nodiscard]] IR::U32 GetImm32(u64 insn); | ||
| 304 | 309 | ||
| 305 | void SetZFlag(const IR::U1& value); | 310 | void SetZFlag(const IR::U1& value); |
| 306 | void SetSFlag(const IR::U1& value); | 311 | void SetSFlag(const IR::U1& value); |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp new file mode 100644 index 000000000..60f79b160 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp | |||
| @@ -0,0 +1,106 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x, | ||
| 12 | bool cc) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_a; | ||
| 17 | } const iadd{insn}; | ||
| 18 | |||
| 19 | if (sat) { | ||
| 20 | throw NotImplementedException("IADD SAT"); | ||
| 21 | } | ||
| 22 | if (x && po) { | ||
| 23 | throw NotImplementedException("IADD X+PO"); | ||
| 24 | } | ||
| 25 | // Operand A is always read from here, negated if needed | ||
| 26 | IR::U32 op_a{v.X(iadd.src_a)}; | ||
| 27 | if (neg_a) { | ||
| 28 | op_a = v.ir.INeg(op_a); | ||
| 29 | } | ||
| 30 | // Add both operands | ||
| 31 | IR::U32 result{v.ir.IAdd(op_a, op_b)}; | ||
| 32 | if (x) { | ||
| 33 | const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; | ||
| 34 | result = v.ir.IAdd(result, carry); | ||
| 35 | } | ||
| 36 | if (po) { | ||
| 37 | // .PO adds one to the result | ||
| 38 | result = v.ir.IAdd(result, v.ir.Imm32(1)); | ||
| 39 | } | ||
| 40 | if (cc) { | ||
| 41 | // Store flags | ||
| 42 | // TODO: Does this grab the result pre-PO or after? | ||
| 43 | if (po) { | ||
| 44 | throw NotImplementedException("IADD CC+PO"); | ||
| 45 | } | ||
| 46 | // TODO: How does CC behave when X is set? | ||
| 47 | if (x) { | ||
| 48 | throw NotImplementedException("IADD X+CC"); | ||
| 49 | } | ||
| 50 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 51 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 52 | v.SetCFlag(v.ir.GetCarryFromOp(result)); | ||
| 53 | v.SetOFlag(v.ir.GetOverflowFromOp(result)); | ||
| 54 | } | ||
| 55 | // Store result | ||
| 56 | v.X(iadd.dest_reg, result); | ||
| 57 | } | ||
| 58 | |||
| 59 | void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { | ||
| 60 | union { | ||
| 61 | u64 insn; | ||
| 62 | BitField<43, 1, u64> x; | ||
| 63 | BitField<47, 1, u64> cc; | ||
| 64 | BitField<48, 2, u64> three_for_po; | ||
| 65 | BitField<48, 1, u64> neg_b; | ||
| 66 | BitField<49, 1, u64> neg_a; | ||
| 67 | BitField<50, 1, u64> sat; | ||
| 68 | } const iadd{insn}; | ||
| 69 | |||
| 70 | const bool po{iadd.three_for_po == 3}; | ||
| 71 | const bool neg_a{!po && iadd.neg_a != 0}; | ||
| 72 | if (!po && iadd.neg_b != 0) { | ||
| 73 | op_b = v.ir.INeg(op_b); | ||
| 74 | } | ||
| 75 | IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0); | ||
| 76 | } | ||
| 77 | } // Anonymous namespace | ||
| 78 | |||
| 79 | void TranslatorVisitor::IADD_reg(u64) { | ||
| 80 | throw NotImplementedException("IADD (reg)"); | ||
| 81 | } | ||
| 82 | |||
| 83 | void TranslatorVisitor::IADD_cbuf(u64 insn) { | ||
| 84 | IADD(*this, insn, GetCbuf(insn)); | ||
| 85 | } | ||
| 86 | |||
| 87 | void TranslatorVisitor::IADD_imm(u64) { | ||
| 88 | throw NotImplementedException("IADD (imm)"); | ||
| 89 | } | ||
| 90 | |||
| 91 | void TranslatorVisitor::IADD32I(u64 insn) { | ||
| 92 | union { | ||
| 93 | u64 raw; | ||
| 94 | BitField<52, 1, u64> cc; | ||
| 95 | BitField<53, 1, u64> x; | ||
| 96 | BitField<54, 1, u64> sat; | ||
| 97 | BitField<55, 2, u64> three_for_po; | ||
| 98 | BitField<56, 1, u64> neg_a; | ||
| 99 | } const iadd32i{insn}; | ||
| 100 | |||
| 101 | const bool po{iadd32i.three_for_po == 3}; | ||
| 102 | const bool neg_a{!po && iadd32i.neg_a != 0}; | ||
| 103 | IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0); | ||
| 104 | } | ||
| 105 | |||
| 106 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp new file mode 100644 index 000000000..f92c0bbd6 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp | |||
| @@ -0,0 +1,73 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> op_a; | ||
| 16 | BitField<47, 1, u64> cc; | ||
| 17 | BitField<48, 2, u64> three_for_po; | ||
| 18 | BitField<48, 1, u64> neg_b; | ||
| 19 | BitField<49, 1, u64> neg_a; | ||
| 20 | BitField<39, 5, u64> scale; | ||
| 21 | } const iscadd{insn}; | ||
| 22 | |||
| 23 | const bool po{iscadd.three_for_po == 3}; | ||
| 24 | IR::U32 op_a{v.X(iscadd.op_a)}; | ||
| 25 | if (!po) { | ||
| 26 | // When PO is not present, the bits are interpreted as negation | ||
| 27 | if (iscadd.neg_a != 0) { | ||
| 28 | op_a = v.ir.INeg(op_a); | ||
| 29 | } | ||
| 30 | if (iscadd.neg_b != 0) { | ||
| 31 | op_b = v.ir.INeg(op_b); | ||
| 32 | } | ||
| 33 | } | ||
| 34 | // With the operands already processed, scale A | ||
| 35 | const IR::U32 scale{v.ir.Imm32(static_cast<u32>(iscadd.scale))}; | ||
| 36 | const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)}; | ||
| 37 | |||
| 38 | IR::U32 result{v.ir.IAdd(scaled_a, op_b)}; | ||
| 39 | if (po) { | ||
| 40 | // .PO adds one to the final result | ||
| 41 | result = v.ir.IAdd(result, v.ir.Imm32(1)); | ||
| 42 | } | ||
| 43 | v.X(iscadd.dest_reg, result); | ||
| 44 | |||
| 45 | if (iscadd.cc != 0) { | ||
| 46 | throw NotImplementedException("ISCADD CC"); | ||
| 47 | } | ||
| 48 | } | ||
| 49 | |||
| 50 | } // Anonymous namespace | ||
| 51 | |||
| 52 | void TranslatorVisitor::ISCADD_reg(u64 insn) { | ||
| 53 | union { | ||
| 54 | u64 raw; | ||
| 55 | BitField<20, 8, IR::Reg> op_b; | ||
| 56 | } const iscadd{insn}; | ||
| 57 | |||
| 58 | ISCADD(*this, insn, X(iscadd.op_b)); | ||
| 59 | } | ||
| 60 | |||
| 61 | void TranslatorVisitor::ISCADD_cbuf(u64) { | ||
| 62 | throw NotImplementedException("ISCADD (cbuf)"); | ||
| 63 | } | ||
| 64 | |||
| 65 | void TranslatorVisitor::ISCADD_imm(u64) { | ||
| 66 | throw NotImplementedException("ISCADD (imm)"); | ||
| 67 | } | ||
| 68 | |||
| 69 | void TranslatorVisitor::ISCADD32I(u64) { | ||
| 70 | throw NotImplementedException("ISCADD32I"); | ||
| 71 | } | ||
| 72 | |||
| 73 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp new file mode 100644 index 000000000..76c6b5291 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp | |||
| @@ -0,0 +1,99 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class CompareOp : u64 { | ||
| 12 | F, // Always false | ||
| 13 | LT, // Less than | ||
| 14 | EQ, // Equal | ||
| 15 | LE, // Less than or equal | ||
| 16 | GT, // Greater than | ||
| 17 | NE, // Not equal | ||
| 18 | GE, // Greater than or equal | ||
| 19 | T, // Always true | ||
| 20 | }; | ||
| 21 | |||
| 22 | enum class Bop : u64 { | ||
| 23 | AND, | ||
| 24 | OR, | ||
| 25 | XOR, | ||
| 26 | }; | ||
| 27 | |||
| 28 | IR::U1 Compare(IR::IREmitter& ir, CompareOp op, const IR::U32& lhs, const IR::U32& rhs, | ||
| 29 | bool is_signed) { | ||
| 30 | switch (op) { | ||
| 31 | case CompareOp::F: | ||
| 32 | return ir.Imm1(false); | ||
| 33 | case CompareOp::LT: | ||
| 34 | return ir.ILessThan(lhs, rhs, is_signed); | ||
| 35 | case CompareOp::EQ: | ||
| 36 | return ir.IEqual(lhs, rhs); | ||
| 37 | case CompareOp::LE: | ||
| 38 | return ir.ILessThanEqual(lhs, rhs, is_signed); | ||
| 39 | case CompareOp::GT: | ||
| 40 | return ir.IGreaterThan(lhs, rhs, is_signed); | ||
| 41 | case CompareOp::NE: | ||
| 42 | return ir.INotEqual(lhs, rhs); | ||
| 43 | case CompareOp::GE: | ||
| 44 | return ir.IGreaterThanEqual(lhs, rhs, is_signed); | ||
| 45 | case CompareOp::T: | ||
| 46 | return ir.Imm1(true); | ||
| 47 | } | ||
| 48 | throw NotImplementedException("Invalid ISETP compare op {}", op); | ||
| 49 | } | ||
| 50 | |||
| 51 | IR::U1 Combine(IR::IREmitter& ir, Bop bop, const IR::U1& comparison, const IR::U1& bop_pred) { | ||
| 52 | switch (bop) { | ||
| 53 | case Bop::AND: | ||
| 54 | return ir.LogicalAnd(comparison, bop_pred); | ||
| 55 | case Bop::OR: | ||
| 56 | return ir.LogicalOr(comparison, bop_pred); | ||
| 57 | case Bop::XOR: | ||
| 58 | return ir.LogicalXor(comparison, bop_pred); | ||
| 59 | } | ||
| 60 | throw NotImplementedException("Invalid ISETP bop {}", bop); | ||
| 61 | } | ||
| 62 | |||
| 63 | void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { | ||
| 64 | union { | ||
| 65 | u64 raw; | ||
| 66 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 67 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 68 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 69 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 70 | BitField<42, 1, u64> neg_bop_pred; | ||
| 71 | BitField<45, 2, Bop> bop; | ||
| 72 | BitField<48, 1, u64> is_signed; | ||
| 73 | BitField<49, 3, CompareOp> compare_op; | ||
| 74 | } const isetp{insn}; | ||
| 75 | |||
| 76 | const Bop bop{isetp.bop}; | ||
| 77 | const IR::U32 op_a{v.X(isetp.src_reg_a)}; | ||
| 78 | const IR::U1 comparison{Compare(v.ir, isetp.compare_op, op_a, op_b, isetp.is_signed != 0)}; | ||
| 79 | const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)}; | ||
| 80 | const IR::U1 result_a{Combine(v.ir, bop, comparison, bop_pred)}; | ||
| 81 | const IR::U1 result_b{Combine(v.ir, bop, v.ir.LogicalNot(comparison), bop_pred)}; | ||
| 82 | v.ir.SetPred(isetp.dest_pred_a, result_a); | ||
| 83 | v.ir.SetPred(isetp.dest_pred_b, result_b); | ||
| 84 | } | ||
| 85 | } // Anonymous namespace | ||
| 86 | |||
| 87 | void TranslatorVisitor::ISETP_reg(u64 insn) { | ||
| 88 | ISETP(*this, insn, GetReg20(insn)); | ||
| 89 | } | ||
| 90 | |||
| 91 | void TranslatorVisitor::ISETP_cbuf(u64 insn) { | ||
| 92 | ISETP(*this, insn, GetCbuf(insn)); | ||
| 93 | } | ||
| 94 | |||
| 95 | void TranslatorVisitor::ISETP_imm(u64) { | ||
| 96 | throw NotImplementedException("ISETP_imm"); | ||
| 97 | } | ||
| 98 | |||
| 99 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp new file mode 100644 index 000000000..d4b417d14 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 16 | BitField<39, 1, u64> w; | ||
| 17 | BitField<43, 1, u64> x; | ||
| 18 | BitField<47, 1, u64> cc; | ||
| 19 | } const shl{insn}; | ||
| 20 | |||
| 21 | if (shl.x != 0) { | ||
| 22 | throw NotImplementedException("SHL.X"); | ||
| 23 | } | ||
| 24 | if (shl.cc != 0) { | ||
| 25 | throw NotImplementedException("SHL.CC"); | ||
| 26 | } | ||
| 27 | const IR::U32 base{v.X(shl.src_reg_a)}; | ||
| 28 | IR::U32 result; | ||
| 29 | if (shl.w != 0) { | ||
| 30 | // When .W is set, the shift value is wrapped | ||
| 31 | // To emulate this we just have to clamp it ourselves. | ||
| 32 | const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))}; | ||
| 33 | result = v.ir.ShiftLeftLogical(base, shift); | ||
| 34 | } else { | ||
| 35 | // When .W is not set, the shift value is clamped between 0 and 32. | ||
| 36 | // To emulate this we have to have in mind the special shift of 32, that evaluates as 0. | ||
| 37 | // We can safely evaluate an out of bounds shift according to the SPIR-V specification: | ||
| 38 | // | ||
| 39 | // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical | ||
| 40 | // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than | ||
| 41 | // or equal to the bit width of the components of Base." | ||
| 42 | // | ||
| 43 | // And on the GLASM specification it is also safe to evaluate out of bounds: | ||
| 44 | // | ||
| 45 | // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt | ||
| 46 | // "The results of a shift operation ("<<") are undefined if the value of the second operand | ||
| 47 | // is negative, or greater than or equal to the number of bits in the first operand." | ||
| 48 | // | ||
| 49 | // Emphasis on undefined results in contrast to undefined behavior. | ||
| 50 | // | ||
| 51 | const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)}; | ||
| 52 | const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)}; | ||
| 53 | result = v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0)); | ||
| 54 | } | ||
| 55 | v.X(shl.dest_reg, result); | ||
| 56 | } | ||
| 57 | } // Anonymous namespace | ||
| 58 | |||
| 59 | void TranslatorVisitor::SHL_reg(u64) { | ||
| 60 | throw NotImplementedException("SHL_reg"); | ||
| 61 | } | ||
| 62 | |||
| 63 | void TranslatorVisitor::SHL_cbuf(u64) { | ||
| 64 | throw NotImplementedException("SHL_cbuf"); | ||
| 65 | } | ||
| 66 | |||
| 67 | void TranslatorVisitor::SHL_imm(u64 insn) { | ||
| 68 | SHL(*this, insn, GetImm20(insn)); | ||
| 69 | } | ||
| 70 | |||
| 71 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp new file mode 100644 index 000000000..70a7c76c5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class SelectMode : u64 { | ||
| 12 | Default, | ||
| 13 | CLO, | ||
| 14 | CHI, | ||
| 15 | CSFU, | ||
| 16 | CBCC, | ||
| 17 | }; | ||
| 18 | |||
| 19 | enum class Half : u64 { | ||
| 20 | H0, // Least-significant bits (15:0) | ||
| 21 | H1, // Most-significant bits (31:16) | ||
| 22 | }; | ||
| 23 | |||
| 24 | IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) { | ||
| 25 | const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)}; | ||
| 26 | return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed); | ||
| 27 | } | ||
| 28 | |||
| 29 | void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, | ||
| 30 | SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) { | ||
| 31 | union { | ||
| 32 | u64 raw; | ||
| 33 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 34 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 35 | BitField<47, 1, u64> cc; | ||
| 36 | BitField<48, 1, u64> is_a_signed; | ||
| 37 | BitField<49, 1, u64> is_b_signed; | ||
| 38 | BitField<53, 1, Half> half_a; | ||
| 39 | } const xmad{insn}; | ||
| 40 | |||
| 41 | if (x) { | ||
| 42 | throw NotImplementedException("XMAD X"); | ||
| 43 | } | ||
| 44 | const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)}; | ||
| 45 | const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)}; | ||
| 46 | |||
| 47 | IR::U32 product{v.ir.IMul(op_a, op_b)}; | ||
| 48 | if (psl) { | ||
| 49 | // .PSL shifts the product 16 bits | ||
| 50 | product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16)); | ||
| 51 | } | ||
| 52 | const IR::U32 op_c{[&]() -> IR::U32 { | ||
| 53 | switch (select_mode) { | ||
| 54 | case SelectMode::Default: | ||
| 55 | return src_c; | ||
| 56 | case SelectMode::CLO: | ||
| 57 | return ExtractHalf(v, src_c, Half::H0, false); | ||
| 58 | case SelectMode::CHI: | ||
| 59 | return ExtractHalf(v, src_c, Half::H1, false); | ||
| 60 | case SelectMode::CBCC: | ||
| 61 | return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_b); | ||
| 62 | case SelectMode::CSFU: | ||
| 63 | throw NotImplementedException("XMAD CSFU"); | ||
| 64 | } | ||
| 65 | throw NotImplementedException("Invalid XMAD select mode {}", select_mode); | ||
| 66 | }()}; | ||
| 67 | IR::U32 result{v.ir.IAdd(product, op_c)}; | ||
| 68 | if (mrg) { | ||
| 69 | // .MRG inserts src_b [15:0] into result's [31:16]. | ||
| 70 | const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)}; | ||
| 71 | result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16)); | ||
| 72 | } | ||
| 73 | if (xmad.cc) { | ||
| 74 | throw NotImplementedException("XMAD CC"); | ||
| 75 | } | ||
| 76 | // Store result | ||
| 77 | v.X(xmad.dest_reg, result); | ||
| 78 | } | ||
| 79 | } // Anonymous namespace | ||
| 80 | |||
| 81 | void TranslatorVisitor::XMAD_reg(u64) { | ||
| 82 | throw NotImplementedException("XMAD (reg)"); | ||
| 83 | } | ||
| 84 | |||
| 85 | void TranslatorVisitor::XMAD_rc(u64) { | ||
| 86 | throw NotImplementedException("XMAD (rc)"); | ||
| 87 | } | ||
| 88 | |||
| 89 | void TranslatorVisitor::XMAD_cr(u64) { | ||
| 90 | throw NotImplementedException("XMAD (cr)"); | ||
| 91 | } | ||
| 92 | |||
| 93 | void TranslatorVisitor::XMAD_imm(u64 insn) { | ||
| 94 | union { | ||
| 95 | u64 raw; | ||
| 96 | BitField<20, 16, u64> src_b; | ||
| 97 | BitField<36, 1, u64> psl; | ||
| 98 | BitField<37, 1, u64> mrg; | ||
| 99 | BitField<38, 1, u64> x; | ||
| 100 | BitField<39, 8, IR::Reg> src_c; | ||
| 101 | BitField<50, 3, SelectMode> select_mode; | ||
| 102 | } const xmad{insn}; | ||
| 103 | |||
| 104 | const IR::U32 src_b{ir.Imm32(static_cast<u32>(xmad.src_b))}; | ||
| 105 | const IR::U32 src_c{X(xmad.src_c)}; | ||
| 106 | XMAD(*this, insn, src_b, src_c, xmad.select_mode, Half::H0, xmad.psl != 0, xmad.mrg != 0, | ||
| 107 | xmad.x != 0); | ||
| 108 | } | ||
| 109 | |||
| 110 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index d8fd387cf..c9669c617 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp | |||
| @@ -10,16 +10,35 @@ | |||
| 10 | 10 | ||
| 11 | namespace Shader::Maxwell { | 11 | namespace Shader::Maxwell { |
| 12 | namespace { | 12 | namespace { |
| 13 | enum class LoadSize : u64 { | ||
| 14 | U8, // Zero-extend | ||
| 15 | S8, // Sign-extend | ||
| 16 | U16, // Zero-extend | ||
| 17 | S16, // Sign-extend | ||
| 18 | B32, | ||
| 19 | B64, | ||
| 20 | B128, | ||
| 21 | U128, // ??? | ||
| 22 | }; | ||
| 23 | |||
| 13 | enum class StoreSize : u64 { | 24 | enum class StoreSize : u64 { |
| 14 | U8, | 25 | U8, // Zero-extend |
| 15 | S8, | 26 | S8, // Sign-extend |
| 16 | U16, | 27 | U16, // Zero-extend |
| 17 | S16, | 28 | S16, // Sign-extend |
| 18 | B32, | 29 | B32, |
| 19 | B64, | 30 | B64, |
| 20 | B128, | 31 | B128, |
| 21 | }; | 32 | }; |
| 22 | 33 | ||
| 34 | // See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html | ||
| 35 | enum class LoadCache : u64 { | ||
| 36 | CA, // Cache at all levels, likely to be accessed again | ||
| 37 | CG, // Cache at global level (cache in L2 and below, not L1) | ||
| 38 | CI, // ??? | ||
| 39 | CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again) | ||
| 40 | }; | ||
| 41 | |||
| 23 | // See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html | 42 | // See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html |
| 24 | enum class StoreCache : u64 { | 43 | enum class StoreCache : u64 { |
| 25 | WB, // Cache write-back all coherent levels | 44 | WB, // Cache write-back all coherent levels |
| @@ -27,61 +46,137 @@ enum class StoreCache : u64 { | |||
| 27 | CS, // Cache streaming, likely to be accessed once | 46 | CS, // Cache streaming, likely to be accessed once |
| 28 | WT, // Cache write-through (to system memory) | 47 | WT, // Cache write-through (to system memory) |
| 29 | }; | 48 | }; |
| 30 | } // Anonymous namespace | ||
| 31 | 49 | ||
| 32 | void TranslatorVisitor::STG(u64 insn) { | 50 | IR::U64 Address(TranslatorVisitor& v, u64 insn) { |
| 33 | // STG stores registers into global memory. | ||
| 34 | union { | 51 | union { |
| 35 | u64 raw; | 52 | u64 raw; |
| 36 | BitField<0, 8, IR::Reg> data_reg; | ||
| 37 | BitField<8, 8, IR::Reg> addr_reg; | 53 | BitField<8, 8, IR::Reg> addr_reg; |
| 54 | BitField<20, 24, s64> addr_offset; | ||
| 55 | BitField<20, 24, u64> rz_addr_offset; | ||
| 38 | BitField<45, 1, u64> e; | 56 | BitField<45, 1, u64> e; |
| 39 | BitField<46, 2, StoreCache> cache; | 57 | } const mem{insn}; |
| 40 | BitField<48, 3, StoreSize> size; | ||
| 41 | } const stg{insn}; | ||
| 42 | 58 | ||
| 43 | const IR::U64 address{[&]() -> IR::U64 { | 59 | const IR::U64 address{[&]() -> IR::U64 { |
| 44 | if (stg.e == 0) { | 60 | if (mem.e == 0) { |
| 45 | // STG without .E uses a 32-bit pointer, zero-extend it | 61 | // LDG/STG without .E uses a 32-bit pointer, zero-extend it |
| 46 | return ir.ConvertU(64, X(stg.addr_reg)); | 62 | return v.ir.ConvertU(64, v.X(mem.addr_reg)); |
| 47 | } | 63 | } |
| 48 | if (!IR::IsAligned(stg.addr_reg, 2)) { | 64 | if (!IR::IsAligned(mem.addr_reg, 2)) { |
| 49 | throw NotImplementedException("Unaligned address register"); | 65 | throw NotImplementedException("Unaligned address register"); |
| 50 | } | 66 | } |
| 51 | // Pack two registers to build the 32-bit address | 67 | // Pack two registers to build the 64-bit address |
| 52 | return ir.PackUint2x32(ir.CompositeConstruct(X(stg.addr_reg), X(stg.addr_reg + 1))); | 68 | return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1))); |
| 69 | }()}; | ||
| 70 | const u64 addr_offset{[&]() -> u64 { | ||
| 71 | if (mem.addr_reg == IR::Reg::RZ) { | ||
| 72 | // When RZ is used, the address is an absolute address | ||
| 73 | return static_cast<u64>(mem.rz_addr_offset.Value()); | ||
| 74 | } else { | ||
| 75 | return static_cast<u64>(mem.addr_offset.Value()); | ||
| 76 | } | ||
| 53 | }()}; | 77 | }()}; |
| 78 | // Apply the offset | ||
| 79 | return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); | ||
| 80 | } | ||
| 81 | } // Anonymous namespace | ||
| 82 | |||
| 83 | void TranslatorVisitor::LDG(u64 insn) { | ||
| 84 | // LDG loads global memory into registers | ||
| 85 | union { | ||
| 86 | u64 raw; | ||
| 87 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 88 | BitField<46, 2, LoadCache> cache; | ||
| 89 | BitField<48, 3, LoadSize> size; | ||
| 90 | } const ldg{insn}; | ||
| 91 | |||
| 92 | // Pointer to load data from | ||
| 93 | const IR::U64 address{Address(*this, insn)}; | ||
| 94 | const IR::Reg dest_reg{ldg.dest_reg}; | ||
| 95 | switch (ldg.size) { | ||
| 96 | case LoadSize::U8: | ||
| 97 | X(dest_reg, ir.LoadGlobalU8(address)); | ||
| 98 | break; | ||
| 99 | case LoadSize::S8: | ||
| 100 | X(dest_reg, ir.LoadGlobalS8(address)); | ||
| 101 | break; | ||
| 102 | case LoadSize::U16: | ||
| 103 | X(dest_reg, ir.LoadGlobalU16(address)); | ||
| 104 | break; | ||
| 105 | case LoadSize::S16: | ||
| 106 | X(dest_reg, ir.LoadGlobalS16(address)); | ||
| 107 | break; | ||
| 108 | case LoadSize::B32: | ||
| 109 | X(dest_reg, ir.LoadGlobal32(address)); | ||
| 110 | break; | ||
| 111 | case LoadSize::B64: { | ||
| 112 | if (!IR::IsAligned(dest_reg, 2)) { | ||
| 113 | throw NotImplementedException("Unaligned data registers"); | ||
| 114 | } | ||
| 115 | const IR::Value vector{ir.LoadGlobal64(address)}; | ||
| 116 | for (int i = 0; i < 2; ++i) { | ||
| 117 | X(dest_reg + i, ir.CompositeExtract(vector, i)); | ||
| 118 | } | ||
| 119 | break; | ||
| 120 | } | ||
| 121 | case LoadSize::B128: { | ||
| 122 | if (!IR::IsAligned(dest_reg, 4)) { | ||
| 123 | throw NotImplementedException("Unaligned data registers"); | ||
| 124 | } | ||
| 125 | const IR::Value vector{ir.LoadGlobal128(address)}; | ||
| 126 | for (int i = 0; i < 4; ++i) { | ||
| 127 | X(dest_reg + i, ir.CompositeExtract(vector, i)); | ||
| 128 | } | ||
| 129 | break; | ||
| 130 | } | ||
| 131 | case LoadSize::U128: | ||
| 132 | throw NotImplementedException("LDG U.128"); | ||
| 133 | default: | ||
| 134 | throw NotImplementedException("Invalid LDG size {}", ldg.size.Value()); | ||
| 135 | } | ||
| 136 | } | ||
| 137 | |||
| 138 | void TranslatorVisitor::STG(u64 insn) { | ||
| 139 | // STG stores registers into global memory. | ||
| 140 | union { | ||
| 141 | u64 raw; | ||
| 142 | BitField<0, 8, IR::Reg> data_reg; | ||
| 143 | BitField<46, 2, StoreCache> cache; | ||
| 144 | BitField<48, 3, StoreSize> size; | ||
| 145 | } const stg{insn}; | ||
| 54 | 146 | ||
| 147 | // Pointer to store data into | ||
| 148 | const IR::U64 address{Address(*this, insn)}; | ||
| 149 | const IR::Reg data_reg{stg.data_reg}; | ||
| 55 | switch (stg.size) { | 150 | switch (stg.size) { |
| 56 | case StoreSize::U8: | 151 | case StoreSize::U8: |
| 57 | ir.WriteGlobalU8(address, X(stg.data_reg)); | 152 | ir.WriteGlobalU8(address, X(data_reg)); |
| 58 | break; | 153 | break; |
| 59 | case StoreSize::S8: | 154 | case StoreSize::S8: |
| 60 | ir.WriteGlobalS8(address, X(stg.data_reg)); | 155 | ir.WriteGlobalS8(address, X(data_reg)); |
| 61 | break; | 156 | break; |
| 62 | case StoreSize::U16: | 157 | case StoreSize::U16: |
| 63 | ir.WriteGlobalU16(address, X(stg.data_reg)); | 158 | ir.WriteGlobalU16(address, X(data_reg)); |
| 64 | break; | 159 | break; |
| 65 | case StoreSize::S16: | 160 | case StoreSize::S16: |
| 66 | ir.WriteGlobalS16(address, X(stg.data_reg)); | 161 | ir.WriteGlobalS16(address, X(data_reg)); |
| 67 | break; | 162 | break; |
| 68 | case StoreSize::B32: | 163 | case StoreSize::B32: |
| 69 | ir.WriteGlobal32(address, X(stg.data_reg)); | 164 | ir.WriteGlobal32(address, X(data_reg)); |
| 70 | break; | 165 | break; |
| 71 | case StoreSize::B64: { | 166 | case StoreSize::B64: { |
| 72 | if (!IR::IsAligned(stg.data_reg, 2)) { | 167 | if (!IR::IsAligned(data_reg, 2)) { |
| 73 | throw NotImplementedException("Unaligned data registers"); | 168 | throw NotImplementedException("Unaligned data registers"); |
| 74 | } | 169 | } |
| 75 | const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1))}; | 170 | const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))}; |
| 76 | ir.WriteGlobal64(address, vector); | 171 | ir.WriteGlobal64(address, vector); |
| 77 | break; | 172 | break; |
| 78 | } | 173 | } |
| 79 | case StoreSize::B128: | 174 | case StoreSize::B128: |
| 80 | if (!IR::IsAligned(stg.data_reg, 4)) { | 175 | if (!IR::IsAligned(data_reg, 4)) { |
| 81 | throw NotImplementedException("Unaligned data registers"); | 176 | throw NotImplementedException("Unaligned data registers"); |
| 82 | } | 177 | } |
| 83 | const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1), | 178 | const IR::Value vector{ |
| 84 | X(stg.data_reg + 2), X(stg.data_reg + 3))}; | 179 | ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))}; |
| 85 | ir.WriteGlobal128(address, vector); | 180 | ir.WriteGlobal128(address, vector); |
| 86 | break; | 181 | break; |
| 87 | } | 182 | } |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp index 7fa35ba3a..1711d3f48 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp | |||
| @@ -39,7 +39,7 @@ void TranslatorVisitor::MOV_cbuf(u64 insn) { | |||
| 39 | void TranslatorVisitor::MOV_imm(u64 insn) { | 39 | void TranslatorVisitor::MOV_imm(u64 insn) { |
| 40 | const MOV mov{insn}; | 40 | const MOV mov{insn}; |
| 41 | CheckMask(mov); | 41 | CheckMask(mov); |
| 42 | X(mov.dest_reg, GetImm(insn)); | 42 | X(mov.dest_reg, GetImm20(insn)); |
| 43 | } | 43 | } |
| 44 | 44 | ||
| 45 | } // namespace Shader::Maxwell | 45 | } // namespace Shader::Maxwell |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp new file mode 100644 index 000000000..93cea302a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp | |||
| @@ -0,0 +1,114 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class SpecialRegister : u64 { | ||
| 12 | SR_LANEID = 0, | ||
| 13 | SR_VIRTCFG = 2, | ||
| 14 | SR_VIRTID = 3, | ||
| 15 | SR_PM0 = 4, | ||
| 16 | SR_PM1 = 5, | ||
| 17 | SR_PM2 = 6, | ||
| 18 | SR_PM3 = 7, | ||
| 19 | SR_PM4 = 8, | ||
| 20 | SR_PM5 = 9, | ||
| 21 | SR_PM6 = 10, | ||
| 22 | SR_PM7 = 11, | ||
| 23 | SR_ORDERING_TICKET = 15, | ||
| 24 | SR_PRIM_TYPE = 16, | ||
| 25 | SR_INVOCATION_ID = 17, | ||
| 26 | SR_Y_DIRECTION = 18, | ||
| 27 | SR_THREAD_KILL = 19, | ||
| 28 | SM_SHADER_TYPE = 20, | ||
| 29 | SR_DIRECTCBEWRITEADDRESSLOW = 21, | ||
| 30 | SR_DIRECTCBEWRITEADDRESSHIGH = 22, | ||
| 31 | SR_DIRECTCBEWRITEENABLE = 23, | ||
| 32 | SR_MACHINE_ID_0 = 24, | ||
| 33 | SR_MACHINE_ID_1 = 25, | ||
| 34 | SR_MACHINE_ID_2 = 26, | ||
| 35 | SR_MACHINE_ID_3 = 27, | ||
| 36 | SR_AFFINITY = 28, | ||
| 37 | SR_INVOCATION_INFO = 29, | ||
| 38 | SR_WSCALEFACTOR_XY = 30, | ||
| 39 | SR_WSCALEFACTOR_Z = 31, | ||
| 40 | SR_TID = 32, | ||
| 41 | SR_TID_X = 33, | ||
| 42 | SR_TID_Y = 34, | ||
| 43 | SR_TID_Z = 35, | ||
| 44 | SR_CTAID_X = 37, | ||
| 45 | SR_CTAID_Y = 38, | ||
| 46 | SR_CTAID_Z = 39, | ||
| 47 | SR_NTID = 49, | ||
| 48 | SR_CirQueueIncrMinusOne = 50, | ||
| 49 | SR_NLATC = 51, | ||
| 50 | SR_SWINLO = 57, | ||
| 51 | SR_SWINSZ = 58, | ||
| 52 | SR_SMEMSZ = 59, | ||
| 53 | SR_SMEMBANKS = 60, | ||
| 54 | SR_LWINLO = 61, | ||
| 55 | SR_LWINSZ = 62, | ||
| 56 | SR_LMEMLOSZ = 63, | ||
| 57 | SR_LMEMHIOFF = 64, | ||
| 58 | SR_EQMASK = 65, | ||
| 59 | SR_LTMASK = 66, | ||
| 60 | SR_LEMASK = 67, | ||
| 61 | SR_GTMASK = 68, | ||
| 62 | SR_GEMASK = 69, | ||
| 63 | SR_REGALLOC = 70, | ||
| 64 | SR_GLOBALERRORSTATUS = 73, | ||
| 65 | SR_WARPERRORSTATUS = 75, | ||
| 66 | SR_PM_HI0 = 81, | ||
| 67 | SR_PM_HI1 = 82, | ||
| 68 | SR_PM_HI2 = 83, | ||
| 69 | SR_PM_HI3 = 84, | ||
| 70 | SR_PM_HI4 = 85, | ||
| 71 | SR_PM_HI5 = 86, | ||
| 72 | SR_PM_HI6 = 87, | ||
| 73 | SR_PM_HI7 = 88, | ||
| 74 | SR_CLOCKLO = 89, | ||
| 75 | SR_CLOCKHI = 90, | ||
| 76 | SR_GLOBALTIMERLO = 91, | ||
| 77 | SR_GLOBALTIMERHI = 92, | ||
| 78 | SR_HWTASKID = 105, | ||
| 79 | SR_CIRCULARQUEUEENTRYINDEX = 106, | ||
| 80 | SR_CIRCULARQUEUEENTRYADDRESSLOW = 107, | ||
| 81 | SR_CIRCULARQUEUEENTRYADDRESSHIGH = 108, | ||
| 82 | }; | ||
| 83 | |||
| 84 | [[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { | ||
| 85 | switch (special_register) { | ||
| 86 | case SpecialRegister::SR_TID_X: | ||
| 87 | return ir.LocalInvocationIdX(); | ||
| 88 | case SpecialRegister::SR_TID_Y: | ||
| 89 | return ir.LocalInvocationIdY(); | ||
| 90 | case SpecialRegister::SR_TID_Z: | ||
| 91 | return ir.LocalInvocationIdZ(); | ||
| 92 | case SpecialRegister::SR_CTAID_X: | ||
| 93 | return ir.WorkgroupIdX(); | ||
| 94 | case SpecialRegister::SR_CTAID_Y: | ||
| 95 | return ir.WorkgroupIdY(); | ||
| 96 | case SpecialRegister::SR_CTAID_Z: | ||
| 97 | return ir.WorkgroupIdZ(); | ||
| 98 | default: | ||
| 99 | throw NotImplementedException("S2R special register {}", special_register); | ||
| 100 | } | ||
| 101 | } | ||
| 102 | } // Anonymous namespace | ||
| 103 | |||
| 104 | void TranslatorVisitor::S2R(u64 insn) { | ||
| 105 | union { | ||
| 106 | u64 raw; | ||
| 107 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 108 | BitField<20, 8, SpecialRegister> src_reg; | ||
| 109 | } const s2r{insn}; | ||
| 110 | |||
| 111 | X(s2r.dest_reg, Read(ir, s2r.src_reg)); | ||
| 112 | } | ||
| 113 | |||
| 114 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 0f52696d1..d70399f6b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -7,21 +7,8 @@ | |||
| 7 | #include "shader_recompiler/frontend/maxwell/opcode.h" | 7 | #include "shader_recompiler/frontend/maxwell/opcode.h" |
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" |
| 9 | 9 | ||
| 10 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | 10 | namespace Shader::Maxwell { |
| 13 | 11 | ||
| 14 | [[maybe_unused]] static inline void DumpOptimized(IR::Block& block) { | ||
| 15 | auto raw{IR::DumpBlock(block)}; | ||
| 16 | |||
| 17 | Optimization::GetSetElimination(block); | ||
| 18 | Optimization::DeadCodeEliminationPass(block); | ||
| 19 | Optimization::IdentityRemovalPass(block); | ||
| 20 | auto dumped{IR::DumpBlock(block)}; | ||
| 21 | |||
| 22 | fmt::print(stderr, "{}", dumped); | ||
| 23 | } | ||
| 24 | |||
| 25 | [[noreturn]] static void ThrowNotImplemented(Opcode opcode) { | 12 | [[noreturn]] static void ThrowNotImplemented(Opcode opcode) { |
| 26 | throw NotImplementedException("Instruction {} is not implemented", opcode); | 13 | throw NotImplementedException("Instruction {} is not implemented", opcode); |
| 27 | } | 14 | } |
| @@ -146,8 +133,8 @@ void TranslatorVisitor::DADD_imm(u64) { | |||
| 146 | ThrowNotImplemented(Opcode::DADD_imm); | 133 | ThrowNotImplemented(Opcode::DADD_imm); |
| 147 | } | 134 | } |
| 148 | 135 | ||
| 149 | void TranslatorVisitor::DEPBAR(u64) { | 136 | void TranslatorVisitor::DEPBAR() { |
| 150 | ThrowNotImplemented(Opcode::DEPBAR); | 137 | // DEPBAR is a no-op |
| 151 | } | 138 | } |
| 152 | 139 | ||
| 153 | void TranslatorVisitor::DFMA_reg(u64) { | 140 | void TranslatorVisitor::DFMA_reg(u64) { |
| @@ -230,22 +217,6 @@ void TranslatorVisitor::F2F_imm(u64) { | |||
| 230 | ThrowNotImplemented(Opcode::F2F_imm); | 217 | ThrowNotImplemented(Opcode::F2F_imm); |
| 231 | } | 218 | } |
| 232 | 219 | ||
| 233 | void TranslatorVisitor::FADD_reg(u64) { | ||
| 234 | ThrowNotImplemented(Opcode::FADD_reg); | ||
| 235 | } | ||
| 236 | |||
| 237 | void TranslatorVisitor::FADD_cbuf(u64) { | ||
| 238 | ThrowNotImplemented(Opcode::FADD_cbuf); | ||
| 239 | } | ||
| 240 | |||
| 241 | void TranslatorVisitor::FADD_imm(u64) { | ||
| 242 | ThrowNotImplemented(Opcode::FADD_imm); | ||
| 243 | } | ||
| 244 | |||
| 245 | void TranslatorVisitor::FADD32I(u64) { | ||
| 246 | ThrowNotImplemented(Opcode::FADD32I); | ||
| 247 | } | ||
| 248 | |||
| 249 | void TranslatorVisitor::FCHK_reg(u64) { | 220 | void TranslatorVisitor::FCHK_reg(u64) { |
| 250 | ThrowNotImplemented(Opcode::FCHK_reg); | 221 | ThrowNotImplemented(Opcode::FCHK_reg); |
| 251 | } | 222 | } |
| @@ -274,26 +245,6 @@ void TranslatorVisitor::FCMP_imm(u64) { | |||
| 274 | ThrowNotImplemented(Opcode::FCMP_imm); | 245 | ThrowNotImplemented(Opcode::FCMP_imm); |
| 275 | } | 246 | } |
| 276 | 247 | ||
| 277 | void TranslatorVisitor::FFMA_reg(u64) { | ||
| 278 | ThrowNotImplemented(Opcode::FFMA_reg); | ||
| 279 | } | ||
| 280 | |||
| 281 | void TranslatorVisitor::FFMA_rc(u64) { | ||
| 282 | ThrowNotImplemented(Opcode::FFMA_rc); | ||
| 283 | } | ||
| 284 | |||
| 285 | void TranslatorVisitor::FFMA_cr(u64) { | ||
| 286 | ThrowNotImplemented(Opcode::FFMA_cr); | ||
| 287 | } | ||
| 288 | |||
| 289 | void TranslatorVisitor::FFMA_imm(u64) { | ||
| 290 | ThrowNotImplemented(Opcode::FFMA_imm); | ||
| 291 | } | ||
| 292 | |||
| 293 | void TranslatorVisitor::FFMA32I(u64) { | ||
| 294 | ThrowNotImplemented(Opcode::FFMA32I); | ||
| 295 | } | ||
| 296 | |||
| 297 | void TranslatorVisitor::FLO_reg(u64) { | 248 | void TranslatorVisitor::FLO_reg(u64) { |
| 298 | ThrowNotImplemented(Opcode::FLO_reg); | 249 | ThrowNotImplemented(Opcode::FLO_reg); |
| 299 | } | 250 | } |
| @@ -318,22 +269,6 @@ void TranslatorVisitor::FMNMX_imm(u64) { | |||
| 318 | ThrowNotImplemented(Opcode::FMNMX_imm); | 269 | ThrowNotImplemented(Opcode::FMNMX_imm); |
| 319 | } | 270 | } |
| 320 | 271 | ||
| 321 | void TranslatorVisitor::FMUL_reg(u64) { | ||
| 322 | ThrowNotImplemented(Opcode::FMUL_reg); | ||
| 323 | } | ||
| 324 | |||
| 325 | void TranslatorVisitor::FMUL_cbuf(u64) { | ||
| 326 | ThrowNotImplemented(Opcode::FMUL_cbuf); | ||
| 327 | } | ||
| 328 | |||
| 329 | void TranslatorVisitor::FMUL_imm(u64) { | ||
| 330 | ThrowNotImplemented(Opcode::FMUL_imm); | ||
| 331 | } | ||
| 332 | |||
| 333 | void TranslatorVisitor::FMUL32I(u64) { | ||
| 334 | ThrowNotImplemented(Opcode::FMUL32I); | ||
| 335 | } | ||
| 336 | |||
| 337 | void TranslatorVisitor::FSET_reg(u64) { | 272 | void TranslatorVisitor::FSET_reg(u64) { |
| 338 | ThrowNotImplemented(Opcode::FSET_reg); | 273 | ThrowNotImplemented(Opcode::FSET_reg); |
| 339 | } | 274 | } |
| @@ -470,18 +405,6 @@ void TranslatorVisitor::I2I_imm(u64) { | |||
| 470 | ThrowNotImplemented(Opcode::I2I_imm); | 405 | ThrowNotImplemented(Opcode::I2I_imm); |
| 471 | } | 406 | } |
| 472 | 407 | ||
| 473 | void TranslatorVisitor::IADD_reg(u64) { | ||
| 474 | ThrowNotImplemented(Opcode::IADD_reg); | ||
| 475 | } | ||
| 476 | |||
| 477 | void TranslatorVisitor::IADD_cbuf(u64) { | ||
| 478 | ThrowNotImplemented(Opcode::IADD_cbuf); | ||
| 479 | } | ||
| 480 | |||
| 481 | void TranslatorVisitor::IADD_imm(u64) { | ||
| 482 | ThrowNotImplemented(Opcode::IADD_imm); | ||
| 483 | } | ||
| 484 | |||
| 485 | void TranslatorVisitor::IADD3_reg(u64) { | 408 | void TranslatorVisitor::IADD3_reg(u64) { |
| 486 | ThrowNotImplemented(Opcode::IADD3_reg); | 409 | ThrowNotImplemented(Opcode::IADD3_reg); |
| 487 | } | 410 | } |
| @@ -494,10 +417,6 @@ void TranslatorVisitor::IADD3_imm(u64) { | |||
| 494 | ThrowNotImplemented(Opcode::IADD3_imm); | 417 | ThrowNotImplemented(Opcode::IADD3_imm); |
| 495 | } | 418 | } |
| 496 | 419 | ||
| 497 | void TranslatorVisitor::IADD32I(u64) { | ||
| 498 | ThrowNotImplemented(Opcode::IADD32I); | ||
| 499 | } | ||
| 500 | |||
| 501 | void TranslatorVisitor::ICMP_reg(u64) { | 420 | void TranslatorVisitor::ICMP_reg(u64) { |
| 502 | ThrowNotImplemented(Opcode::ICMP_reg); | 421 | ThrowNotImplemented(Opcode::ICMP_reg); |
| 503 | } | 422 | } |
| @@ -594,22 +513,6 @@ void TranslatorVisitor::ISBERD(u64) { | |||
| 594 | ThrowNotImplemented(Opcode::ISBERD); | 513 | ThrowNotImplemented(Opcode::ISBERD); |
| 595 | } | 514 | } |
| 596 | 515 | ||
| 597 | void TranslatorVisitor::ISCADD_reg(u64) { | ||
| 598 | ThrowNotImplemented(Opcode::ISCADD_reg); | ||
| 599 | } | ||
| 600 | |||
| 601 | void TranslatorVisitor::ISCADD_cbuf(u64) { | ||
| 602 | ThrowNotImplemented(Opcode::ISCADD_cbuf); | ||
| 603 | } | ||
| 604 | |||
| 605 | void TranslatorVisitor::ISCADD_imm(u64) { | ||
| 606 | ThrowNotImplemented(Opcode::ISCADD_imm); | ||
| 607 | } | ||
| 608 | |||
| 609 | void TranslatorVisitor::ISCADD32I(u64) { | ||
| 610 | ThrowNotImplemented(Opcode::ISCADD32I); | ||
| 611 | } | ||
| 612 | |||
| 613 | void TranslatorVisitor::ISET_reg(u64) { | 516 | void TranslatorVisitor::ISET_reg(u64) { |
| 614 | ThrowNotImplemented(Opcode::ISET_reg); | 517 | ThrowNotImplemented(Opcode::ISET_reg); |
| 615 | } | 518 | } |
| @@ -622,18 +525,6 @@ void TranslatorVisitor::ISET_imm(u64) { | |||
| 622 | ThrowNotImplemented(Opcode::ISET_imm); | 525 | ThrowNotImplemented(Opcode::ISET_imm); |
| 623 | } | 526 | } |
| 624 | 527 | ||
| 625 | void TranslatorVisitor::ISETP_reg(u64) { | ||
| 626 | ThrowNotImplemented(Opcode::ISETP_reg); | ||
| 627 | } | ||
| 628 | |||
| 629 | void TranslatorVisitor::ISETP_cbuf(u64) { | ||
| 630 | ThrowNotImplemented(Opcode::ISETP_cbuf); | ||
| 631 | } | ||
| 632 | |||
| 633 | void TranslatorVisitor::ISETP_imm(u64) { | ||
| 634 | ThrowNotImplemented(Opcode::ISETP_imm); | ||
| 635 | } | ||
| 636 | |||
| 637 | void TranslatorVisitor::JCAL(u64) { | 528 | void TranslatorVisitor::JCAL(u64) { |
| 638 | ThrowNotImplemented(Opcode::JCAL); | 529 | ThrowNotImplemented(Opcode::JCAL); |
| 639 | } | 530 | } |
| @@ -658,10 +549,6 @@ void TranslatorVisitor::LDC(u64) { | |||
| 658 | ThrowNotImplemented(Opcode::LDC); | 549 | ThrowNotImplemented(Opcode::LDC); |
| 659 | } | 550 | } |
| 660 | 551 | ||
| 661 | void TranslatorVisitor::LDG(u64) { | ||
| 662 | ThrowNotImplemented(Opcode::LDG); | ||
| 663 | } | ||
| 664 | |||
| 665 | void TranslatorVisitor::LDL(u64) { | 552 | void TranslatorVisitor::LDL(u64) { |
| 666 | ThrowNotImplemented(Opcode::LDL); | 553 | ThrowNotImplemented(Opcode::LDL); |
| 667 | } | 554 | } |
| @@ -866,10 +753,6 @@ void TranslatorVisitor::RTT(u64) { | |||
| 866 | ThrowNotImplemented(Opcode::RTT); | 753 | ThrowNotImplemented(Opcode::RTT); |
| 867 | } | 754 | } |
| 868 | 755 | ||
| 869 | void TranslatorVisitor::S2R(u64) { | ||
| 870 | ThrowNotImplemented(Opcode::S2R); | ||
| 871 | } | ||
| 872 | |||
| 873 | void TranslatorVisitor::SAM(u64) { | 756 | void TranslatorVisitor::SAM(u64) { |
| 874 | ThrowNotImplemented(Opcode::SAM); | 757 | ThrowNotImplemented(Opcode::SAM); |
| 875 | } | 758 | } |
| @@ -914,18 +797,6 @@ void TranslatorVisitor::SHFL(u64) { | |||
| 914 | ThrowNotImplemented(Opcode::SHFL); | 797 | ThrowNotImplemented(Opcode::SHFL); |
| 915 | } | 798 | } |
| 916 | 799 | ||
| 917 | void TranslatorVisitor::SHL_reg(u64) { | ||
| 918 | ThrowNotImplemented(Opcode::SHL_reg); | ||
| 919 | } | ||
| 920 | |||
| 921 | void TranslatorVisitor::SHL_cbuf(u64) { | ||
| 922 | ThrowNotImplemented(Opcode::SHL_cbuf); | ||
| 923 | } | ||
| 924 | |||
| 925 | void TranslatorVisitor::SHL_imm(u64) { | ||
| 926 | ThrowNotImplemented(Opcode::SHL_imm); | ||
| 927 | } | ||
| 928 | |||
| 929 | void TranslatorVisitor::SHR_reg(u64) { | 800 | void TranslatorVisitor::SHR_reg(u64) { |
| 930 | ThrowNotImplemented(Opcode::SHR_reg); | 801 | ThrowNotImplemented(Opcode::SHR_reg); |
| 931 | } | 802 | } |
| @@ -1086,20 +957,4 @@ void TranslatorVisitor::VSHR(u64) { | |||
| 1086 | ThrowNotImplemented(Opcode::VSHR); | 957 | ThrowNotImplemented(Opcode::VSHR); |
| 1087 | } | 958 | } |
| 1088 | 959 | ||
| 1089 | void TranslatorVisitor::XMAD_reg(u64) { | ||
| 1090 | ThrowNotImplemented(Opcode::XMAD_reg); | ||
| 1091 | } | ||
| 1092 | |||
| 1093 | void TranslatorVisitor::XMAD_rc(u64) { | ||
| 1094 | ThrowNotImplemented(Opcode::XMAD_rc); | ||
| 1095 | } | ||
| 1096 | |||
| 1097 | void TranslatorVisitor::XMAD_cr(u64) { | ||
| 1098 | ThrowNotImplemented(Opcode::XMAD_cr); | ||
| 1099 | } | ||
| 1100 | |||
| 1101 | void TranslatorVisitor::XMAD_imm(u64) { | ||
| 1102 | ThrowNotImplemented(Opcode::XMAD_imm); | ||
| 1103 | } | ||
| 1104 | |||
| 1105 | } // namespace Shader::Maxwell | 960 | } // namespace Shader::Maxwell |