summaryrefslogtreecommitdiff
path: root/src/shader_recompiler/frontend/maxwell
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/shader_recompiler/frontend/maxwell/program.cpp1
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h56
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp71
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp73
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp108
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp26
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/impl.h9
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp106
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp73
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp99
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp71
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp110
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp149
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp (renamed from src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp)2
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp114
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp149
16 files changed, 1039 insertions, 178 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index 49d1f4bfb..bd1f96c07 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -56,6 +56,7 @@ Program::Program(Environment& env, const Flow::CFG& cfg) {
56 Optimization::Invoke(Optimization::IdentityRemovalPass, function); 56 Optimization::Invoke(Optimization::IdentityRemovalPass, function);
57 // Optimization::Invoke(Optimization::VerificationPass, function); 57 // Optimization::Invoke(Optimization::VerificationPass, function);
58 } 58 }
59 //*/
59} 60}
60 61
61std::string DumpProgram(const Program& program) { 62std::string DumpProgram(const Program& program) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
new file mode 100644
index 000000000..3da37a2bb
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
@@ -0,0 +1,56 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
11
12namespace Shader::Maxwell {
13
14enum class FpRounding : u64 {
15 RN,
16 RM,
17 RP,
18 RZ,
19};
20
21enum class FmzMode : u64 {
22 None,
23 FTZ,
24 FMZ,
25 INVALIDFMZ3,
26};
27
28inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) {
29 switch (fp_rounding) {
30 case FpRounding::RN:
31 return IR::FpRounding::RN;
32 case FpRounding::RM:
33 return IR::FpRounding::RM;
34 case FpRounding::RP:
35 return IR::FpRounding::RP;
36 case FpRounding::RZ:
37 return IR::FpRounding::RZ;
38 }
39 throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding);
40}
41
42inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) {
43 switch (fmz_mode) {
44 case FmzMode::None:
45 return IR::FmzMode::None;
46 case FmzMode::FTZ:
47 return IR::FmzMode::FTZ;
48 case FmzMode::FMZ:
49 return IR::FmzMode::FMZ;
50 case FmzMode::INVALIDFMZ3:
51 break;
52 }
53 throw NotImplementedException("Invalid FMZ mode {}", fmz_mode);
54}
55
56} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
new file mode 100644
index 000000000..d2c44b9cc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
@@ -0,0 +1,71 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12
13void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,
14 const IR::U32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
15 union {
16 u64 raw;
17 BitField<0, 8, IR::Reg> dest_reg;
18 BitField<8, 8, IR::Reg> src_a;
19 } const fadd{insn};
20
21 if (sat) {
22 throw NotImplementedException("FADD SAT");
23 }
24 if (cc) {
25 throw NotImplementedException("FADD CC");
26 }
27 const IR::U32 op_a{v.ir.FPAbsNeg(v.X(fadd.src_a), abs_a, neg_a)};
28 const IR::U32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
29 IR::FpControl control{
30 .no_contraction{true},
31 .rounding{CastFpRounding(fp_rounding)},
32 .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
33 };
34 v.X(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
35}
36
37void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
38 union {
39 u64 raw;
40 BitField<39, 2, FpRounding> fp_rounding;
41 BitField<44, 1, u64> ftz;
42 BitField<45, 1, u64> neg_b;
43 BitField<46, 1, u64> abs_a;
44 BitField<47, 1, u64> cc;
45 BitField<48, 1, u64> neg_a;
46 BitField<49, 1, u64> abs_b;
47 BitField<50, 1, u64> sat;
48 } const fadd{insn};
49
50 FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b,
51 fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0);
52}
53} // Anonymous namespace
54
55void TranslatorVisitor::FADD_reg(u64 insn) {
56 FADD(*this, insn, GetReg20(insn));
57}
58
59void TranslatorVisitor::FADD_cbuf(u64) {
60 throw NotImplementedException("FADD (cbuf)");
61}
62
63void TranslatorVisitor::FADD_imm(u64) {
64 throw NotImplementedException("FADD (imm)");
65}
66
67void TranslatorVisitor::FADD32I(u64) {
68 throw NotImplementedException("FADD32I");
69}
70
71} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..30ca052ec
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
@@ -0,0 +1,73 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, bool neg_a,
13 bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a;
18 } const ffma{insn};
19
20 if (sat) {
21 throw NotImplementedException("FFMA SAT");
22 }
23 if (cc) {
24 throw NotImplementedException("FFMA CC");
25 }
26 const IR::U32 op_a{v.ir.FPAbsNeg(v.X(ffma.src_a), false, neg_a)};
27 const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
28 const IR::U32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
29 const IR::FpControl fp_control{
30 .no_contraction{true},
31 .rounding{CastFpRounding(fp_rounding)},
32 .fmz_mode{CastFmzMode(fmz_mode)},
33 };
34 v.X(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control));
35}
36
37void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c) {
38 union {
39 u64 raw;
40 BitField<47, 1, u64> cc;
41 BitField<48, 1, u64> neg_b;
42 BitField<49, 1, u64> neg_c;
43 BitField<50, 1, u64> sat;
44 BitField<51, 2, FpRounding> fp_rounding;
45 BitField<53, 2, FmzMode> fmz_mode;
46 } const ffma{insn};
47
48 FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0,
49 ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding);
50}
51} // Anonymous namespace
52
53void TranslatorVisitor::FFMA_reg(u64 insn) {
54 FFMA(*this, insn, GetReg20(insn), GetReg39(insn));
55}
56
57void TranslatorVisitor::FFMA_rc(u64) {
58 throw NotImplementedException("FFMA (rc)");
59}
60
61void TranslatorVisitor::FFMA_cr(u64 insn) {
62 FFMA(*this, insn, GetCbuf(insn), GetReg39(insn));
63}
64
65void TranslatorVisitor::FFMA_imm(u64) {
66 throw NotImplementedException("FFMA (imm)");
67}
68
69void TranslatorVisitor::FFMA32I(u64) {
70 throw NotImplementedException("FFMA32I");
71}
72
73} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
new file mode 100644
index 000000000..743a1e2f0
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
@@ -0,0 +1,108 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/ir/modifiers.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13enum class Scale : u64 {
14 None,
15 D2,
16 D4,
17 D8,
18 M8,
19 M4,
20 M2,
21 INVALIDSCALE37,
22};
23
24float ScaleFactor(Scale scale) {
25 switch (scale) {
26 case Scale::None:
27 return 1.0f;
28 case Scale::D2:
29 return 1.0f / 2.0f;
30 case Scale::D4:
31 return 1.0f / 4.0f;
32 case Scale::D8:
33 return 1.0f / 8.0f;
34 case Scale::M8:
35 return 8.0f;
36 case Scale::M4:
37 return 4.0f;
38 case Scale::M2:
39 return 2.0f;
40 case Scale::INVALIDSCALE37:
41 break;
42 }
43 throw NotImplementedException("Invalid FMUL scale {}", scale);
44}
45
46void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode,
47 FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) {
48 union {
49 u64 raw;
50 BitField<0, 8, IR::Reg> dest_reg;
51 BitField<8, 8, IR::Reg> src_a;
52 } const fmul{insn};
53
54 if (cc) {
55 throw NotImplementedException("FMUL CC");
56 }
57 if (sat) {
58 throw NotImplementedException("FMUL SAT");
59 }
60 IR::U32 op_a{v.X(fmul.src_a)};
61 if (scale != Scale::None) {
62 if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) {
63 throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers");
64 }
65 op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale)));
66 }
67 const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
68 const IR::FpControl fp_control{
69 .no_contraction{true},
70 .rounding{CastFpRounding(fp_rounding)},
71 .fmz_mode{CastFmzMode(fmz_mode)},
72 };
73 v.X(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control));
74}
75
76void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
77 union {
78 u64 raw;
79 BitField<39, 2, FpRounding> fp_rounding;
80 BitField<41, 3, Scale> scale;
81 BitField<44, 2, FmzMode> fmz;
82 BitField<47, 1, u64> cc;
83 BitField<48, 1, u64> neg_b;
84 BitField<50, 1, u64> sat;
85 } fmul{insn};
86
87 FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0,
88 fmul.neg_b != 0);
89}
90} // Anonymous namespace
91
92void TranslatorVisitor::FMUL_reg(u64 insn) {
93 return FMUL(*this, insn, GetReg20(insn));
94}
95
96void TranslatorVisitor::FMUL_cbuf(u64) {
97 throw NotImplementedException("FMUL (cbuf)");
98}
99
100void TranslatorVisitor::FMUL_imm(u64) {
101 throw NotImplementedException("FMUL (imm)");
102}
103
104void TranslatorVisitor::FMUL32I(u64) {
105 throw NotImplementedException("FMUL32I");
106}
107
108} // namespace Shader::Maxwell \ No newline at end of file
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
index 7bc7ce9f2..548c7f611 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -16,6 +16,22 @@ void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
16 ir.SetReg(dest_reg, value); 16 ir.SetReg(dest_reg, value);
17} 17}
18 18
19IR::U32 TranslatorVisitor::GetReg20(u64 insn) {
20 union {
21 u64 raw;
22 BitField<20, 8, IR::Reg> index;
23 } const reg{insn};
24 return X(reg.index);
25}
26
27IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
28 union {
29 u64 raw;
30 BitField<39, 8, IR::Reg> index;
31 } const reg{insn};
32 return X(reg.index);
33}
34
19IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { 35IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
20 union { 36 union {
21 u64 raw; 37 u64 raw;
@@ -33,7 +49,7 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
33 return ir.GetCbuf(binding, byte_offset); 49 return ir.GetCbuf(binding, byte_offset);
34} 50}
35 51
36IR::U32 TranslatorVisitor::GetImm(u64 insn) { 52IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
37 union { 53 union {
38 u64 raw; 54 u64 raw;
39 BitField<20, 19, u64> value; 55 BitField<20, 19, u64> value;
@@ -44,6 +60,14 @@ IR::U32 TranslatorVisitor::GetImm(u64 insn) {
44 return ir.Imm32(value); 60 return ir.Imm32(value);
45} 61}
46 62
63IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
64 union {
65 u64 raw;
66 BitField<20, 32, u64> value;
67 } const imm{insn};
68 return ir.Imm32(static_cast<u32>(imm.value));
69}
70
47void TranslatorVisitor::SetZFlag(const IR::U1& value) { 71void TranslatorVisitor::SetZFlag(const IR::U1& value) {
48 ir.SetZFlag(value); 72 ir.SetZFlag(value);
49} 73}
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index 8be7d6ff1..ef6d977fe 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -46,7 +46,7 @@ public:
46 void DADD_reg(u64 insn); 46 void DADD_reg(u64 insn);
47 void DADD_cbuf(u64 insn); 47 void DADD_cbuf(u64 insn);
48 void DADD_imm(u64 insn); 48 void DADD_imm(u64 insn);
49 void DEPBAR(u64 insn); 49 void DEPBAR();
50 void DFMA_reg(u64 insn); 50 void DFMA_reg(u64 insn);
51 void DFMA_rc(u64 insn); 51 void DFMA_rc(u64 insn);
52 void DFMA_cr(u64 insn); 52 void DFMA_cr(u64 insn);
@@ -298,9 +298,14 @@ public:
298 [[nodiscard]] IR::U32 X(IR::Reg reg); 298 [[nodiscard]] IR::U32 X(IR::Reg reg);
299 void X(IR::Reg dest_reg, const IR::U32& value); 299 void X(IR::Reg dest_reg, const IR::U32& value);
300 300
301 [[nodiscard]] IR::U32 GetReg20(u64 insn);
302 [[nodiscard]] IR::U32 GetReg39(u64 insn);
303
301 [[nodiscard]] IR::U32 GetCbuf(u64 insn); 304 [[nodiscard]] IR::U32 GetCbuf(u64 insn);
302 305
303 [[nodiscard]] IR::U32 GetImm(u64 insn); 306 [[nodiscard]] IR::U32 GetImm20(u64 insn);
307
308 [[nodiscard]] IR::U32 GetImm32(u64 insn);
304 309
305 void SetZFlag(const IR::U1& value); 310 void SetZFlag(const IR::U1& value);
306 void SetSFlag(const IR::U1& value); 311 void SetSFlag(const IR::U1& value);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
new file mode 100644
index 000000000..60f79b160
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
@@ -0,0 +1,106 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x,
12 bool cc) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_a;
17 } const iadd{insn};
18
19 if (sat) {
20 throw NotImplementedException("IADD SAT");
21 }
22 if (x && po) {
23 throw NotImplementedException("IADD X+PO");
24 }
25 // Operand A is always read from here, negated if needed
26 IR::U32 op_a{v.X(iadd.src_a)};
27 if (neg_a) {
28 op_a = v.ir.INeg(op_a);
29 }
30 // Add both operands
31 IR::U32 result{v.ir.IAdd(op_a, op_b)};
32 if (x) {
33 const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
34 result = v.ir.IAdd(result, carry);
35 }
36 if (po) {
37 // .PO adds one to the result
38 result = v.ir.IAdd(result, v.ir.Imm32(1));
39 }
40 if (cc) {
41 // Store flags
42 // TODO: Does this grab the result pre-PO or after?
43 if (po) {
44 throw NotImplementedException("IADD CC+PO");
45 }
46 // TODO: How does CC behave when X is set?
47 if (x) {
48 throw NotImplementedException("IADD X+CC");
49 }
50 v.SetZFlag(v.ir.GetZeroFromOp(result));
51 v.SetSFlag(v.ir.GetSignFromOp(result));
52 v.SetCFlag(v.ir.GetCarryFromOp(result));
53 v.SetOFlag(v.ir.GetOverflowFromOp(result));
54 }
55 // Store result
56 v.X(iadd.dest_reg, result);
57}
58
59void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
60 union {
61 u64 insn;
62 BitField<43, 1, u64> x;
63 BitField<47, 1, u64> cc;
64 BitField<48, 2, u64> three_for_po;
65 BitField<48, 1, u64> neg_b;
66 BitField<49, 1, u64> neg_a;
67 BitField<50, 1, u64> sat;
68 } const iadd{insn};
69
70 const bool po{iadd.three_for_po == 3};
71 const bool neg_a{!po && iadd.neg_a != 0};
72 if (!po && iadd.neg_b != 0) {
73 op_b = v.ir.INeg(op_b);
74 }
75 IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0);
76}
77} // Anonymous namespace
78
79void TranslatorVisitor::IADD_reg(u64) {
80 throw NotImplementedException("IADD (reg)");
81}
82
83void TranslatorVisitor::IADD_cbuf(u64 insn) {
84 IADD(*this, insn, GetCbuf(insn));
85}
86
87void TranslatorVisitor::IADD_imm(u64) {
88 throw NotImplementedException("IADD (imm)");
89}
90
91void TranslatorVisitor::IADD32I(u64 insn) {
92 union {
93 u64 raw;
94 BitField<52, 1, u64> cc;
95 BitField<53, 1, u64> x;
96 BitField<54, 1, u64> sat;
97 BitField<55, 2, u64> three_for_po;
98 BitField<56, 1, u64> neg_a;
99 } const iadd32i{insn};
100
101 const bool po{iadd32i.three_for_po == 3};
102 const bool neg_a{!po && iadd32i.neg_a != 0};
103 IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0);
104}
105
106} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
new file mode 100644
index 000000000..f92c0bbd6
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
@@ -0,0 +1,73 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> op_a;
16 BitField<47, 1, u64> cc;
17 BitField<48, 2, u64> three_for_po;
18 BitField<48, 1, u64> neg_b;
19 BitField<49, 1, u64> neg_a;
20 BitField<39, 5, u64> scale;
21 } const iscadd{insn};
22
23 const bool po{iscadd.three_for_po == 3};
24 IR::U32 op_a{v.X(iscadd.op_a)};
25 if (!po) {
26 // When PO is not present, the bits are interpreted as negation
27 if (iscadd.neg_a != 0) {
28 op_a = v.ir.INeg(op_a);
29 }
30 if (iscadd.neg_b != 0) {
31 op_b = v.ir.INeg(op_b);
32 }
33 }
34 // With the operands already processed, scale A
35 const IR::U32 scale{v.ir.Imm32(static_cast<u32>(iscadd.scale))};
36 const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)};
37
38 IR::U32 result{v.ir.IAdd(scaled_a, op_b)};
39 if (po) {
40 // .PO adds one to the final result
41 result = v.ir.IAdd(result, v.ir.Imm32(1));
42 }
43 v.X(iscadd.dest_reg, result);
44
45 if (iscadd.cc != 0) {
46 throw NotImplementedException("ISCADD CC");
47 }
48}
49
50} // Anonymous namespace
51
52void TranslatorVisitor::ISCADD_reg(u64 insn) {
53 union {
54 u64 raw;
55 BitField<20, 8, IR::Reg> op_b;
56 } const iscadd{insn};
57
58 ISCADD(*this, insn, X(iscadd.op_b));
59}
60
61void TranslatorVisitor::ISCADD_cbuf(u64) {
62 throw NotImplementedException("ISCADD (cbuf)");
63}
64
65void TranslatorVisitor::ISCADD_imm(u64) {
66 throw NotImplementedException("ISCADD (imm)");
67}
68
69void TranslatorVisitor::ISCADD32I(u64) {
70 throw NotImplementedException("ISCADD32I");
71}
72
73} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
new file mode 100644
index 000000000..76c6b5291
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
@@ -0,0 +1,99 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class CompareOp : u64 {
12 F, // Always false
13 LT, // Less than
14 EQ, // Equal
15 LE, // Less than or equal
16 GT, // Greater than
17 NE, // Not equal
18 GE, // Greater than or equal
19 T, // Always true
20};
21
22enum class Bop : u64 {
23 AND,
24 OR,
25 XOR,
26};
27
28IR::U1 Compare(IR::IREmitter& ir, CompareOp op, const IR::U32& lhs, const IR::U32& rhs,
29 bool is_signed) {
30 switch (op) {
31 case CompareOp::F:
32 return ir.Imm1(false);
33 case CompareOp::LT:
34 return ir.ILessThan(lhs, rhs, is_signed);
35 case CompareOp::EQ:
36 return ir.IEqual(lhs, rhs);
37 case CompareOp::LE:
38 return ir.ILessThanEqual(lhs, rhs, is_signed);
39 case CompareOp::GT:
40 return ir.IGreaterThan(lhs, rhs, is_signed);
41 case CompareOp::NE:
42 return ir.INotEqual(lhs, rhs);
43 case CompareOp::GE:
44 return ir.IGreaterThanEqual(lhs, rhs, is_signed);
45 case CompareOp::T:
46 return ir.Imm1(true);
47 }
48 throw NotImplementedException("Invalid ISETP compare op {}", op);
49}
50
51IR::U1 Combine(IR::IREmitter& ir, Bop bop, const IR::U1& comparison, const IR::U1& bop_pred) {
52 switch (bop) {
53 case Bop::AND:
54 return ir.LogicalAnd(comparison, bop_pred);
55 case Bop::OR:
56 return ir.LogicalOr(comparison, bop_pred);
57 case Bop::XOR:
58 return ir.LogicalXor(comparison, bop_pred);
59 }
60 throw NotImplementedException("Invalid ISETP bop {}", bop);
61}
62
63void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
64 union {
65 u64 raw;
66 BitField<0, 3, IR::Pred> dest_pred_b;
67 BitField<3, 3, IR::Pred> dest_pred_a;
68 BitField<8, 8, IR::Reg> src_reg_a;
69 BitField<39, 3, IR::Pred> bop_pred;
70 BitField<42, 1, u64> neg_bop_pred;
71 BitField<45, 2, Bop> bop;
72 BitField<48, 1, u64> is_signed;
73 BitField<49, 3, CompareOp> compare_op;
74 } const isetp{insn};
75
76 const Bop bop{isetp.bop};
77 const IR::U32 op_a{v.X(isetp.src_reg_a)};
78 const IR::U1 comparison{Compare(v.ir, isetp.compare_op, op_a, op_b, isetp.is_signed != 0)};
79 const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)};
80 const IR::U1 result_a{Combine(v.ir, bop, comparison, bop_pred)};
81 const IR::U1 result_b{Combine(v.ir, bop, v.ir.LogicalNot(comparison), bop_pred)};
82 v.ir.SetPred(isetp.dest_pred_a, result_a);
83 v.ir.SetPred(isetp.dest_pred_b, result_b);
84}
85} // Anonymous namespace
86
87void TranslatorVisitor::ISETP_reg(u64 insn) {
88 ISETP(*this, insn, GetReg20(insn));
89}
90
91void TranslatorVisitor::ISETP_cbuf(u64 insn) {
92 ISETP(*this, insn, GetCbuf(insn));
93}
94
95void TranslatorVisitor::ISETP_imm(u64) {
96 throw NotImplementedException("ISETP_imm");
97}
98
99} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
new file mode 100644
index 000000000..d4b417d14
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
@@ -0,0 +1,71 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_reg_a;
16 BitField<39, 1, u64> w;
17 BitField<43, 1, u64> x;
18 BitField<47, 1, u64> cc;
19 } const shl{insn};
20
21 if (shl.x != 0) {
22 throw NotImplementedException("SHL.X");
23 }
24 if (shl.cc != 0) {
25 throw NotImplementedException("SHL.CC");
26 }
27 const IR::U32 base{v.X(shl.src_reg_a)};
28 IR::U32 result;
29 if (shl.w != 0) {
30 // When .W is set, the shift value is wrapped
31 // To emulate this we just have to clamp it ourselves.
32 const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))};
33 result = v.ir.ShiftLeftLogical(base, shift);
34 } else {
35 // When .W is not set, the shift value is clamped between 0 and 32.
36 // To emulate this we have to have in mind the special shift of 32, that evaluates as 0.
37 // We can safely evaluate an out of bounds shift according to the SPIR-V specification:
38 //
39 // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical
40 // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than
41 // or equal to the bit width of the components of Base."
42 //
43 // And on the GLASM specification it is also safe to evaluate out of bounds:
44 //
45 // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt
46 // "The results of a shift operation ("<<") are undefined if the value of the second operand
47 // is negative, or greater than or equal to the number of bits in the first operand."
48 //
49 // Emphasis on undefined results in contrast to undefined behavior.
50 //
51 const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)};
52 const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)};
53 result = v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0));
54 }
55 v.X(shl.dest_reg, result);
56}
57} // Anonymous namespace
58
59void TranslatorVisitor::SHL_reg(u64) {
60 throw NotImplementedException("SHL_reg");
61}
62
63void TranslatorVisitor::SHL_cbuf(u64) {
64 throw NotImplementedException("SHL_cbuf");
65}
66
67void TranslatorVisitor::SHL_imm(u64 insn) {
68 SHL(*this, insn, GetImm20(insn));
69}
70
71} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
new file mode 100644
index 000000000..70a7c76c5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
@@ -0,0 +1,110 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class SelectMode : u64 {
12 Default,
13 CLO,
14 CHI,
15 CSFU,
16 CBCC,
17};
18
19enum class Half : u64 {
20 H0, // Least-significant bits (15:0)
21 H1, // Most-significant bits (31:16)
22};
23
24IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) {
25 const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)};
26 return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed);
27}
28
29void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c,
30 SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) {
31 union {
32 u64 raw;
33 BitField<0, 8, IR::Reg> dest_reg;
34 BitField<8, 8, IR::Reg> src_reg_a;
35 BitField<47, 1, u64> cc;
36 BitField<48, 1, u64> is_a_signed;
37 BitField<49, 1, u64> is_b_signed;
38 BitField<53, 1, Half> half_a;
39 } const xmad{insn};
40
41 if (x) {
42 throw NotImplementedException("XMAD X");
43 }
44 const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)};
45 const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)};
46
47 IR::U32 product{v.ir.IMul(op_a, op_b)};
48 if (psl) {
49 // .PSL shifts the product 16 bits
50 product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16));
51 }
52 const IR::U32 op_c{[&]() -> IR::U32 {
53 switch (select_mode) {
54 case SelectMode::Default:
55 return src_c;
56 case SelectMode::CLO:
57 return ExtractHalf(v, src_c, Half::H0, false);
58 case SelectMode::CHI:
59 return ExtractHalf(v, src_c, Half::H1, false);
60 case SelectMode::CBCC:
61 return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_b);
62 case SelectMode::CSFU:
63 throw NotImplementedException("XMAD CSFU");
64 }
65 throw NotImplementedException("Invalid XMAD select mode {}", select_mode);
66 }()};
67 IR::U32 result{v.ir.IAdd(product, op_c)};
68 if (mrg) {
69 // .MRG inserts src_b [15:0] into result's [31:16].
70 const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)};
71 result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16));
72 }
73 if (xmad.cc) {
74 throw NotImplementedException("XMAD CC");
75 }
76 // Store result
77 v.X(xmad.dest_reg, result);
78}
79} // Anonymous namespace
80
81void TranslatorVisitor::XMAD_reg(u64) {
82 throw NotImplementedException("XMAD (reg)");
83}
84
85void TranslatorVisitor::XMAD_rc(u64) {
86 throw NotImplementedException("XMAD (rc)");
87}
88
89void TranslatorVisitor::XMAD_cr(u64) {
90 throw NotImplementedException("XMAD (cr)");
91}
92
93void TranslatorVisitor::XMAD_imm(u64 insn) {
94 union {
95 u64 raw;
96 BitField<20, 16, u64> src_b;
97 BitField<36, 1, u64> psl;
98 BitField<37, 1, u64> mrg;
99 BitField<38, 1, u64> x;
100 BitField<39, 8, IR::Reg> src_c;
101 BitField<50, 3, SelectMode> select_mode;
102 } const xmad{insn};
103
104 const IR::U32 src_b{ir.Imm32(static_cast<u32>(xmad.src_b))};
105 const IR::U32 src_c{X(xmad.src_c)};
106 XMAD(*this, insn, src_b, src_c, xmad.select_mode, Half::H0, xmad.psl != 0, xmad.mrg != 0,
107 xmad.x != 0);
108}
109
110} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
index d8fd387cf..c9669c617 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
@@ -10,16 +10,35 @@
10 10
11namespace Shader::Maxwell { 11namespace Shader::Maxwell {
12namespace { 12namespace {
13enum class LoadSize : u64 {
14 U8, // Zero-extend
15 S8, // Sign-extend
16 U16, // Zero-extend
17 S16, // Sign-extend
18 B32,
19 B64,
20 B128,
21 U128, // ???
22};
23
13enum class StoreSize : u64 { 24enum class StoreSize : u64 {
14 U8, 25 U8, // Zero-extend
15 S8, 26 S8, // Sign-extend
16 U16, 27 U16, // Zero-extend
17 S16, 28 S16, // Sign-extend
18 B32, 29 B32,
19 B64, 30 B64,
20 B128, 31 B128,
21}; 32};
22 33
34// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
35enum class LoadCache : u64 {
36 CA, // Cache at all levels, likely to be accessed again
37 CG, // Cache at global level (cache in L2 and below, not L1)
38 CI, // ???
39 CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again)
40};
41
23// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html 42// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
24enum class StoreCache : u64 { 43enum class StoreCache : u64 {
25 WB, // Cache write-back all coherent levels 44 WB, // Cache write-back all coherent levels
@@ -27,61 +46,137 @@ enum class StoreCache : u64 {
27 CS, // Cache streaming, likely to be accessed once 46 CS, // Cache streaming, likely to be accessed once
28 WT, // Cache write-through (to system memory) 47 WT, // Cache write-through (to system memory)
29}; 48};
30} // Anonymous namespace
31 49
32void TranslatorVisitor::STG(u64 insn) { 50IR::U64 Address(TranslatorVisitor& v, u64 insn) {
33 // STG stores registers into global memory.
34 union { 51 union {
35 u64 raw; 52 u64 raw;
36 BitField<0, 8, IR::Reg> data_reg;
37 BitField<8, 8, IR::Reg> addr_reg; 53 BitField<8, 8, IR::Reg> addr_reg;
54 BitField<20, 24, s64> addr_offset;
55 BitField<20, 24, u64> rz_addr_offset;
38 BitField<45, 1, u64> e; 56 BitField<45, 1, u64> e;
39 BitField<46, 2, StoreCache> cache; 57 } const mem{insn};
40 BitField<48, 3, StoreSize> size;
41 } const stg{insn};
42 58
43 const IR::U64 address{[&]() -> IR::U64 { 59 const IR::U64 address{[&]() -> IR::U64 {
44 if (stg.e == 0) { 60 if (mem.e == 0) {
45 // STG without .E uses a 32-bit pointer, zero-extend it 61 // LDG/STG without .E uses a 32-bit pointer, zero-extend it
46 return ir.ConvertU(64, X(stg.addr_reg)); 62 return v.ir.ConvertU(64, v.X(mem.addr_reg));
47 } 63 }
48 if (!IR::IsAligned(stg.addr_reg, 2)) { 64 if (!IR::IsAligned(mem.addr_reg, 2)) {
49 throw NotImplementedException("Unaligned address register"); 65 throw NotImplementedException("Unaligned address register");
50 } 66 }
51 // Pack two registers to build the 32-bit address 67 // Pack two registers to build the 64-bit address
52 return ir.PackUint2x32(ir.CompositeConstruct(X(stg.addr_reg), X(stg.addr_reg + 1))); 68 return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1)));
69 }()};
70 const u64 addr_offset{[&]() -> u64 {
71 if (mem.addr_reg == IR::Reg::RZ) {
72 // When RZ is used, the address is an absolute address
73 return static_cast<u64>(mem.rz_addr_offset.Value());
74 } else {
75 return static_cast<u64>(mem.addr_offset.Value());
76 }
53 }()}; 77 }()};
78 // Apply the offset
79 return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
80}
81} // Anonymous namespace
82
83void TranslatorVisitor::LDG(u64 insn) {
84 // LDG loads global memory into registers
85 union {
86 u64 raw;
87 BitField<0, 8, IR::Reg> dest_reg;
88 BitField<46, 2, LoadCache> cache;
89 BitField<48, 3, LoadSize> size;
90 } const ldg{insn};
91
92 // Pointer to load data from
93 const IR::U64 address{Address(*this, insn)};
94 const IR::Reg dest_reg{ldg.dest_reg};
95 switch (ldg.size) {
96 case LoadSize::U8:
97 X(dest_reg, ir.LoadGlobalU8(address));
98 break;
99 case LoadSize::S8:
100 X(dest_reg, ir.LoadGlobalS8(address));
101 break;
102 case LoadSize::U16:
103 X(dest_reg, ir.LoadGlobalU16(address));
104 break;
105 case LoadSize::S16:
106 X(dest_reg, ir.LoadGlobalS16(address));
107 break;
108 case LoadSize::B32:
109 X(dest_reg, ir.LoadGlobal32(address));
110 break;
111 case LoadSize::B64: {
112 if (!IR::IsAligned(dest_reg, 2)) {
113 throw NotImplementedException("Unaligned data registers");
114 }
115 const IR::Value vector{ir.LoadGlobal64(address)};
116 for (int i = 0; i < 2; ++i) {
117 X(dest_reg + i, ir.CompositeExtract(vector, i));
118 }
119 break;
120 }
121 case LoadSize::B128: {
122 if (!IR::IsAligned(dest_reg, 4)) {
123 throw NotImplementedException("Unaligned data registers");
124 }
125 const IR::Value vector{ir.LoadGlobal128(address)};
126 for (int i = 0; i < 4; ++i) {
127 X(dest_reg + i, ir.CompositeExtract(vector, i));
128 }
129 break;
130 }
131 case LoadSize::U128:
132 throw NotImplementedException("LDG U.128");
133 default:
134 throw NotImplementedException("Invalid LDG size {}", ldg.size.Value());
135 }
136}
137
138void TranslatorVisitor::STG(u64 insn) {
139 // STG stores registers into global memory.
140 union {
141 u64 raw;
142 BitField<0, 8, IR::Reg> data_reg;
143 BitField<46, 2, StoreCache> cache;
144 BitField<48, 3, StoreSize> size;
145 } const stg{insn};
54 146
147 // Pointer to store data into
148 const IR::U64 address{Address(*this, insn)};
149 const IR::Reg data_reg{stg.data_reg};
55 switch (stg.size) { 150 switch (stg.size) {
56 case StoreSize::U8: 151 case StoreSize::U8:
57 ir.WriteGlobalU8(address, X(stg.data_reg)); 152 ir.WriteGlobalU8(address, X(data_reg));
58 break; 153 break;
59 case StoreSize::S8: 154 case StoreSize::S8:
60 ir.WriteGlobalS8(address, X(stg.data_reg)); 155 ir.WriteGlobalS8(address, X(data_reg));
61 break; 156 break;
62 case StoreSize::U16: 157 case StoreSize::U16:
63 ir.WriteGlobalU16(address, X(stg.data_reg)); 158 ir.WriteGlobalU16(address, X(data_reg));
64 break; 159 break;
65 case StoreSize::S16: 160 case StoreSize::S16:
66 ir.WriteGlobalS16(address, X(stg.data_reg)); 161 ir.WriteGlobalS16(address, X(data_reg));
67 break; 162 break;
68 case StoreSize::B32: 163 case StoreSize::B32:
69 ir.WriteGlobal32(address, X(stg.data_reg)); 164 ir.WriteGlobal32(address, X(data_reg));
70 break; 165 break;
71 case StoreSize::B64: { 166 case StoreSize::B64: {
72 if (!IR::IsAligned(stg.data_reg, 2)) { 167 if (!IR::IsAligned(data_reg, 2)) {
73 throw NotImplementedException("Unaligned data registers"); 168 throw NotImplementedException("Unaligned data registers");
74 } 169 }
75 const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1))}; 170 const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))};
76 ir.WriteGlobal64(address, vector); 171 ir.WriteGlobal64(address, vector);
77 break; 172 break;
78 } 173 }
79 case StoreSize::B128: 174 case StoreSize::B128:
80 if (!IR::IsAligned(stg.data_reg, 4)) { 175 if (!IR::IsAligned(data_reg, 4)) {
81 throw NotImplementedException("Unaligned data registers"); 176 throw NotImplementedException("Unaligned data registers");
82 } 177 }
83 const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1), 178 const IR::Value vector{
84 X(stg.data_reg + 2), X(stg.data_reg + 3))}; 179 ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))};
85 ir.WriteGlobal128(address, vector); 180 ir.WriteGlobal128(address, vector);
86 break; 181 break;
87 } 182 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
index 7fa35ba3a..1711d3f48 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
@@ -39,7 +39,7 @@ void TranslatorVisitor::MOV_cbuf(u64 insn) {
39void TranslatorVisitor::MOV_imm(u64 insn) { 39void TranslatorVisitor::MOV_imm(u64 insn) {
40 const MOV mov{insn}; 40 const MOV mov{insn};
41 CheckMask(mov); 41 CheckMask(mov);
42 X(mov.dest_reg, GetImm(insn)); 42 X(mov.dest_reg, GetImm20(insn));
43} 43}
44 44
45} // namespace Shader::Maxwell 45} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
new file mode 100644
index 000000000..93cea302a
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
@@ -0,0 +1,114 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class SpecialRegister : u64 {
12 SR_LANEID = 0,
13 SR_VIRTCFG = 2,
14 SR_VIRTID = 3,
15 SR_PM0 = 4,
16 SR_PM1 = 5,
17 SR_PM2 = 6,
18 SR_PM3 = 7,
19 SR_PM4 = 8,
20 SR_PM5 = 9,
21 SR_PM6 = 10,
22 SR_PM7 = 11,
23 SR_ORDERING_TICKET = 15,
24 SR_PRIM_TYPE = 16,
25 SR_INVOCATION_ID = 17,
26 SR_Y_DIRECTION = 18,
27 SR_THREAD_KILL = 19,
28 SM_SHADER_TYPE = 20,
29 SR_DIRECTCBEWRITEADDRESSLOW = 21,
30 SR_DIRECTCBEWRITEADDRESSHIGH = 22,
31 SR_DIRECTCBEWRITEENABLE = 23,
32 SR_MACHINE_ID_0 = 24,
33 SR_MACHINE_ID_1 = 25,
34 SR_MACHINE_ID_2 = 26,
35 SR_MACHINE_ID_3 = 27,
36 SR_AFFINITY = 28,
37 SR_INVOCATION_INFO = 29,
38 SR_WSCALEFACTOR_XY = 30,
39 SR_WSCALEFACTOR_Z = 31,
40 SR_TID = 32,
41 SR_TID_X = 33,
42 SR_TID_Y = 34,
43 SR_TID_Z = 35,
44 SR_CTAID_X = 37,
45 SR_CTAID_Y = 38,
46 SR_CTAID_Z = 39,
47 SR_NTID = 49,
48 SR_CirQueueIncrMinusOne = 50,
49 SR_NLATC = 51,
50 SR_SWINLO = 57,
51 SR_SWINSZ = 58,
52 SR_SMEMSZ = 59,
53 SR_SMEMBANKS = 60,
54 SR_LWINLO = 61,
55 SR_LWINSZ = 62,
56 SR_LMEMLOSZ = 63,
57 SR_LMEMHIOFF = 64,
58 SR_EQMASK = 65,
59 SR_LTMASK = 66,
60 SR_LEMASK = 67,
61 SR_GTMASK = 68,
62 SR_GEMASK = 69,
63 SR_REGALLOC = 70,
64 SR_GLOBALERRORSTATUS = 73,
65 SR_WARPERRORSTATUS = 75,
66 SR_PM_HI0 = 81,
67 SR_PM_HI1 = 82,
68 SR_PM_HI2 = 83,
69 SR_PM_HI3 = 84,
70 SR_PM_HI4 = 85,
71 SR_PM_HI5 = 86,
72 SR_PM_HI6 = 87,
73 SR_PM_HI7 = 88,
74 SR_CLOCKLO = 89,
75 SR_CLOCKHI = 90,
76 SR_GLOBALTIMERLO = 91,
77 SR_GLOBALTIMERHI = 92,
78 SR_HWTASKID = 105,
79 SR_CIRCULARQUEUEENTRYINDEX = 106,
80 SR_CIRCULARQUEUEENTRYADDRESSLOW = 107,
81 SR_CIRCULARQUEUEENTRYADDRESSHIGH = 108,
82};
83
84[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) {
85 switch (special_register) {
86 case SpecialRegister::SR_TID_X:
87 return ir.LocalInvocationIdX();
88 case SpecialRegister::SR_TID_Y:
89 return ir.LocalInvocationIdY();
90 case SpecialRegister::SR_TID_Z:
91 return ir.LocalInvocationIdZ();
92 case SpecialRegister::SR_CTAID_X:
93 return ir.WorkgroupIdX();
94 case SpecialRegister::SR_CTAID_Y:
95 return ir.WorkgroupIdY();
96 case SpecialRegister::SR_CTAID_Z:
97 return ir.WorkgroupIdZ();
98 default:
99 throw NotImplementedException("S2R special register {}", special_register);
100 }
101}
102} // Anonymous namespace
103
104void TranslatorVisitor::S2R(u64 insn) {
105 union {
106 u64 raw;
107 BitField<0, 8, IR::Reg> dest_reg;
108 BitField<20, 8, SpecialRegister> src_reg;
109 } const s2r{insn};
110
111 X(s2r.dest_reg, Read(ir, s2r.src_reg));
112}
113
114} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 0f52696d1..d70399f6b 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -7,21 +7,8 @@
7#include "shader_recompiler/frontend/maxwell/opcode.h" 7#include "shader_recompiler/frontend/maxwell/opcode.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" 8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9 9
10#include "shader_recompiler/ir_opt/passes.h"
11
12namespace Shader::Maxwell { 10namespace Shader::Maxwell {
13 11
14[[maybe_unused]] static inline void DumpOptimized(IR::Block& block) {
15 auto raw{IR::DumpBlock(block)};
16
17 Optimization::GetSetElimination(block);
18 Optimization::DeadCodeEliminationPass(block);
19 Optimization::IdentityRemovalPass(block);
20 auto dumped{IR::DumpBlock(block)};
21
22 fmt::print(stderr, "{}", dumped);
23}
24
25[[noreturn]] static void ThrowNotImplemented(Opcode opcode) { 12[[noreturn]] static void ThrowNotImplemented(Opcode opcode) {
26 throw NotImplementedException("Instruction {} is not implemented", opcode); 13 throw NotImplementedException("Instruction {} is not implemented", opcode);
27} 14}
@@ -146,8 +133,8 @@ void TranslatorVisitor::DADD_imm(u64) {
146 ThrowNotImplemented(Opcode::DADD_imm); 133 ThrowNotImplemented(Opcode::DADD_imm);
147} 134}
148 135
149void TranslatorVisitor::DEPBAR(u64) { 136void TranslatorVisitor::DEPBAR() {
150 ThrowNotImplemented(Opcode::DEPBAR); 137 // DEPBAR is a no-op
151} 138}
152 139
153void TranslatorVisitor::DFMA_reg(u64) { 140void TranslatorVisitor::DFMA_reg(u64) {
@@ -230,22 +217,6 @@ void TranslatorVisitor::F2F_imm(u64) {
230 ThrowNotImplemented(Opcode::F2F_imm); 217 ThrowNotImplemented(Opcode::F2F_imm);
231} 218}
232 219
233void TranslatorVisitor::FADD_reg(u64) {
234 ThrowNotImplemented(Opcode::FADD_reg);
235}
236
237void TranslatorVisitor::FADD_cbuf(u64) {
238 ThrowNotImplemented(Opcode::FADD_cbuf);
239}
240
241void TranslatorVisitor::FADD_imm(u64) {
242 ThrowNotImplemented(Opcode::FADD_imm);
243}
244
245void TranslatorVisitor::FADD32I(u64) {
246 ThrowNotImplemented(Opcode::FADD32I);
247}
248
249void TranslatorVisitor::FCHK_reg(u64) { 220void TranslatorVisitor::FCHK_reg(u64) {
250 ThrowNotImplemented(Opcode::FCHK_reg); 221 ThrowNotImplemented(Opcode::FCHK_reg);
251} 222}
@@ -274,26 +245,6 @@ void TranslatorVisitor::FCMP_imm(u64) {
274 ThrowNotImplemented(Opcode::FCMP_imm); 245 ThrowNotImplemented(Opcode::FCMP_imm);
275} 246}
276 247
277void TranslatorVisitor::FFMA_reg(u64) {
278 ThrowNotImplemented(Opcode::FFMA_reg);
279}
280
281void TranslatorVisitor::FFMA_rc(u64) {
282 ThrowNotImplemented(Opcode::FFMA_rc);
283}
284
285void TranslatorVisitor::FFMA_cr(u64) {
286 ThrowNotImplemented(Opcode::FFMA_cr);
287}
288
289void TranslatorVisitor::FFMA_imm(u64) {
290 ThrowNotImplemented(Opcode::FFMA_imm);
291}
292
293void TranslatorVisitor::FFMA32I(u64) {
294 ThrowNotImplemented(Opcode::FFMA32I);
295}
296
297void TranslatorVisitor::FLO_reg(u64) { 248void TranslatorVisitor::FLO_reg(u64) {
298 ThrowNotImplemented(Opcode::FLO_reg); 249 ThrowNotImplemented(Opcode::FLO_reg);
299} 250}
@@ -318,22 +269,6 @@ void TranslatorVisitor::FMNMX_imm(u64) {
318 ThrowNotImplemented(Opcode::FMNMX_imm); 269 ThrowNotImplemented(Opcode::FMNMX_imm);
319} 270}
320 271
321void TranslatorVisitor::FMUL_reg(u64) {
322 ThrowNotImplemented(Opcode::FMUL_reg);
323}
324
325void TranslatorVisitor::FMUL_cbuf(u64) {
326 ThrowNotImplemented(Opcode::FMUL_cbuf);
327}
328
329void TranslatorVisitor::FMUL_imm(u64) {
330 ThrowNotImplemented(Opcode::FMUL_imm);
331}
332
333void TranslatorVisitor::FMUL32I(u64) {
334 ThrowNotImplemented(Opcode::FMUL32I);
335}
336
337void TranslatorVisitor::FSET_reg(u64) { 272void TranslatorVisitor::FSET_reg(u64) {
338 ThrowNotImplemented(Opcode::FSET_reg); 273 ThrowNotImplemented(Opcode::FSET_reg);
339} 274}
@@ -470,18 +405,6 @@ void TranslatorVisitor::I2I_imm(u64) {
470 ThrowNotImplemented(Opcode::I2I_imm); 405 ThrowNotImplemented(Opcode::I2I_imm);
471} 406}
472 407
473void TranslatorVisitor::IADD_reg(u64) {
474 ThrowNotImplemented(Opcode::IADD_reg);
475}
476
477void TranslatorVisitor::IADD_cbuf(u64) {
478 ThrowNotImplemented(Opcode::IADD_cbuf);
479}
480
481void TranslatorVisitor::IADD_imm(u64) {
482 ThrowNotImplemented(Opcode::IADD_imm);
483}
484
485void TranslatorVisitor::IADD3_reg(u64) { 408void TranslatorVisitor::IADD3_reg(u64) {
486 ThrowNotImplemented(Opcode::IADD3_reg); 409 ThrowNotImplemented(Opcode::IADD3_reg);
487} 410}
@@ -494,10 +417,6 @@ void TranslatorVisitor::IADD3_imm(u64) {
494 ThrowNotImplemented(Opcode::IADD3_imm); 417 ThrowNotImplemented(Opcode::IADD3_imm);
495} 418}
496 419
497void TranslatorVisitor::IADD32I(u64) {
498 ThrowNotImplemented(Opcode::IADD32I);
499}
500
501void TranslatorVisitor::ICMP_reg(u64) { 420void TranslatorVisitor::ICMP_reg(u64) {
502 ThrowNotImplemented(Opcode::ICMP_reg); 421 ThrowNotImplemented(Opcode::ICMP_reg);
503} 422}
@@ -594,22 +513,6 @@ void TranslatorVisitor::ISBERD(u64) {
594 ThrowNotImplemented(Opcode::ISBERD); 513 ThrowNotImplemented(Opcode::ISBERD);
595} 514}
596 515
597void TranslatorVisitor::ISCADD_reg(u64) {
598 ThrowNotImplemented(Opcode::ISCADD_reg);
599}
600
601void TranslatorVisitor::ISCADD_cbuf(u64) {
602 ThrowNotImplemented(Opcode::ISCADD_cbuf);
603}
604
605void TranslatorVisitor::ISCADD_imm(u64) {
606 ThrowNotImplemented(Opcode::ISCADD_imm);
607}
608
609void TranslatorVisitor::ISCADD32I(u64) {
610 ThrowNotImplemented(Opcode::ISCADD32I);
611}
612
613void TranslatorVisitor::ISET_reg(u64) { 516void TranslatorVisitor::ISET_reg(u64) {
614 ThrowNotImplemented(Opcode::ISET_reg); 517 ThrowNotImplemented(Opcode::ISET_reg);
615} 518}
@@ -622,18 +525,6 @@ void TranslatorVisitor::ISET_imm(u64) {
622 ThrowNotImplemented(Opcode::ISET_imm); 525 ThrowNotImplemented(Opcode::ISET_imm);
623} 526}
624 527
625void TranslatorVisitor::ISETP_reg(u64) {
626 ThrowNotImplemented(Opcode::ISETP_reg);
627}
628
629void TranslatorVisitor::ISETP_cbuf(u64) {
630 ThrowNotImplemented(Opcode::ISETP_cbuf);
631}
632
633void TranslatorVisitor::ISETP_imm(u64) {
634 ThrowNotImplemented(Opcode::ISETP_imm);
635}
636
637void TranslatorVisitor::JCAL(u64) { 528void TranslatorVisitor::JCAL(u64) {
638 ThrowNotImplemented(Opcode::JCAL); 529 ThrowNotImplemented(Opcode::JCAL);
639} 530}
@@ -658,10 +549,6 @@ void TranslatorVisitor::LDC(u64) {
658 ThrowNotImplemented(Opcode::LDC); 549 ThrowNotImplemented(Opcode::LDC);
659} 550}
660 551
661void TranslatorVisitor::LDG(u64) {
662 ThrowNotImplemented(Opcode::LDG);
663}
664
665void TranslatorVisitor::LDL(u64) { 552void TranslatorVisitor::LDL(u64) {
666 ThrowNotImplemented(Opcode::LDL); 553 ThrowNotImplemented(Opcode::LDL);
667} 554}
@@ -866,10 +753,6 @@ void TranslatorVisitor::RTT(u64) {
866 ThrowNotImplemented(Opcode::RTT); 753 ThrowNotImplemented(Opcode::RTT);
867} 754}
868 755
869void TranslatorVisitor::S2R(u64) {
870 ThrowNotImplemented(Opcode::S2R);
871}
872
873void TranslatorVisitor::SAM(u64) { 756void TranslatorVisitor::SAM(u64) {
874 ThrowNotImplemented(Opcode::SAM); 757 ThrowNotImplemented(Opcode::SAM);
875} 758}
@@ -914,18 +797,6 @@ void TranslatorVisitor::SHFL(u64) {
914 ThrowNotImplemented(Opcode::SHFL); 797 ThrowNotImplemented(Opcode::SHFL);
915} 798}
916 799
917void TranslatorVisitor::SHL_reg(u64) {
918 ThrowNotImplemented(Opcode::SHL_reg);
919}
920
921void TranslatorVisitor::SHL_cbuf(u64) {
922 ThrowNotImplemented(Opcode::SHL_cbuf);
923}
924
925void TranslatorVisitor::SHL_imm(u64) {
926 ThrowNotImplemented(Opcode::SHL_imm);
927}
928
929void TranslatorVisitor::SHR_reg(u64) { 800void TranslatorVisitor::SHR_reg(u64) {
930 ThrowNotImplemented(Opcode::SHR_reg); 801 ThrowNotImplemented(Opcode::SHR_reg);
931} 802}
@@ -1086,20 +957,4 @@ void TranslatorVisitor::VSHR(u64) {
1086 ThrowNotImplemented(Opcode::VSHR); 957 ThrowNotImplemented(Opcode::VSHR);
1087} 958}
1088 959
1089void TranslatorVisitor::XMAD_reg(u64) {
1090 ThrowNotImplemented(Opcode::XMAD_reg);
1091}
1092
1093void TranslatorVisitor::XMAD_rc(u64) {
1094 ThrowNotImplemented(Opcode::XMAD_rc);
1095}
1096
1097void TranslatorVisitor::XMAD_cr(u64) {
1098 ThrowNotImplemented(Opcode::XMAD_cr);
1099}
1100
1101void TranslatorVisitor::XMAD_imm(u64) {
1102 ThrowNotImplemented(Opcode::XMAD_imm);
1103}
1104
1105} // namespace Shader::Maxwell 960} // namespace Shader::Maxwell