summaryrefslogtreecommitdiff
path: root/src/shader_recompiler/frontend/maxwell/translate/impl
diff options
context:
space:
mode:
Diffstat (limited to 'src/shader_recompiler/frontend/maxwell/translate/impl')
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp214
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp110
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp35
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp96
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp74
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp36
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h57
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp153
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h28
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp66
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp55
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp72
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp58
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp55
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp50
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp54
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp43
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp47
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp82
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp55
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp78
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp214
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp253
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp94
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp71
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp127
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp41
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp60
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp44
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp125
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp169
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h42
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp143
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp117
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp118
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp272
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/impl.h387
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp105
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp122
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp48
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp80
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp182
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp82
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp64
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp36
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp86
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp58
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp71
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp66
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp135
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp126
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp53
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h39
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp108
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp196
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp218
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp184
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp116
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp122
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp66
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp44
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp71
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp181
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp283
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp45
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp46
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp38
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp53
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp44
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp205
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp281
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp236
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp266
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp208
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp134
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp182
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp165
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp242
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp131
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp76
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp30
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h23
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp92
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp64
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp92
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp54
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp69
91 files changed, 9761 insertions, 0 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
new file mode 100644
index 000000000..d9f999e05
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
@@ -0,0 +1,214 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class AtomOp : u64 {
12 ADD,
13 MIN,
14 MAX,
15 INC,
16 DEC,
17 AND,
18 OR,
19 XOR,
20 EXCH,
21 SAFEADD,
22};
23
24enum class AtomSize : u64 {
25 U32,
26 S32,
27 U64,
28 F32,
29 F16x2,
30 S64,
31};
32
33IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b,
34 AtomOp op, bool is_signed) {
35 switch (op) {
36 case AtomOp::ADD:
37 return ir.GlobalAtomicIAdd(offset, op_b);
38 case AtomOp::MIN:
39 return ir.GlobalAtomicIMin(offset, op_b, is_signed);
40 case AtomOp::MAX:
41 return ir.GlobalAtomicIMax(offset, op_b, is_signed);
42 case AtomOp::INC:
43 return ir.GlobalAtomicInc(offset, op_b);
44 case AtomOp::DEC:
45 return ir.GlobalAtomicDec(offset, op_b);
46 case AtomOp::AND:
47 return ir.GlobalAtomicAnd(offset, op_b);
48 case AtomOp::OR:
49 return ir.GlobalAtomicOr(offset, op_b);
50 case AtomOp::XOR:
51 return ir.GlobalAtomicXor(offset, op_b);
52 case AtomOp::EXCH:
53 return ir.GlobalAtomicExchange(offset, op_b);
54 default:
55 throw NotImplementedException("Integer Atom Operation {}", op);
56 }
57}
58
59IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op,
60 AtomSize size) {
61 static constexpr IR::FpControl f16_control{
62 .no_contraction = false,
63 .rounding = IR::FpRounding::RN,
64 .fmz_mode = IR::FmzMode::DontCare,
65 };
66 static constexpr IR::FpControl f32_control{
67 .no_contraction = false,
68 .rounding = IR::FpRounding::RN,
69 .fmz_mode = IR::FmzMode::FTZ,
70 };
71 switch (op) {
72 case AtomOp::ADD:
73 return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control)
74 : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control);
75 case AtomOp::MIN:
76 return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control);
77 case AtomOp::MAX:
78 return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control);
79 default:
80 throw NotImplementedException("FP Atom Operation {}", op);
81 }
82}
83
84IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) {
85 union {
86 u64 raw;
87 BitField<8, 8, IR::Reg> addr_reg;
88 BitField<28, 20, s64> addr_offset;
89 BitField<28, 20, u64> rz_addr_offset;
90 BitField<48, 1, u64> e;
91 } const mem{insn};
92
93 const IR::U64 address{[&]() -> IR::U64 {
94 if (mem.e == 0) {
95 return v.ir.UConvert(64, v.X(mem.addr_reg));
96 }
97 return v.L(mem.addr_reg);
98 }()};
99 const u64 addr_offset{[&]() -> u64 {
100 if (mem.addr_reg == IR::Reg::RZ) {
101 // When RZ is used, the address is an absolute address
102 return static_cast<u64>(mem.rz_addr_offset.Value());
103 } else {
104 return static_cast<u64>(mem.addr_offset.Value());
105 }
106 }()};
107 return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
108}
109
110bool AtomOpNotApplicable(AtomSize size, AtomOp op) {
111 // TODO: SAFEADD
112 switch (size) {
113 case AtomSize::S32:
114 case AtomSize::U64:
115 return (op == AtomOp::INC || op == AtomOp::DEC);
116 case AtomSize::S64:
117 return !(op == AtomOp::MIN || op == AtomOp::MAX);
118 case AtomSize::F32:
119 return op != AtomOp::ADD;
120 case AtomSize::F16x2:
121 return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX);
122 default:
123 return false;
124 }
125}
126
127IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) {
128 switch (size) {
129 case AtomSize::U32:
130 case AtomSize::S32:
131 case AtomSize::F32:
132 case AtomSize::F16x2:
133 return ir.LoadGlobal32(offset);
134 case AtomSize::U64:
135 case AtomSize::S64:
136 return ir.PackUint2x32(ir.LoadGlobal64(offset));
137 default:
138 throw NotImplementedException("Atom Size {}", size);
139 }
140}
141
142void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) {
143 switch (size) {
144 case AtomSize::U32:
145 case AtomSize::S32:
146 case AtomSize::F16x2:
147 return v.X(dest_reg, IR::U32{result});
148 case AtomSize::U64:
149 case AtomSize::S64:
150 return v.L(dest_reg, IR::U64{result});
151 case AtomSize::F32:
152 return v.F(dest_reg, IR::F32{result});
153 default:
154 break;
155 }
156}
157
158IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset,
159 AtomSize size, AtomOp op) {
160 switch (size) {
161 case AtomSize::U32:
162 case AtomSize::S32:
163 return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, size == AtomSize::S32);
164 case AtomSize::U64:
165 case AtomSize::S64:
166 return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, size == AtomSize::S64);
167 case AtomSize::F32:
168 return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size);
169 case AtomSize::F16x2: {
170 return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size);
171 }
172 default:
173 throw NotImplementedException("Atom Size {}", size);
174 }
175}
176
177void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg,
178 const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) {
179 IR::Value result;
180 if (AtomOpNotApplicable(size, op)) {
181 result = LoadGlobal(v.ir, offset, size);
182 } else {
183 result = ApplyAtomOp(v, operand_reg, offset, size, op);
184 }
185 if (write_dest) {
186 StoreResult(v, dest_reg, result, size);
187 }
188}
189} // Anonymous namespace
190
191void TranslatorVisitor::ATOM(u64 insn) {
192 union {
193 u64 raw;
194 BitField<0, 8, IR::Reg> dest_reg;
195 BitField<20, 8, IR::Reg> operand_reg;
196 BitField<49, 3, AtomSize> size;
197 BitField<52, 4, AtomOp> op;
198 } const atom{insn};
199 const IR::U64 offset{AtomOffset(*this, insn)};
200 GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true);
201}
202
203void TranslatorVisitor::RED(u64 insn) {
204 union {
205 u64 raw;
206 BitField<0, 8, IR::Reg> operand_reg;
207 BitField<20, 3, AtomSize> size;
208 BitField<23, 3, AtomOp> op;
209 } const red{insn};
210 const IR::U64 offset{AtomOffset(*this, insn)};
211 GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, true);
212}
213
214} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
new file mode 100644
index 000000000..8b974621e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
@@ -0,0 +1,110 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class AtomOp : u64 {
12 ADD,
13 MIN,
14 MAX,
15 INC,
16 DEC,
17 AND,
18 OR,
19 XOR,
20 EXCH,
21};
22
23enum class AtomsSize : u64 {
24 U32,
25 S32,
26 U64,
27};
28
29IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op,
30 bool is_signed) {
31 switch (op) {
32 case AtomOp::ADD:
33 return ir.SharedAtomicIAdd(offset, op_b);
34 case AtomOp::MIN:
35 return ir.SharedAtomicIMin(offset, op_b, is_signed);
36 case AtomOp::MAX:
37 return ir.SharedAtomicIMax(offset, op_b, is_signed);
38 case AtomOp::INC:
39 return ir.SharedAtomicInc(offset, op_b);
40 case AtomOp::DEC:
41 return ir.SharedAtomicDec(offset, op_b);
42 case AtomOp::AND:
43 return ir.SharedAtomicAnd(offset, op_b);
44 case AtomOp::OR:
45 return ir.SharedAtomicOr(offset, op_b);
46 case AtomOp::XOR:
47 return ir.SharedAtomicXor(offset, op_b);
48 case AtomOp::EXCH:
49 return ir.SharedAtomicExchange(offset, op_b);
50 default:
51 throw NotImplementedException("Integer Atoms Operation {}", op);
52 }
53}
54
55IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) {
56 union {
57 u64 raw;
58 BitField<8, 8, IR::Reg> offset_reg;
59 BitField<30, 22, u64> absolute_offset;
60 BitField<30, 22, s64> relative_offset;
61 } const encoding{insn};
62
63 if (encoding.offset_reg == IR::Reg::RZ) {
64 return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2));
65 } else {
66 const s32 relative{static_cast<s32>(encoding.relative_offset << 2)};
67 return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
68 }
69}
70
71void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) {
72 switch (size) {
73 case AtomsSize::U32:
74 case AtomsSize::S32:
75 return v.X(dest_reg, IR::U32{result});
76 case AtomsSize::U64:
77 return v.L(dest_reg, IR::U64{result});
78 default:
79 break;
80 }
81}
82} // Anonymous namespace
83
84void TranslatorVisitor::ATOMS(u64 insn) {
85 union {
86 u64 raw;
87 BitField<0, 8, IR::Reg> dest_reg;
88 BitField<8, 8, IR::Reg> addr_reg;
89 BitField<20, 8, IR::Reg> src_reg_b;
90 BitField<28, 2, AtomsSize> size;
91 BitField<52, 4, AtomOp> op;
92 } const atoms{insn};
93
94 const bool size_64{atoms.size == AtomsSize::U64};
95 if (size_64 && atoms.op != AtomOp::EXCH) {
96 throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value());
97 }
98 const bool is_signed{atoms.size == AtomsSize::S32};
99 const IR::U32 offset{AtomsOffset(*this, insn)};
100
101 IR::Value result;
102 if (size_64) {
103 result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed);
104 } else {
105 result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed);
106 }
107 StoreResult(*this, atoms.dest_reg, result, atoms.size);
108}
109
110} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
new file mode 100644
index 000000000..fb3f00d3f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
@@ -0,0 +1,35 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/opcodes.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11
12enum class BitSize : u64 {
13 B32,
14 B64,
15 B96,
16 B128,
17};
18
19void TranslatorVisitor::AL2P(u64 inst) {
20 union {
21 u64 raw;
22 BitField<0, 8, IR::Reg> result_register;
23 BitField<8, 8, IR::Reg> indexing_register;
24 BitField<20, 11, s64> offset;
25 BitField<47, 2, BitSize> bitsize;
26 } al2p{inst};
27 if (al2p.bitsize != BitSize::B32) {
28 throw NotImplementedException("BitSize {}", al2p.bitsize.Value());
29 }
30 const IR::U32 converted_offset{ir.Imm32(static_cast<u32>(al2p.offset.Value()))};
31 const IR::U32 result{ir.IAdd(X(al2p.indexing_register), converted_offset)};
32 X(al2p.result_register, result);
33}
34
35} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
new file mode 100644
index 000000000..86e433e41
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
@@ -0,0 +1,96 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/ir/modifiers.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13// Seems to be in CUDA terminology.
14enum class LocalScope : u64 {
15 CTA,
16 GL,
17 SYS,
18 VC,
19};
20} // Anonymous namespace
21
22void TranslatorVisitor::MEMBAR(u64 inst) {
23 union {
24 u64 raw;
25 BitField<8, 2, LocalScope> scope;
26 } const membar{inst};
27
28 if (membar.scope == LocalScope::CTA) {
29 ir.WorkgroupMemoryBarrier();
30 } else {
31 ir.DeviceMemoryBarrier();
32 }
33}
34
35void TranslatorVisitor::DEPBAR() {
36 // DEPBAR is a no-op
37}
38
39void TranslatorVisitor::BAR(u64 insn) {
40 enum class Mode {
41 RedPopc,
42 Scan,
43 RedAnd,
44 RedOr,
45 Sync,
46 Arrive,
47 };
48 union {
49 u64 raw;
50 BitField<43, 1, u64> is_a_imm;
51 BitField<44, 1, u64> is_b_imm;
52 BitField<8, 8, u64> imm_a;
53 BitField<20, 12, u64> imm_b;
54 BitField<42, 1, u64> neg_pred;
55 BitField<39, 3, IR::Pred> pred;
56 } const bar{insn};
57
58 const Mode mode{[insn] {
59 switch (insn & 0x0000009B00000000ULL) {
60 case 0x0000000200000000ULL:
61 return Mode::RedPopc;
62 case 0x0000000300000000ULL:
63 return Mode::Scan;
64 case 0x0000000A00000000ULL:
65 return Mode::RedAnd;
66 case 0x0000001200000000ULL:
67 return Mode::RedOr;
68 case 0x0000008000000000ULL:
69 return Mode::Sync;
70 case 0x0000008100000000ULL:
71 return Mode::Arrive;
72 }
73 throw NotImplementedException("Invalid encoding");
74 }()};
75 if (mode != Mode::Sync) {
76 throw NotImplementedException("BAR mode {}", mode);
77 }
78 if (bar.is_a_imm == 0) {
79 throw NotImplementedException("Non-immediate input A");
80 }
81 if (bar.imm_a != 0) {
82 throw NotImplementedException("Non-zero input A");
83 }
84 if (bar.is_b_imm == 0) {
85 throw NotImplementedException("Non-immediate input B");
86 }
87 if (bar.imm_b != 0) {
88 throw NotImplementedException("Non-zero input B");
89 }
90 if (bar.pred != IR::Pred::PT && bar.neg_pred != 0) {
91 throw NotImplementedException("Non-true input predicate");
92 }
93 ir.Barrier();
94}
95
96} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp
new file mode 100644
index 000000000..9d5a87e52
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp
@@ -0,0 +1,74 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> offset_reg;
16 BitField<40, 1, u64> brev;
17 BitField<47, 1, u64> cc;
18 BitField<48, 1, u64> is_signed;
19 } const bfe{insn};
20
21 const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)};
22 const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)};
23
24 // Common constants
25 const IR::U32 zero{v.ir.Imm32(0)};
26 const IR::U32 one{v.ir.Imm32(1)};
27 const IR::U32 max_size{v.ir.Imm32(32)};
28 // Edge case conditions
29 const IR::U1 zero_count{v.ir.IEqual(count, zero)};
30 const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)};
31 const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)};
32
33 IR::U32 base{v.X(bfe.offset_reg)};
34 if (bfe.brev != 0) {
35 base = v.ir.BitReverse(base);
36 }
37 IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)};
38 if (bfe.is_signed != 0) {
39 const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)};
40 const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
41 const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)};
42 // Replicate condition
43 result = IR::U32{v.ir.Select(replicate, replicated_bit, result)};
44 // Exceeding condition
45 const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)};
46 result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)};
47 }
48 // Zero count condition
49 result = IR::U32{v.ir.Select(zero_count, zero, result)};
50
51 v.X(bfe.dest_reg, result);
52
53 if (bfe.cc != 0) {
54 v.SetZFlag(v.ir.IEqual(result, zero));
55 v.SetSFlag(v.ir.ILessThan(result, zero, true));
56 v.ResetCFlag();
57 v.ResetOFlag();
58 }
59}
60} // Anonymous namespace
61
62void TranslatorVisitor::BFE_reg(u64 insn) {
63 BFE(*this, insn, GetReg20(insn));
64}
65
66void TranslatorVisitor::BFE_cbuf(u64 insn) {
67 BFE(*this, insn, GetCbuf(insn));
68}
69
70void TranslatorVisitor::BFE_imm(u64 insn) {
71 BFE(*this, insn, GetImm20(insn));
72}
73
74} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
new file mode 100644
index 000000000..1e1ec2119
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> insert_reg;
16 BitField<47, 1, u64> cc;
17 } const bfi{insn};
18
19 const IR::U32 zero{v.ir.Imm32(0)};
20 const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)};
21 const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)};
22 const IR::U32 max_size{v.ir.Imm32(32)};
23
24 // Edge case conditions
25 const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)};
26 const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)};
27
28 const IR::U32 remaining_size{v.ir.ISub(max_size, offset)};
29 const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)};
30
31 const IR::U32 insert{v.X(bfi.insert_reg)};
32 IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)};
33
34 result = IR::U32{v.ir.Select(exceed_offset, base, result)};
35
36 v.X(bfi.dest_reg, result);
37 if (bfi.cc != 0) {
38 v.SetZFlag(v.ir.IEqual(result, zero));
39 v.SetSFlag(v.ir.ILessThan(result, zero, true));
40 v.ResetCFlag();
41 v.ResetOFlag();
42 }
43}
44} // Anonymous namespace
45
46void TranslatorVisitor::BFI_reg(u64 insn) {
47 BFI(*this, insn, GetReg20(insn), GetReg39(insn));
48}
49
50void TranslatorVisitor::BFI_rc(u64 insn) {
51 BFI(*this, insn, GetReg39(insn), GetCbuf(insn));
52}
53
54void TranslatorVisitor::BFI_cr(u64 insn) {
55 BFI(*this, insn, GetCbuf(insn), GetReg39(insn));
56}
57
58void TranslatorVisitor::BFI_imm(u64 insn) {
59 BFI(*this, insn, GetImm20(insn), GetReg39(insn));
60}
61
62} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
new file mode 100644
index 000000000..371c0e0f7
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
@@ -0,0 +1,36 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void Check(u64 insn) {
13 union {
14 u64 raw;
15 BitField<5, 1, u64> cbuf_mode;
16 BitField<6, 1, u64> lmt;
17 } const encoding{insn};
18
19 if (encoding.cbuf_mode != 0) {
20 throw NotImplementedException("Constant buffer mode");
21 }
22 if (encoding.lmt != 0) {
23 throw NotImplementedException("LMT");
24 }
25}
26} // Anonymous namespace
27
28void TranslatorVisitor::BRX(u64 insn) {
29 Check(insn);
30}
31
32void TranslatorVisitor::JMX(u64 insn) {
33 Check(insn);
34}
35
36} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
new file mode 100644
index 000000000..fd73f656c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
@@ -0,0 +1,57 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
11
12namespace Shader::Maxwell {
13
14enum class FpRounding : u64 {
15 RN,
16 RM,
17 RP,
18 RZ,
19};
20
21enum class FmzMode : u64 {
22 None,
23 FTZ,
24 FMZ,
25 INVALIDFMZ3,
26};
27
28inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) {
29 switch (fp_rounding) {
30 case FpRounding::RN:
31 return IR::FpRounding::RN;
32 case FpRounding::RM:
33 return IR::FpRounding::RM;
34 case FpRounding::RP:
35 return IR::FpRounding::RP;
36 case FpRounding::RZ:
37 return IR::FpRounding::RZ;
38 }
39 throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding);
40}
41
42inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) {
43 switch (fmz_mode) {
44 case FmzMode::None:
45 return IR::FmzMode::None;
46 case FmzMode::FTZ:
47 return IR::FmzMode::FTZ;
48 case FmzMode::FMZ:
49 // FMZ is manually handled in the instruction
50 return IR::FmzMode::FTZ;
51 case FmzMode::INVALIDFMZ3:
52 break;
53 }
54 throw NotImplementedException("Invalid FMZ mode {}", fmz_mode);
55}
56
57} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
new file mode 100644
index 000000000..20458d2ad
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
@@ -0,0 +1,153 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
6
7namespace Shader::Maxwell {
8IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
9 CompareOp compare_op, bool is_signed) {
10 switch (compare_op) {
11 case CompareOp::False:
12 return ir.Imm1(false);
13 case CompareOp::LessThan:
14 return ir.ILessThan(operand_1, operand_2, is_signed);
15 case CompareOp::Equal:
16 return ir.IEqual(operand_1, operand_2);
17 case CompareOp::LessThanEqual:
18 return ir.ILessThanEqual(operand_1, operand_2, is_signed);
19 case CompareOp::GreaterThan:
20 return ir.IGreaterThan(operand_1, operand_2, is_signed);
21 case CompareOp::NotEqual:
22 return ir.INotEqual(operand_1, operand_2);
23 case CompareOp::GreaterThanEqual:
24 return ir.IGreaterThanEqual(operand_1, operand_2, is_signed);
25 case CompareOp::True:
26 return ir.Imm1(true);
27 default:
28 throw NotImplementedException("Invalid compare op {}", compare_op);
29 }
30}
31
32IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
33 CompareOp compare_op, bool is_signed) {
34 const IR::U32 zero{ir.Imm32(0)};
35 const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)};
36 const IR::U1 z_flag{ir.GetZFlag()};
37 const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)};
38 const IR::U1 flip_logic{is_signed ? ir.Imm1(false)
39 : ir.LogicalXor(ir.ILessThan(operand_1, zero, true),
40 ir.ILessThan(operand_2, zero, true))};
41 switch (compare_op) {
42 case CompareOp::False:
43 return ir.Imm1(false);
44 case CompareOp::LessThan:
45 return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
46 ir.ILessThan(intermediate, zero, true))};
47 case CompareOp::Equal:
48 return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag);
49 case CompareOp::LessThanEqual: {
50 const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
51 ir.ILessThan(intermediate, zero, true))};
52 return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
53 }
54 case CompareOp::GreaterThan: {
55 const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true),
56 ir.IGreaterThan(intermediate, zero, true))};
57 const IR::U1 not_z{ir.LogicalNot(z_flag)};
58 return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z));
59 }
60 case CompareOp::NotEqual:
61 return ir.LogicalOr(ir.INotEqual(intermediate, zero),
62 ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag)));
63 case CompareOp::GreaterThanEqual: {
64 const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true),
65 ir.IGreaterThanEqual(intermediate, zero, true))};
66 return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
67 }
68 case CompareOp::True:
69 return ir.Imm1(true);
70 default:
71 throw NotImplementedException("Invalid compare op {}", compare_op);
72 }
73}
74
75IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2,
76 BooleanOp bop) {
77 switch (bop) {
78 case BooleanOp::AND:
79 return ir.LogicalAnd(predicate_1, predicate_2);
80 case BooleanOp::OR:
81 return ir.LogicalOr(predicate_1, predicate_2);
82 case BooleanOp::XOR:
83 return ir.LogicalXor(predicate_1, predicate_2);
84 default:
85 throw NotImplementedException("Invalid bop {}", bop);
86 }
87}
88
89IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) {
90 switch (op) {
91 case PredicateOp::False:
92 return ir.Imm1(false);
93 case PredicateOp::True:
94 return ir.Imm1(true);
95 case PredicateOp::Zero:
96 return ir.IEqual(result, ir.Imm32(0));
97 case PredicateOp::NonZero:
98 return ir.INotEqual(result, ir.Imm32(0));
99 default:
100 throw NotImplementedException("Invalid Predicate operation {}", op);
101 }
102}
103
104bool IsCompareOpOrdered(FPCompareOp op) {
105 switch (op) {
106 case FPCompareOp::LTU:
107 case FPCompareOp::EQU:
108 case FPCompareOp::LEU:
109 case FPCompareOp::GTU:
110 case FPCompareOp::NEU:
111 case FPCompareOp::GEU:
112 return false;
113 default:
114 return true;
115 }
116}
117
118IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
119 const IR::F16F32F64& operand_2, FPCompareOp compare_op,
120 IR::FpControl control) {
121 const bool ordered{IsCompareOpOrdered(compare_op)};
122 switch (compare_op) {
123 case FPCompareOp::F:
124 return ir.Imm1(false);
125 case FPCompareOp::LT:
126 case FPCompareOp::LTU:
127 return ir.FPLessThan(operand_1, operand_2, control, ordered);
128 case FPCompareOp::EQ:
129 case FPCompareOp::EQU:
130 return ir.FPEqual(operand_1, operand_2, control, ordered);
131 case FPCompareOp::LE:
132 case FPCompareOp::LEU:
133 return ir.FPLessThanEqual(operand_1, operand_2, control, ordered);
134 case FPCompareOp::GT:
135 case FPCompareOp::GTU:
136 return ir.FPGreaterThan(operand_1, operand_2, control, ordered);
137 case FPCompareOp::NE:
138 case FPCompareOp::NEU:
139 return ir.FPNotEqual(operand_1, operand_2, control, ordered);
140 case FPCompareOp::GE:
141 case FPCompareOp::GEU:
142 return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered);
143 case FPCompareOp::NUM:
144 return ir.FPOrdered(operand_1, operand_2);
145 case FPCompareOp::Nan:
146 return ir.FPUnordered(operand_1, operand_2);
147 case FPCompareOp::T:
148 return ir.Imm1(true);
149 default:
150 throw NotImplementedException("Invalid FP compare op {}", compare_op);
151 }
152}
153} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
new file mode 100644
index 000000000..214d0af3c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
@@ -0,0 +1,28 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
12 const IR::U32& operand_2, CompareOp compare_op, bool is_signed);
13
14[[nodiscard]] IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
15 const IR::U32& operand_2, CompareOp compare_op,
16 bool is_signed);
17
18[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1,
19 const IR::U1& predicate_2, BooleanOp bop);
20
21[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op);
22
23[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op);
24
25[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
26 const IR::F16F32F64& operand_2, FPCompareOp compare_op,
27 IR::FpControl control = {});
28} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp
new file mode 100644
index 000000000..420f2fb94
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp
@@ -0,0 +1,66 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11
12void TranslatorVisitor::CSET(u64 insn) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 5, IR::FlowTest> cc_test;
17 BitField<39, 3, IR::Pred> bop_pred;
18 BitField<42, 1, u64> neg_bop_pred;
19 BitField<44, 1, u64> bf;
20 BitField<45, 2, BooleanOp> bop;
21 BitField<47, 1, u64> cc;
22 } const cset{insn};
23
24 const IR::U32 one_mask{ir.Imm32(-1)};
25 const IR::U32 fp_one{ir.Imm32(0x3f800000)};
26 const IR::U32 zero{ir.Imm32(0)};
27 const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one};
28 const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)};
29 const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)};
30 const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)};
31 const IR::U32 result{ir.Select(pred_result, pass_result, zero)};
32 X(cset.dest_reg, result);
33 if (cset.cc != 0) {
34 const IR::U1 is_zero{ir.IEqual(result, zero)};
35 SetZFlag(is_zero);
36 if (cset.bf != 0) {
37 ResetSFlag();
38 } else {
39 SetSFlag(ir.LogicalNot(is_zero));
40 }
41 ResetOFlag();
42 ResetCFlag();
43 }
44}
45
46void TranslatorVisitor::CSETP(u64 insn) {
47 union {
48 u64 raw;
49 BitField<0, 3, IR::Pred> dest_pred_b;
50 BitField<3, 3, IR::Pred> dest_pred_a;
51 BitField<8, 5, IR::FlowTest> cc_test;
52 BitField<39, 3, IR::Pred> bop_pred;
53 BitField<42, 1, u64> neg_bop_pred;
54 BitField<45, 2, BooleanOp> bop;
55 } const csetp{insn};
56
57 const BooleanOp bop{csetp.bop};
58 const IR::U1 bop_pred{ir.GetPred(csetp.bop_pred, csetp.neg_bop_pred != 0)};
59 const IR::U1 cc_test_result{ir.GetFlowTestResult(csetp.cc_test)};
60 const IR::U1 result_a{PredicateCombine(ir, cc_test_result, bop_pred, bop)};
61 const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(cc_test_result), bop_pred, bop)};
62 ir.SetPred(csetp.dest_pred_a, result_a);
63 ir.SetPred(csetp.dest_pred_b, result_b);
64}
65
66} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
new file mode 100644
index 000000000..5a1b3a8fc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
@@ -0,0 +1,55 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12
13void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a_reg;
18 BitField<39, 2, FpRounding> fp_rounding;
19 BitField<45, 1, u64> neg_b;
20 BitField<46, 1, u64> abs_a;
21 BitField<47, 1, u64> cc;
22 BitField<48, 1, u64> neg_a;
23 BitField<49, 1, u64> abs_b;
24 } const dadd{insn};
25 if (dadd.cc != 0) {
26 throw NotImplementedException("DADD CC");
27 }
28
29 const IR::F64 src_a{v.D(dadd.src_a_reg)};
30 const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)};
31 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)};
32
33 const IR::FpControl control{
34 .no_contraction = true,
35 .rounding = CastFpRounding(dadd.fp_rounding),
36 .fmz_mode = IR::FmzMode::None,
37 };
38
39 v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
40}
41} // Anonymous namespace
42
43void TranslatorVisitor::DADD_reg(u64 insn) {
44 DADD(*this, insn, GetDoubleReg20(insn));
45}
46
47void TranslatorVisitor::DADD_cbuf(u64 insn) {
48 DADD(*this, insn, GetDoubleCbuf(insn));
49}
50
51void TranslatorVisitor::DADD_imm(u64 insn) {
52 DADD(*this, insn, GetDoubleImm20(insn));
53}
54
55} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp
new file mode 100644
index 000000000..1173192e4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp
@@ -0,0 +1,72 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_a_reg;
17 BitField<39, 3, IR::Pred> pred;
18 BitField<42, 1, u64> neg_pred;
19 BitField<43, 1, u64> negate_a;
20 BitField<44, 1, u64> abs_b;
21 BitField<45, 2, BooleanOp> bop;
22 BitField<47, 1, u64> cc;
23 BitField<48, 4, FPCompareOp> compare_op;
24 BitField<52, 1, u64> bf;
25 BitField<53, 1, u64> negate_b;
26 BitField<54, 1, u64> abs_a;
27 } const dset{insn};
28
29 const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)};
30 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)};
31
32 IR::U1 pred{v.ir.GetPred(dset.pred)};
33 if (dset.neg_pred != 0) {
34 pred = v.ir.LogicalNot(pred);
35 }
36 const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)};
37 const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)};
38
39 const IR::U32 one_mask{v.ir.Imm32(-1)};
40 const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
41 const IR::U32 zero{v.ir.Imm32(0)};
42 const IR::U32 pass_result{dset.bf == 0 ? one_mask : fp_one};
43 const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
44
45 v.X(dset.dest_reg, result);
46 if (dset.cc != 0) {
47 const IR::U1 is_zero{v.ir.IEqual(result, zero)};
48 v.SetZFlag(is_zero);
49 if (dset.bf != 0) {
50 v.ResetSFlag();
51 } else {
52 v.SetSFlag(v.ir.LogicalNot(is_zero));
53 }
54 v.ResetCFlag();
55 v.ResetOFlag();
56 }
57}
58} // Anonymous namespace
59
60void TranslatorVisitor::DSET_reg(u64 insn) {
61 DSET(*this, insn, GetDoubleReg20(insn));
62}
63
64void TranslatorVisitor::DSET_cbuf(u64 insn) {
65 DSET(*this, insn, GetDoubleCbuf(insn));
66}
67
68void TranslatorVisitor::DSET_imm(u64 insn) {
69 DSET(*this, insn, GetDoubleImm20(insn));
70}
71
72} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
new file mode 100644
index 000000000..f66097014
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
@@ -0,0 +1,58 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12
13void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a_reg;
18 BitField<50, 2, FpRounding> fp_rounding;
19 BitField<47, 1, u64> cc;
20 BitField<48, 1, u64> neg_b;
21 BitField<49, 1, u64> neg_c;
22 } const dfma{insn};
23
24 if (dfma.cc != 0) {
25 throw NotImplementedException("DFMA CC");
26 }
27
28 const IR::F64 src_a{v.D(dfma.src_a_reg)};
29 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)};
30 const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)};
31
32 const IR::FpControl control{
33 .no_contraction = true,
34 .rounding = CastFpRounding(dfma.fp_rounding),
35 .fmz_mode = IR::FmzMode::None,
36 };
37
38 v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control));
39}
40} // Anonymous namespace
41
42void TranslatorVisitor::DFMA_reg(u64 insn) {
43 DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn));
44}
45
46void TranslatorVisitor::DFMA_cr(u64 insn) {
47 DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn));
48}
49
50void TranslatorVisitor::DFMA_rc(u64 insn) {
51 DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn));
52}
53
54void TranslatorVisitor::DFMA_imm(u64 insn) {
55 DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn));
56}
57
58} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp
new file mode 100644
index 000000000..6b551847c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp
@@ -0,0 +1,55 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_a_reg;
16 BitField<39, 3, IR::Pred> pred;
17 BitField<42, 1, u64> neg_pred;
18 BitField<45, 1, u64> negate_b;
19 BitField<46, 1, u64> abs_a;
20 BitField<47, 1, u64> cc;
21 BitField<48, 1, u64> negate_a;
22 BitField<49, 1, u64> abs_b;
23 } const dmnmx{insn};
24
25 if (dmnmx.cc != 0) {
26 throw NotImplementedException("DMNMX CC");
27 }
28
29 const IR::U1 pred{v.ir.GetPred(dmnmx.pred)};
30 const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)};
31 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)};
32
33 IR::F64 max{v.ir.FPMax(op_a, op_b)};
34 IR::F64 min{v.ir.FPMin(op_a, op_b)};
35
36 if (dmnmx.neg_pred != 0) {
37 std::swap(min, max);
38 }
39 v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(pred, min, max)});
40}
41} // Anonymous namespace
42
43void TranslatorVisitor::DMNMX_reg(u64 insn) {
44 DMNMX(*this, insn, GetDoubleReg20(insn));
45}
46
47void TranslatorVisitor::DMNMX_cbuf(u64 insn) {
48 DMNMX(*this, insn, GetDoubleCbuf(insn));
49}
50
51void TranslatorVisitor::DMNMX_imm(u64 insn) {
52 DMNMX(*this, insn, GetDoubleImm20(insn));
53}
54
55} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
new file mode 100644
index 000000000..c0159fb65
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
@@ -0,0 +1,50 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12
13void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a_reg;
18 BitField<39, 2, FpRounding> fp_rounding;
19 BitField<47, 1, u64> cc;
20 BitField<48, 1, u64> neg;
21 } const dmul{insn};
22
23 if (dmul.cc != 0) {
24 throw NotImplementedException("DMUL CC");
25 }
26
27 const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)};
28 const IR::FpControl control{
29 .no_contraction = true,
30 .rounding = CastFpRounding(dmul.fp_rounding),
31 .fmz_mode = IR::FmzMode::None,
32 };
33
34 v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control));
35}
36} // Anonymous namespace
37
38void TranslatorVisitor::DMUL_reg(u64 insn) {
39 DMUL(*this, insn, GetDoubleReg20(insn));
40}
41
42void TranslatorVisitor::DMUL_cbuf(u64 insn) {
43 DMUL(*this, insn, GetDoubleCbuf(insn));
44}
45
46void TranslatorVisitor::DMUL_imm(u64 insn) {
47 DMUL(*this, insn, GetDoubleImm20(insn));
48}
49
50} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp
new file mode 100644
index 000000000..b8e74ee44
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp
@@ -0,0 +1,54 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
13 union {
14 u64 insn;
15 BitField<0, 3, IR::Pred> dest_pred_b;
16 BitField<3, 3, IR::Pred> dest_pred_a;
17 BitField<6, 1, u64> negate_b;
18 BitField<7, 1, u64> abs_a;
19 BitField<8, 8, IR::Reg> src_a_reg;
20 BitField<39, 3, IR::Pred> bop_pred;
21 BitField<42, 1, u64> neg_bop_pred;
22 BitField<43, 1, u64> negate_a;
23 BitField<44, 1, u64> abs_b;
24 BitField<45, 2, BooleanOp> bop;
25 BitField<48, 4, FPCompareOp> compare_op;
26 } const dsetp{insn};
27
28 const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)};
29 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)};
30
31 const BooleanOp bop{dsetp.bop};
32 const FPCompareOp compare_op{dsetp.compare_op};
33 const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op)};
34 const IR::U1 bop_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)};
35 const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
36 const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
37 v.ir.SetPred(dsetp.dest_pred_a, result_a);
38 v.ir.SetPred(dsetp.dest_pred_b, result_b);
39}
40} // Anonymous namespace
41
42void TranslatorVisitor::DSETP_reg(u64 insn) {
43 DSETP(*this, insn, GetDoubleReg20(insn));
44}
45
46void TranslatorVisitor::DSETP_cbuf(u64 insn) {
47 DSETP(*this, insn, GetDoubleCbuf(insn));
48}
49
50void TranslatorVisitor::DSETP_imm(u64 insn) {
51 DSETP(*this, insn, GetDoubleImm20(insn));
52}
53
54} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
new file mode 100644
index 000000000..c2443c886
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
@@ -0,0 +1,43 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void ExitFragment(TranslatorVisitor& v) {
12 const ProgramHeader sph{v.env.SPH()};
13 IR::Reg src_reg{IR::Reg::R0};
14 for (u32 render_target = 0; render_target < 8; ++render_target) {
15 const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)};
16 for (u32 component = 0; component < 4; ++component) {
17 if (!mask[component]) {
18 continue;
19 }
20 v.ir.SetFragColor(render_target, component, v.F(src_reg));
21 ++src_reg;
22 }
23 }
24 if (sph.ps.omap.sample_mask != 0) {
25 v.ir.SetSampleMask(v.X(src_reg));
26 }
27 if (sph.ps.omap.depth != 0) {
28 v.ir.SetFragDepth(v.F(src_reg + 1));
29 }
30}
31} // Anonymous namespace
32
33void TranslatorVisitor::EXIT() {
34 switch (env.ShaderStage()) {
35 case Stage::Fragment:
36 ExitFragment(*this);
37 break;
38 default:
39 break;
40 }
41}
42
43} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
new file mode 100644
index 000000000..f0cb25d61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
@@ -0,0 +1,47 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<40, 1, u64> tilde;
16 BitField<41, 1, u64> shift;
17 BitField<47, 1, u64> cc;
18 BitField<48, 1, u64> is_signed;
19 } const flo{insn};
20
21 if (flo.cc != 0) {
22 throw NotImplementedException("CC");
23 }
24 if (flo.tilde != 0) {
25 src = v.ir.BitwiseNot(src);
26 }
27 IR::U32 result{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)};
28 if (flo.shift != 0) {
29 const IR::U1 not_found{v.ir.IEqual(result, v.ir.Imm32(-1))};
30 result = IR::U32{v.ir.Select(not_found, result, v.ir.BitwiseXor(result, v.ir.Imm32(31)))};
31 }
32 v.X(flo.dest_reg, result);
33}
34} // Anonymous namespace
35
36void TranslatorVisitor::FLO_reg(u64 insn) {
37 FLO(*this, insn, GetReg20(insn));
38}
39
40void TranslatorVisitor::FLO_cbuf(u64 insn) {
41 FLO(*this, insn, GetCbuf(insn));
42}
43
44void TranslatorVisitor::FLO_imm(u64 insn) {
45 FLO(*this, insn, GetImm20(insn));
46}
47} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
new file mode 100644
index 000000000..b8c89810c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
@@ -0,0 +1,82 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,
13 const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a;
18 } const fadd{insn};
19
20 if (cc) {
21 throw NotImplementedException("FADD CC");
22 }
23 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)};
24 const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
25 IR::FpControl control{
26 .no_contraction = true,
27 .rounding = CastFpRounding(fp_rounding),
28 .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
29 };
30 IR::F32 value{v.ir.FPAdd(op_a, op_b, control)};
31 if (sat) {
32 value = v.ir.FPSaturate(value);
33 }
34 v.F(fadd.dest_reg, value);
35}
36
37void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
38 union {
39 u64 raw;
40 BitField<39, 2, FpRounding> fp_rounding;
41 BitField<44, 1, u64> ftz;
42 BitField<45, 1, u64> neg_b;
43 BitField<46, 1, u64> abs_a;
44 BitField<47, 1, u64> cc;
45 BitField<48, 1, u64> neg_a;
46 BitField<49, 1, u64> abs_b;
47 BitField<50, 1, u64> sat;
48 } const fadd{insn};
49
50 FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b,
51 fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0);
52}
53} // Anonymous namespace
54
55void TranslatorVisitor::FADD_reg(u64 insn) {
56 FADD(*this, insn, GetFloatReg20(insn));
57}
58
59void TranslatorVisitor::FADD_cbuf(u64 insn) {
60 FADD(*this, insn, GetFloatCbuf(insn));
61}
62
63void TranslatorVisitor::FADD_imm(u64 insn) {
64 FADD(*this, insn, GetFloatImm20(insn));
65}
66
67void TranslatorVisitor::FADD32I(u64 insn) {
68 union {
69 u64 raw;
70 BitField<55, 1, u64> ftz;
71 BitField<56, 1, u64> neg_a;
72 BitField<54, 1, u64> abs_a;
73 BitField<52, 1, u64> cc;
74 BitField<53, 1, u64> neg_b;
75 BitField<57, 1, u64> abs_b;
76 } const fadd32i{insn};
77
78 FADD(*this, insn, false, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, GetFloatImm32(insn),
79 fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, fadd32i.neg_b != 0);
80}
81
82} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
new file mode 100644
index 000000000..7127ebf54
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
@@ -0,0 +1,55 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_reg;
17 BitField<47, 1, u64> ftz;
18 BitField<48, 4, FPCompareOp> compare_op;
19 } const fcmp{insn};
20
21 const IR::F32 zero{v.ir.Imm32(0.0f)};
22 const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)};
23 const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)};
24 const IR::U32 src_reg{v.X(fcmp.src_reg)};
25 const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
26
27 v.X(fcmp.dest_reg, result);
28}
29} // Anonymous namespace
30
31void TranslatorVisitor::FCMP_reg(u64 insn) {
32 FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn));
33}
34
35void TranslatorVisitor::FCMP_rc(u64 insn) {
36 FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn));
37}
38
39void TranslatorVisitor::FCMP_cr(u64 insn) {
40 FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn));
41}
42
43void TranslatorVisitor::FCMP_imm(u64 insn) {
44 union {
45 u64 raw;
46 BitField<20, 19, u64> value;
47 BitField<56, 1, u64> is_negative;
48 } const fcmp{insn};
49 const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0};
50 const u32 value{static_cast<u32>(fcmp.value) << 12};
51
52 FCMP(*this, insn, ir.Imm32(value | sign_bit), GetFloatReg39(insn));
53}
54
55} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
new file mode 100644
index 000000000..eece4f28f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
@@ -0,0 +1,78 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_a_reg;
17 BitField<39, 3, IR::Pred> pred;
18 BitField<42, 1, u64> neg_pred;
19 BitField<43, 1, u64> negate_a;
20 BitField<44, 1, u64> abs_b;
21 BitField<45, 2, BooleanOp> bop;
22 BitField<47, 1, u64> cc;
23 BitField<48, 4, FPCompareOp> compare_op;
24 BitField<52, 1, u64> bf;
25 BitField<53, 1, u64> negate_b;
26 BitField<54, 1, u64> abs_a;
27 BitField<55, 1, u64> ftz;
28 } const fset{insn};
29
30 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)};
31 const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0);
32 const IR::FpControl control{
33 .no_contraction = false,
34 .rounding = IR::FpRounding::DontCare,
35 .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
36 };
37
38 IR::U1 pred{v.ir.GetPred(fset.pred)};
39 if (fset.neg_pred != 0) {
40 pred = v.ir.LogicalNot(pred);
41 }
42 const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)};
43 const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)};
44
45 const IR::U32 one_mask{v.ir.Imm32(-1)};
46 const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
47 const IR::U32 zero{v.ir.Imm32(0)};
48 const IR::U32 pass_result{fset.bf == 0 ? one_mask : fp_one};
49 const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
50
51 v.X(fset.dest_reg, result);
52 if (fset.cc != 0) {
53 const IR::U1 is_zero{v.ir.IEqual(result, zero)};
54 v.SetZFlag(is_zero);
55 if (fset.bf != 0) {
56 v.ResetSFlag();
57 } else {
58 v.SetSFlag(v.ir.LogicalNot(is_zero));
59 }
60 v.ResetCFlag();
61 v.ResetOFlag();
62 }
63}
64} // Anonymous namespace
65
66void TranslatorVisitor::FSET_reg(u64 insn) {
67 FSET(*this, insn, GetFloatReg20(insn));
68}
69
70void TranslatorVisitor::FSET_cbuf(u64 insn) {
71 FSET(*this, insn, GetFloatCbuf(insn));
72}
73
74void TranslatorVisitor::FSET_imm(u64 insn) {
75 FSET(*this, insn, GetFloatImm20(insn));
76}
77
78} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
new file mode 100644
index 000000000..02ab023c1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
@@ -0,0 +1,214 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
6#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
7
8namespace Shader::Maxwell {
9namespace {
10enum class FloatFormat : u64 {
11 F16 = 1,
12 F32 = 2,
13 F64 = 3,
14};
15
16enum class RoundingOp : u64 {
17 None = 0,
18 Pass = 3,
19 Round = 8,
20 Floor = 9,
21 Ceil = 10,
22 Trunc = 11,
23};
24
25[[nodiscard]] u32 WidthSize(FloatFormat width) {
26 switch (width) {
27 case FloatFormat::F16:
28 return 16;
29 case FloatFormat::F32:
30 return 32;
31 case FloatFormat::F64:
32 return 64;
33 default:
34 throw NotImplementedException("Invalid width {}", width);
35 }
36}
37
38void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
39 union {
40 u64 insn;
41 BitField<0, 8, IR::Reg> dest_reg;
42 BitField<44, 1, u64> ftz;
43 BitField<45, 1, u64> neg;
44 BitField<47, 1, u64> cc;
45 BitField<50, 1, u64> sat;
46 BitField<39, 4, u64> rounding_op;
47 BitField<39, 2, FpRounding> rounding;
48 BitField<10, 2, FloatFormat> src_size;
49 BitField<8, 2, FloatFormat> dst_size;
50
51 [[nodiscard]] RoundingOp RoundingOperation() const {
52 constexpr u64 rounding_mask = 0x0B;
53 return static_cast<RoundingOp>(rounding_op.Value() & rounding_mask);
54 }
55 } const f2f{insn};
56
57 if (f2f.cc != 0) {
58 throw NotImplementedException("F2F CC");
59 }
60
61 IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)};
62
63 const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64};
64 IR::FpControl fp_control{
65 .no_contraction = false,
66 .rounding = IR::FpRounding::DontCare,
67 .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None),
68 };
69 if (f2f.src_size != f2f.dst_size) {
70 fp_control.rounding = CastFpRounding(f2f.rounding);
71 input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control);
72 } else {
73 switch (f2f.RoundingOperation()) {
74 case RoundingOp::None:
75 case RoundingOp::Pass:
76 // Make sure NANs are handled properly
77 switch (f2f.src_size) {
78 case FloatFormat::F16:
79 input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control);
80 break;
81 case FloatFormat::F32:
82 input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control);
83 break;
84 case FloatFormat::F64:
85 input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control);
86 break;
87 }
88 break;
89 case RoundingOp::Round:
90 input = v.ir.FPRoundEven(input, fp_control);
91 break;
92 case RoundingOp::Floor:
93 input = v.ir.FPFloor(input, fp_control);
94 break;
95 case RoundingOp::Ceil:
96 input = v.ir.FPCeil(input, fp_control);
97 break;
98 case RoundingOp::Trunc:
99 input = v.ir.FPTrunc(input, fp_control);
100 break;
101 default:
102 throw NotImplementedException("Unimplemented rounding mode {}", f2f.rounding.Value());
103 }
104 }
105 if (f2f.sat != 0 && !any_fp64) {
106 input = v.ir.FPSaturate(input);
107 }
108
109 switch (f2f.dst_size) {
110 case FloatFormat::F16: {
111 const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
112 v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm)));
113 break;
114 }
115 case FloatFormat::F32:
116 v.F(f2f.dest_reg, input);
117 break;
118 case FloatFormat::F64:
119 v.D(f2f.dest_reg, input);
120 break;
121 default:
122 throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value());
123 }
124}
125} // Anonymous namespace
126
127void TranslatorVisitor::F2F_reg(u64 insn) {
128 union {
129 u64 insn;
130 BitField<49, 1, u64> abs;
131 BitField<10, 2, FloatFormat> src_size;
132 BitField<41, 1, u64> selector;
133 } const f2f{insn};
134
135 IR::F16F32F64 src_a;
136 switch (f2f.src_size) {
137 case FloatFormat::F16: {
138 auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)};
139 src_a = f2f.selector != 0 ? rhs_a : lhs_a;
140 break;
141 }
142 case FloatFormat::F32:
143 src_a = GetFloatReg20(insn);
144 break;
145 case FloatFormat::F64:
146 src_a = GetDoubleReg20(insn);
147 break;
148 default:
149 throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
150 }
151 F2F(*this, insn, src_a, f2f.abs != 0);
152}
153
154void TranslatorVisitor::F2F_cbuf(u64 insn) {
155 union {
156 u64 insn;
157 BitField<49, 1, u64> abs;
158 BitField<10, 2, FloatFormat> src_size;
159 BitField<41, 1, u64> selector;
160 } const f2f{insn};
161
162 IR::F16F32F64 src_a;
163 switch (f2f.src_size) {
164 case FloatFormat::F16: {
165 auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)};
166 src_a = f2f.selector != 0 ? rhs_a : lhs_a;
167 break;
168 }
169 case FloatFormat::F32:
170 src_a = GetFloatCbuf(insn);
171 break;
172 case FloatFormat::F64:
173 src_a = GetDoubleCbuf(insn);
174 break;
175 default:
176 throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
177 }
178 F2F(*this, insn, src_a, f2f.abs != 0);
179}
180
181void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) {
182 union {
183 u64 insn;
184 BitField<49, 1, u64> abs;
185 BitField<10, 2, FloatFormat> src_size;
186 BitField<41, 1, u64> selector;
187 BitField<20, 19, u64> imm;
188 BitField<56, 1, u64> imm_neg;
189 } const f2f{insn};
190
191 IR::F16F32F64 src_a;
192 switch (f2f.src_size) {
193 case FloatFormat::F16: {
194 const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)};
195 const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))};
196 src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)};
197 if (f2f.imm_neg != 0) {
198 throw NotImplementedException("Neg bit on F16");
199 }
200 break;
201 }
202 case FloatFormat::F32:
203 src_a = GetFloatImm20(insn);
204 break;
205 case FloatFormat::F64:
206 src_a = GetDoubleImm20(insn);
207 break;
208 default:
209 throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
210 }
211 F2F(*this, insn, src_a, f2f.abs != 0);
212}
213
214} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
new file mode 100644
index 000000000..92b1ce015
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
@@ -0,0 +1,253 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <limits>
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/maxwell/opcodes.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class DestFormat : u64 {
15 Invalid,
16 I16,
17 I32,
18 I64,
19};
20enum class SrcFormat : u64 {
21 Invalid,
22 F16,
23 F32,
24 F64,
25};
26enum class Rounding : u64 {
27 Round,
28 Floor,
29 Ceil,
30 Trunc,
31};
32
33union F2I {
34 u64 raw;
35 BitField<0, 8, IR::Reg> dest_reg;
36 BitField<8, 2, DestFormat> dest_format;
37 BitField<10, 2, SrcFormat> src_format;
38 BitField<12, 1, u64> is_signed;
39 BitField<39, 2, Rounding> rounding;
40 BitField<41, 1, u64> half;
41 BitField<44, 1, u64> ftz;
42 BitField<45, 1, u64> abs;
43 BitField<47, 1, u64> cc;
44 BitField<49, 1, u64> neg;
45};
46
47size_t BitSize(DestFormat dest_format) {
48 switch (dest_format) {
49 case DestFormat::I16:
50 return 16;
51 case DestFormat::I32:
52 return 32;
53 case DestFormat::I64:
54 return 64;
55 default:
56 throw NotImplementedException("Invalid destination format {}", dest_format);
57 }
58}
59
60std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) {
61 if (is_signed) {
62 switch (format) {
63 case DestFormat::I16:
64 return {static_cast<f64>(std::numeric_limits<s16>::max()),
65 static_cast<f64>(std::numeric_limits<s16>::min())};
66 case DestFormat::I32:
67 return {static_cast<f64>(std::numeric_limits<s32>::max()),
68 static_cast<f64>(std::numeric_limits<s32>::min())};
69 case DestFormat::I64:
70 return {static_cast<f64>(std::numeric_limits<s64>::max()),
71 static_cast<f64>(std::numeric_limits<s64>::min())};
72 default:
73 break;
74 }
75 } else {
76 switch (format) {
77 case DestFormat::I16:
78 return {static_cast<f64>(std::numeric_limits<u16>::max()),
79 static_cast<f64>(std::numeric_limits<u16>::min())};
80 case DestFormat::I32:
81 return {static_cast<f64>(std::numeric_limits<u32>::max()),
82 static_cast<f64>(std::numeric_limits<u32>::min())};
83 case DestFormat::I64:
84 return {static_cast<f64>(std::numeric_limits<u64>::max()),
85 static_cast<f64>(std::numeric_limits<u64>::min())};
86 default:
87 break;
88 }
89 }
90 throw NotImplementedException("Invalid destination format {}", format);
91}
92
93IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) {
94 union {
95 u64 raw;
96 BitField<20, 14, s64> offset;
97 BitField<34, 5, u64> binding;
98 } const cbuf{insn};
99 if (cbuf.binding >= 18) {
100 throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
101 }
102 if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) {
103 throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4);
104 }
105 if (cbuf.offset % 2 != 0) {
106 throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4);
107 }
108 const IR::U32 binding{v.ir.Imm32(static_cast<u32>(cbuf.binding))};
109 const IR::U32 byte_offset{v.ir.Imm32(static_cast<u32>(cbuf.offset) * 4 + 4)};
110 const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)};
111 const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)};
112 return v.ir.PackDouble2x32(vector);
113}
114
115void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
116 // F2I is used to convert from a floating point value to an integer
117 const F2I f2i{insn};
118
119 const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 &&
120 f2i.dest_format != DestFormat::I64};
121 IR::FmzMode fmz_mode{IR::FmzMode::DontCare};
122 if (denorm_cares) {
123 fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None;
124 }
125 const IR::FpControl fp_control{
126 .no_contraction = true,
127 .rounding = IR::FpRounding::DontCare,
128 .fmz_mode = fmz_mode,
129 };
130 const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)};
131 const IR::F16F32F64 rounded_value{[&] {
132 switch (f2i.rounding) {
133 case Rounding::Round:
134 return v.ir.FPRoundEven(op_a, fp_control);
135 case Rounding::Floor:
136 return v.ir.FPFloor(op_a, fp_control);
137 case Rounding::Ceil:
138 return v.ir.FPCeil(op_a, fp_control);
139 case Rounding::Trunc:
140 return v.ir.FPTrunc(op_a, fp_control);
141 default:
142 throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value());
143 }
144 }()};
145 const bool is_signed{f2i.is_signed != 0};
146 const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed);
147
148 IR::F16F32F64 intermediate;
149 switch (f2i.src_format) {
150 case SrcFormat::F16: {
151 const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))};
152 const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))};
153 intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
154 break;
155 }
156 case SrcFormat::F32: {
157 const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))};
158 const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))};
159 intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
160 break;
161 }
162 case SrcFormat::F64: {
163 const IR::F64 max_val{v.ir.Imm64(max_bound)};
164 const IR::F64 min_val{v.ir.Imm64(min_bound)};
165 intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
166 break;
167 }
168 default:
169 throw NotImplementedException("Invalid destination format {}", f2i.dest_format.Value());
170 }
171
172 const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))};
173 IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)};
174
175 bool handled_special_case = false;
176 const bool special_nan_cases =
177 (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64);
178 if (special_nan_cases) {
179 if (f2i.dest_format == DestFormat::I32) {
180 handled_special_case = true;
181 result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)};
182 } else if (f2i.dest_format == DestFormat::I64) {
183 handled_special_case = true;
184 result = IR::U64{
185 v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)};
186 }
187 }
188 if (!handled_special_case && is_signed) {
189 if (bitsize != 64) {
190 result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)};
191 } else {
192 result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)};
193 }
194 }
195
196 if (bitsize == 64) {
197 v.L(f2i.dest_reg, result);
198 } else {
199 v.X(f2i.dest_reg, result);
200 }
201
202 if (f2i.cc != 0) {
203 throw NotImplementedException("F2I CC");
204 }
205}
206} // Anonymous namespace
207
208void TranslatorVisitor::F2I_reg(u64 insn) {
209 union {
210 u64 raw;
211 F2I base;
212 BitField<20, 8, IR::Reg> src_reg;
213 } const f2i{insn};
214
215 const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
216 switch (f2i.base.src_format) {
217 case SrcFormat::F16:
218 return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)};
219 case SrcFormat::F32:
220 return F(f2i.src_reg);
221 case SrcFormat::F64:
222 return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1)));
223 default:
224 throw NotImplementedException("Invalid F2I source format {}",
225 f2i.base.src_format.Value());
226 }
227 }()};
228 TranslateF2I(*this, insn, op_a);
229}
230
231void TranslatorVisitor::F2I_cbuf(u64 insn) {
232 const F2I f2i{insn};
233 const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
234 switch (f2i.src_format) {
235 case SrcFormat::F16:
236 return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)};
237 case SrcFormat::F32:
238 return GetFloatCbuf(insn);
239 case SrcFormat::F64: {
240 return UnpackCbuf(*this, insn);
241 }
242 default:
243 throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value());
244 }
245 }()};
246 TranslateF2I(*this, insn, op_a);
247}
248
249void TranslatorVisitor::F2I_imm(u64) {
250 throw NotImplementedException("{}", Opcode::F2I_imm);
251}
252
253} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..fa2a7807b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
@@ -0,0 +1,94 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a,
13 bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a;
18 } const ffma{insn};
19
20 if (cc) {
21 throw NotImplementedException("FFMA CC");
22 }
23 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)};
24 const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
25 const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
26 const IR::FpControl fp_control{
27 .no_contraction = true,
28 .rounding = CastFpRounding(fp_rounding),
29 .fmz_mode = CastFmzMode(fmz_mode),
30 };
31 IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)};
32 if (fmz_mode == FmzMode::FMZ && !sat) {
33 // Do not implement FMZ if SAT is enabled, as it does the logic for us.
34 // On D3D9 mode, anything * 0 is zero, even NAN and infinity
35 const IR::F32 zero{v.ir.Imm32(0.0f)};
36 const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
37 const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
38 const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
39 value = IR::F32{v.ir.Select(any_zero, op_c, value)};
40 }
41 if (sat) {
42 value = v.ir.FPSaturate(value);
43 }
44 v.F(ffma.dest_reg, value);
45}
46
47void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) {
48 union {
49 u64 raw;
50 BitField<47, 1, u64> cc;
51 BitField<48, 1, u64> neg_b;
52 BitField<49, 1, u64> neg_c;
53 BitField<50, 1, u64> sat;
54 BitField<51, 2, FpRounding> fp_rounding;
55 BitField<53, 2, FmzMode> fmz_mode;
56 } const ffma{insn};
57
58 FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0,
59 ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding);
60}
61} // Anonymous namespace
62
63void TranslatorVisitor::FFMA_reg(u64 insn) {
64 FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn));
65}
66
67void TranslatorVisitor::FFMA_rc(u64 insn) {
68 FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn));
69}
70
71void TranslatorVisitor::FFMA_cr(u64 insn) {
72 FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn));
73}
74
75void TranslatorVisitor::FFMA_imm(u64 insn) {
76 FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn));
77}
78
79void TranslatorVisitor::FFMA32I(u64 insn) {
80 union {
81 u64 raw;
82 BitField<0, 8, IR::Reg> src_c; // FFMA32I mirrors the destination and addition register
83 BitField<52, 1, u64> cc;
84 BitField<53, 2, FmzMode> fmz_mode;
85 BitField<55, 1, u64> sat;
86 BitField<56, 1, u64> neg_a;
87 BitField<57, 1, u64> neg_c;
88 } const ffma32i{insn};
89
90 FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), ffma32i.neg_a != 0, false,
91 ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN);
92}
93
94} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
new file mode 100644
index 000000000..c0d6ee5af
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_a_reg;
16 BitField<39, 3, IR::Pred> pred;
17 BitField<42, 1, u64> neg_pred;
18 BitField<44, 1, u64> ftz;
19 BitField<45, 1, u64> negate_b;
20 BitField<46, 1, u64> abs_a;
21 BitField<47, 1, u64> cc;
22 BitField<48, 1, u64> negate_a;
23 BitField<49, 1, u64> abs_b;
24 } const fmnmx{insn};
25
26 if (fmnmx.cc) {
27 throw NotImplementedException("FMNMX CC");
28 }
29
30 const IR::U1 pred{v.ir.GetPred(fmnmx.pred)};
31 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)};
32 const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)};
33
34 const IR::FpControl control{
35 .no_contraction = false,
36 .rounding = IR::FpRounding::DontCare,
37 .fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
38 };
39 IR::F32 max{v.ir.FPMax(op_a, op_b, control)};
40 IR::F32 min{v.ir.FPMin(op_a, op_b, control)};
41
42 if (fmnmx.neg_pred != 0) {
43 std::swap(min, max);
44 }
45
46 v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(pred, min, max)});
47}
48} // Anonymous namespace
49
50void TranslatorVisitor::FMNMX_reg(u64 insn) {
51 FMNMX(*this, insn, GetFloatReg20(insn));
52}
53
54void TranslatorVisitor::FMNMX_cbuf(u64 insn) {
55 FMNMX(*this, insn, GetFloatCbuf(insn));
56}
57
58void TranslatorVisitor::FMNMX_imm(u64 insn) {
59 FMNMX(*this, insn, GetFloatImm20(insn));
60}
61
62} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
new file mode 100644
index 000000000..2f8605619
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
@@ -0,0 +1,71 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13enum class Operation : u64 {
14 Cos = 0,
15 Sin = 1,
16 Ex2 = 2, // Base 2 exponent
17 Lg2 = 3, // Base 2 logarithm
18 Rcp = 4, // Reciprocal
19 Rsq = 5, // Reciprocal square root
20 Rcp64H = 6, // 64-bit reciprocal
21 Rsq64H = 7, // 64-bit reciprocal square root
22 Sqrt = 8,
23};
24} // Anonymous namespace
25
26void TranslatorVisitor::MUFU(u64 insn) {
27 // MUFU is used to implement a bunch of special functions. See Operation.
28 union {
29 u64 raw;
30 BitField<0, 8, IR::Reg> dest_reg;
31 BitField<8, 8, IR::Reg> src_reg;
32 BitField<20, 4, Operation> operation;
33 BitField<46, 1, u64> abs;
34 BitField<48, 1, u64> neg;
35 BitField<50, 1, u64> sat;
36 } const mufu{insn};
37
38 const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)};
39 IR::F32 value{[&]() -> IR::F32 {
40 switch (mufu.operation) {
41 case Operation::Cos:
42 return ir.FPCos(op_a);
43 case Operation::Sin:
44 return ir.FPSin(op_a);
45 case Operation::Ex2:
46 return ir.FPExp2(op_a);
47 case Operation::Lg2:
48 return ir.FPLog2(op_a);
49 case Operation::Rcp:
50 return ir.FPRecip(op_a);
51 case Operation::Rsq:
52 return ir.FPRecipSqrt(op_a);
53 case Operation::Rcp64H:
54 throw NotImplementedException("MUFU.RCP64H");
55 case Operation::Rsq64H:
56 throw NotImplementedException("MUFU.RSQ64H");
57 case Operation::Sqrt:
58 return ir.FPSqrt(op_a);
59 default:
60 throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value());
61 }
62 }()};
63
64 if (mufu.sat) {
65 value = ir.FPSaturate(value);
66 }
67
68 F(mufu.dest_reg, value);
69}
70
71} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
new file mode 100644
index 000000000..06226b7ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
@@ -0,0 +1,127 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/ir/ir_emitter.h"
8#include "shader_recompiler/frontend/ir/modifiers.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Scale : u64 {
15 None,
16 D2,
17 D4,
18 D8,
19 M8,
20 M4,
21 M2,
22 INVALIDSCALE37,
23};
24
25float ScaleFactor(Scale scale) {
26 switch (scale) {
27 case Scale::None:
28 return 1.0f;
29 case Scale::D2:
30 return 1.0f / 2.0f;
31 case Scale::D4:
32 return 1.0f / 4.0f;
33 case Scale::D8:
34 return 1.0f / 8.0f;
35 case Scale::M8:
36 return 8.0f;
37 case Scale::M4:
38 return 4.0f;
39 case Scale::M2:
40 return 2.0f;
41 case Scale::INVALIDSCALE37:
42 break;
43 }
44 throw NotImplementedException("Invalid FMUL scale {}", scale);
45}
46
47void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode,
48 FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) {
49 union {
50 u64 raw;
51 BitField<0, 8, IR::Reg> dest_reg;
52 BitField<8, 8, IR::Reg> src_a;
53 } const fmul{insn};
54
55 if (cc) {
56 throw NotImplementedException("FMUL CC");
57 }
58 IR::F32 op_a{v.F(fmul.src_a)};
59 if (scale != Scale::None) {
60 if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) {
61 throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers");
62 }
63 op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale)));
64 }
65 const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
66 const IR::FpControl fp_control{
67 .no_contraction = true,
68 .rounding = CastFpRounding(fp_rounding),
69 .fmz_mode = CastFmzMode(fmz_mode),
70 };
71 IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)};
72 if (fmz_mode == FmzMode::FMZ && !sat) {
73 // Do not implement FMZ if SAT is enabled, as it does the logic for us.
74 // On D3D9 mode, anything * 0 is zero, even NAN and infinity
75 const IR::F32 zero{v.ir.Imm32(0.0f)};
76 const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
77 const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
78 const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
79 value = IR::F32{v.ir.Select(any_zero, zero, value)};
80 }
81 if (sat) {
82 value = v.ir.FPSaturate(value);
83 }
84 v.F(fmul.dest_reg, value);
85}
86
87void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
88 union {
89 u64 raw;
90 BitField<39, 2, FpRounding> fp_rounding;
91 BitField<41, 3, Scale> scale;
92 BitField<44, 2, FmzMode> fmz;
93 BitField<47, 1, u64> cc;
94 BitField<48, 1, u64> neg_b;
95 BitField<50, 1, u64> sat;
96 } const fmul{insn};
97
98 FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0,
99 fmul.neg_b != 0);
100}
101} // Anonymous namespace
102
103void TranslatorVisitor::FMUL_reg(u64 insn) {
104 return FMUL(*this, insn, GetFloatReg20(insn));
105}
106
107void TranslatorVisitor::FMUL_cbuf(u64 insn) {
108 return FMUL(*this, insn, GetFloatCbuf(insn));
109}
110
111void TranslatorVisitor::FMUL_imm(u64 insn) {
112 return FMUL(*this, insn, GetFloatImm20(insn));
113}
114
115void TranslatorVisitor::FMUL32I(u64 insn) {
116 union {
117 u64 raw;
118 BitField<52, 1, u64> cc;
119 BitField<53, 2, FmzMode> fmz;
120 BitField<55, 1, u64> sat;
121 } const fmul32i{insn};
122
123 FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None,
124 fmul32i.sat != 0, fmul32i.cc != 0, false);
125}
126
127} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
new file mode 100644
index 000000000..f91b93fad
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
@@ -0,0 +1,41 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 SINCOS,
13 EX2,
14};
15
16void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) {
17 union {
18 u64 raw;
19 BitField<0, 8, IR::Reg> dest_reg;
20 BitField<39, 1, Mode> mode;
21 BitField<45, 1, u64> neg;
22 BitField<49, 1, u64> abs;
23 } const rro{insn};
24
25 v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0));
26}
27} // Anonymous namespace
28
29void TranslatorVisitor::RRO_reg(u64 insn) {
30 RRO(*this, insn, GetFloatReg20(insn));
31}
32
33void TranslatorVisitor::RRO_cbuf(u64 insn) {
34 RRO(*this, insn, GetFloatCbuf(insn));
35}
36
37void TranslatorVisitor::RRO_imm(u64) {
38 throw NotImplementedException("RRO (imm)");
39}
40
41} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
new file mode 100644
index 000000000..5f93a1513
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
@@ -0,0 +1,60 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
13 union {
14 u64 insn;
15 BitField<0, 3, IR::Pred> dest_pred_b;
16 BitField<3, 3, IR::Pred> dest_pred_a;
17 BitField<6, 1, u64> negate_b;
18 BitField<7, 1, u64> abs_a;
19 BitField<8, 8, IR::Reg> src_a_reg;
20 BitField<39, 3, IR::Pred> bop_pred;
21 BitField<42, 1, u64> neg_bop_pred;
22 BitField<43, 1, u64> negate_a;
23 BitField<44, 1, u64> abs_b;
24 BitField<45, 2, BooleanOp> bop;
25 BitField<47, 1, u64> ftz;
26 BitField<48, 4, FPCompareOp> compare_op;
27 } const fsetp{insn};
28
29 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)};
30 const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0);
31 const IR::FpControl control{
32 .no_contraction = false,
33 .rounding = IR::FpRounding::DontCare,
34 .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
35 };
36
37 const BooleanOp bop{fsetp.bop};
38 const FPCompareOp compare_op{fsetp.compare_op};
39 const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)};
40 const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)};
41 const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
42 const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
43 v.ir.SetPred(fsetp.dest_pred_a, result_a);
44 v.ir.SetPred(fsetp.dest_pred_b, result_b);
45}
46} // Anonymous namespace
47
48void TranslatorVisitor::FSETP_reg(u64 insn) {
49 FSETP(*this, insn, GetFloatReg20(insn));
50}
51
52void TranslatorVisitor::FSETP_cbuf(u64 insn) {
53 FSETP(*this, insn, GetFloatCbuf(insn));
54}
55
56void TranslatorVisitor::FSETP_imm(u64 insn) {
57 FSETP(*this, insn, GetFloatImm20(insn));
58}
59
60} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
new file mode 100644
index 000000000..7550a8d4c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
@@ -0,0 +1,44 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11void TranslatorVisitor::FSWZADD(u64 insn) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<28, 8, u64> swizzle;
16 BitField<38, 1, u64> ndv;
17 BitField<39, 2, FpRounding> round;
18 BitField<44, 1, u64> ftz;
19 BitField<47, 1, u64> cc;
20 } const fswzadd{insn};
21
22 if (fswzadd.ndv != 0) {
23 throw NotImplementedException("FSWZADD NDV");
24 }
25
26 const IR::F32 src_a{GetFloatReg8(insn)};
27 const IR::F32 src_b{GetFloatReg20(insn)};
28 const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))};
29
30 const IR::FpControl fp_control{
31 .no_contraction = false,
32 .rounding = CastFpRounding(fswzadd.round),
33 .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
34 };
35
36 const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)};
37 F(fswzadd.dest_reg, result);
38
39 if (fswzadd.cc != 0) {
40 throw NotImplementedException("FSWZADD CC");
41 }
42}
43
44} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
new file mode 100644
index 000000000..f2738a93b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
@@ -0,0 +1,125 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,
10 Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {
11 union {
12 u64 raw;
13 BitField<0, 8, IR::Reg> dest_reg;
14 BitField<8, 8, IR::Reg> src_a;
15 } const hadd2{insn};
16
17 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)};
18 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
19 const bool promotion{lhs_a.Type() != lhs_b.Type()};
20 if (promotion) {
21 if (lhs_a.Type() == IR::Type::F16) {
22 lhs_a = v.ir.FPConvert(32, lhs_a);
23 rhs_a = v.ir.FPConvert(32, rhs_a);
24 }
25 if (lhs_b.Type() == IR::Type::F16) {
26 lhs_b = v.ir.FPConvert(32, lhs_b);
27 rhs_b = v.ir.FPConvert(32, rhs_b);
28 }
29 }
30 lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
31 rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
32
33 lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
34 rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
35
36 const IR::FpControl fp_control{
37 .no_contraction = true,
38 .rounding = IR::FpRounding::DontCare,
39 .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
40 };
41 IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
42 IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};
43 if (sat) {
44 lhs = v.ir.FPSaturate(lhs);
45 rhs = v.ir.FPSaturate(rhs);
46 }
47 if (promotion) {
48 lhs = v.ir.FPConvert(16, lhs);
49 rhs = v.ir.FPConvert(16, rhs);
50 }
51 v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge));
52}
53
54void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b,
55 const IR::U32& src_b) {
56 union {
57 u64 raw;
58 BitField<49, 2, Merge> merge;
59 BitField<39, 1, u64> ftz;
60 BitField<43, 1, u64> neg_a;
61 BitField<44, 1, u64> abs_a;
62 BitField<47, 2, Swizzle> swizzle_a;
63 } const hadd2{insn};
64
65 HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0,
66 hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b);
67}
68} // Anonymous namespace
69
70void TranslatorVisitor::HADD2_reg(u64 insn) {
71 union {
72 u64 raw;
73 BitField<32, 1, u64> sat;
74 BitField<31, 1, u64> neg_b;
75 BitField<30, 1, u64> abs_b;
76 BitField<28, 2, Swizzle> swizzle_b;
77 } const hadd2{insn};
78
79 HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b,
80 GetReg20(insn));
81}
82
83void TranslatorVisitor::HADD2_cbuf(u64 insn) {
84 union {
85 u64 raw;
86 BitField<52, 1, u64> sat;
87 BitField<56, 1, u64> neg_b;
88 BitField<54, 1, u64> abs_b;
89 } const hadd2{insn};
90
91 HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32,
92 GetCbuf(insn));
93}
94
95void TranslatorVisitor::HADD2_imm(u64 insn) {
96 union {
97 u64 raw;
98 BitField<52, 1, u64> sat;
99 BitField<56, 1, u64> neg_high;
100 BitField<30, 9, u64> high;
101 BitField<29, 1, u64> neg_low;
102 BitField<20, 9, u64> low;
103 } const hadd2{insn};
104
105 const u32 imm{
106 static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) |
107 static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)};
108 HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm));
109}
110
111void TranslatorVisitor::HADD2_32I(u64 insn) {
112 union {
113 u64 raw;
114 BitField<55, 1, u64> ftz;
115 BitField<52, 1, u64> sat;
116 BitField<56, 1, u64> neg_a;
117 BitField<53, 2, Swizzle> swizzle_a;
118 BitField<20, 32, u64> imm32;
119 } const hadd2{insn};
120
121 const u32 imm{static_cast<u32>(hadd2.imm32)};
122 HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0,
123 hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
124}
125} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..fd7986701
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
@@ -0,0 +1,169 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c,
10 Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c,
11 bool sat, HalfPrecision precision) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_a;
16 } const hfma2{insn};
17
18 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)};
19 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
20 auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)};
21 const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()};
22 if (promotion) {
23 if (lhs_a.Type() == IR::Type::F16) {
24 lhs_a = v.ir.FPConvert(32, lhs_a);
25 rhs_a = v.ir.FPConvert(32, rhs_a);
26 }
27 if (lhs_b.Type() == IR::Type::F16) {
28 lhs_b = v.ir.FPConvert(32, lhs_b);
29 rhs_b = v.ir.FPConvert(32, rhs_b);
30 }
31 if (lhs_c.Type() == IR::Type::F16) {
32 lhs_c = v.ir.FPConvert(32, lhs_c);
33 rhs_c = v.ir.FPConvert(32, rhs_c);
34 }
35 }
36
37 lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b);
38 rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b);
39
40 lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c);
41 rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c);
42
43 const IR::FpControl fp_control{
44 .no_contraction = true,
45 .rounding = IR::FpRounding::DontCare,
46 .fmz_mode = HalfPrecision2FmzMode(precision),
47 };
48 IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)};
49 IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)};
50 if (precision == HalfPrecision::FMZ && !sat) {
51 // Do not implement FMZ if SAT is enabled, as it does the logic for us.
52 // On D3D9 mode, anything * 0 is zero, even NAN and infinity
53 const IR::F32 zero{v.ir.Imm32(0.0f)};
54 const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
55 const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
56 const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
57 lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)};
58
59 const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
60 const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
61 const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
62 rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)};
63 }
64 if (sat) {
65 lhs = v.ir.FPSaturate(lhs);
66 rhs = v.ir.FPSaturate(rhs);
67 }
68 if (promotion) {
69 lhs = v.ir.FPConvert(16, lhs);
70 rhs = v.ir.FPConvert(16, rhs);
71 }
72 v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge));
73}
74
75void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b,
76 Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat,
77 HalfPrecision precision) {
78 union {
79 u64 raw;
80 BitField<47, 2, Swizzle> swizzle_a;
81 BitField<49, 2, Merge> merge;
82 } const hfma2{insn};
83
84 HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c,
85 sat, precision);
86}
87} // Anonymous namespace
88
89void TranslatorVisitor::HFMA2_reg(u64 insn) {
90 union {
91 u64 raw;
92 BitField<28, 2, Swizzle> swizzle_b;
93 BitField<32, 1, u64> saturate;
94 BitField<31, 1, u64> neg_b;
95 BitField<30, 1, u64> neg_c;
96 BitField<35, 2, Swizzle> swizzle_c;
97 BitField<37, 2, HalfPrecision> precision;
98 } const hfma2{insn};
99
100 HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c,
101 GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
102}
103
104void TranslatorVisitor::HFMA2_rc(u64 insn) {
105 union {
106 u64 raw;
107 BitField<51, 1, u64> neg_c;
108 BitField<52, 1, u64> saturate;
109 BitField<53, 2, Swizzle> swizzle_b;
110 BitField<56, 1, u64> neg_b;
111 BitField<57, 2, HalfPrecision> precision;
112 } const hfma2{insn};
113
114 HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32,
115 GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision);
116}
117
118void TranslatorVisitor::HFMA2_cr(u64 insn) {
119 union {
120 u64 raw;
121 BitField<51, 1, u64> neg_c;
122 BitField<52, 1, u64> saturate;
123 BitField<53, 2, Swizzle> swizzle_c;
124 BitField<56, 1, u64> neg_b;
125 BitField<57, 2, HalfPrecision> precision;
126 } const hfma2{insn};
127
128 HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c,
129 GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
130}
131
132void TranslatorVisitor::HFMA2_imm(u64 insn) {
133 union {
134 u64 raw;
135 BitField<51, 1, u64> neg_c;
136 BitField<52, 1, u64> saturate;
137 BitField<53, 2, Swizzle> swizzle_c;
138
139 BitField<56, 1, u64> neg_high;
140 BitField<30, 9, u64> high;
141 BitField<29, 1, u64> neg_low;
142 BitField<20, 9, u64> low;
143 BitField<57, 2, HalfPrecision> precision;
144 } const hfma2{insn};
145
146 const u32 imm{
147 static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) |
148 static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)};
149
150 HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm),
151 GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
152}
153
154void TranslatorVisitor::HFMA2_32I(u64 insn) {
155 union {
156 u64 raw;
157 BitField<0, 8, IR::Reg> src_c;
158 BitField<20, 32, u64> imm32;
159 BitField<52, 1, u64> neg_c;
160 BitField<53, 2, Swizzle> swizzle_a;
161 BitField<55, 2, HalfPrecision> precision;
162 } const hfma2{insn};
163
164 const u32 imm{static_cast<u32>(hfma2.imm32)};
165 HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0,
166 Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision);
167}
168
169} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
new file mode 100644
index 000000000..0dbeb7f56
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8
9IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) {
10 switch (precision) {
11 case HalfPrecision::None:
12 return IR::FmzMode::None;
13 case HalfPrecision::FTZ:
14 return IR::FmzMode::FTZ;
15 case HalfPrecision::FMZ:
16 return IR::FmzMode::FMZ;
17 default:
18 return IR::FmzMode::DontCare;
19 }
20}
21
22std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) {
23 switch (swizzle) {
24 case Swizzle::H1_H0: {
25 const IR::Value vector{ir.UnpackFloat2x16(value)};
26 return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}};
27 }
28 case Swizzle::H0_H0: {
29 const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)};
30 return {scalar, scalar};
31 }
32 case Swizzle::H1_H1: {
33 const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)};
34 return {scalar, scalar};
35 }
36 case Swizzle::F32: {
37 const IR::F32 scalar{ir.BitCast<IR::F32>(value)};
38 return {scalar, scalar};
39 }
40 }
41 throw InvalidArgument("Invalid swizzle {}", swizzle);
42}
43
44IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
45 Merge merge) {
46 switch (merge) {
47 case Merge::H1_H0:
48 return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs));
49 case Merge::F32:
50 return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs));
51 case Merge::MRG_H0:
52 case Merge::MRG_H1: {
53 const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))};
54 const bool is_h0{merge == Merge::MRG_H0};
55 const IR::F16 insert{ir.FPConvert(16, is_h0 ? lhs : rhs)};
56 return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, is_h0 ? 0 : 1));
57 }
58 }
59 throw InvalidArgument("Invalid merge {}", merge);
60}
61
62} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
new file mode 100644
index 000000000..59da56a7e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
@@ -0,0 +1,42 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
11#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
12
13namespace Shader::Maxwell {
14
15enum class Merge : u64 {
16 H1_H0,
17 F32,
18 MRG_H0,
19 MRG_H1,
20};
21
22enum class Swizzle : u64 {
23 H1_H0,
24 F32,
25 H0_H0,
26 H1_H1,
27};
28
29enum class HalfPrecision : u64 {
30 None = 0,
31 FTZ = 1,
32 FMZ = 2,
33};
34
35IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision);
36
37std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle);
38
39IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
40 Merge merge);
41
42} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
new file mode 100644
index 000000000..3f548ce76
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
@@ -0,0 +1,143 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a,
10 Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b,
11 HalfPrecision precision) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_a;
16 } const hmul2{insn};
17
18 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)};
19 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
20 const bool promotion{lhs_a.Type() != lhs_b.Type()};
21 if (promotion) {
22 if (lhs_a.Type() == IR::Type::F16) {
23 lhs_a = v.ir.FPConvert(32, lhs_a);
24 rhs_a = v.ir.FPConvert(32, rhs_a);
25 }
26 if (lhs_b.Type() == IR::Type::F16) {
27 lhs_b = v.ir.FPConvert(32, lhs_b);
28 rhs_b = v.ir.FPConvert(32, rhs_b);
29 }
30 }
31 lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
32 rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
33
34 lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
35 rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
36
37 const IR::FpControl fp_control{
38 .no_contraction = true,
39 .rounding = IR::FpRounding::DontCare,
40 .fmz_mode = HalfPrecision2FmzMode(precision),
41 };
42 IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)};
43 IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)};
44 if (precision == HalfPrecision::FMZ && !sat) {
45 // Do not implement FMZ if SAT is enabled, as it does the logic for us.
46 // On D3D9 mode, anything * 0 is zero, even NAN and infinity
47 const IR::F32 zero{v.ir.Imm32(0.0f)};
48 const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
49 const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
50 const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
51 lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)};
52
53 const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
54 const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
55 const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
56 rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)};
57 }
58 if (sat) {
59 lhs = v.ir.FPSaturate(lhs);
60 rhs = v.ir.FPSaturate(rhs);
61 }
62 if (promotion) {
63 lhs = v.ir.FPConvert(16, lhs);
64 rhs = v.ir.FPConvert(16, rhs);
65 }
66 v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge));
67}
68
69void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b,
70 Swizzle swizzle_b, const IR::U32& src_b) {
71 union {
72 u64 raw;
73 BitField<49, 2, Merge> merge;
74 BitField<47, 2, Swizzle> swizzle_a;
75 BitField<39, 2, HalfPrecision> precision;
76 } const hmul2{insn};
77
78 HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b,
79 hmul2.precision);
80}
81} // Anonymous namespace
82
83void TranslatorVisitor::HMUL2_reg(u64 insn) {
84 union {
85 u64 raw;
86 BitField<32, 1, u64> sat;
87 BitField<31, 1, u64> neg_b;
88 BitField<30, 1, u64> abs_b;
89 BitField<44, 1, u64> abs_a;
90 BitField<28, 2, Swizzle> swizzle_b;
91 } const hmul2{insn};
92
93 HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0,
94 hmul2.swizzle_b, GetReg20(insn));
95}
96
97void TranslatorVisitor::HMUL2_cbuf(u64 insn) {
98 union {
99 u64 raw;
100 BitField<52, 1, u64> sat;
101 BitField<54, 1, u64> abs_b;
102 BitField<43, 1, u64> neg_a;
103 BitField<44, 1, u64> abs_a;
104 } const hmul2{insn};
105
106 HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false,
107 Swizzle::F32, GetCbuf(insn));
108}
109
110void TranslatorVisitor::HMUL2_imm(u64 insn) {
111 union {
112 u64 raw;
113 BitField<52, 1, u64> sat;
114 BitField<56, 1, u64> neg_high;
115 BitField<30, 9, u64> high;
116 BitField<29, 1, u64> neg_low;
117 BitField<20, 9, u64> low;
118 BitField<43, 1, u64> neg_a;
119 BitField<44, 1, u64> abs_a;
120 } const hmul2{insn};
121
122 const u32 imm{
123 static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) |
124 static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 1 : 0) << 31)};
125 HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false,
126 Swizzle::H1_H0, ir.Imm32(imm));
127}
128
129void TranslatorVisitor::HMUL2_32I(u64 insn) {
130 union {
131 u64 raw;
132 BitField<55, 2, HalfPrecision> precision;
133 BitField<52, 1, u64> sat;
134 BitField<53, 2, Swizzle> swizzle_a;
135 BitField<20, 32, u64> imm32;
136 } const hmul2{insn};
137
138 const u32 imm{static_cast<u32>(hmul2.imm32)};
139 HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false,
140 Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision);
141}
142
143} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
new file mode 100644
index 000000000..cca5b831f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
@@ -0,0 +1,117 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b,
10 bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) {
11 union {
12 u64 insn;
13 BitField<0, 8, IR::Reg> dest_reg;
14 BitField<8, 8, IR::Reg> src_a_reg;
15 BitField<39, 3, IR::Pred> pred;
16 BitField<42, 1, u64> neg_pred;
17 BitField<43, 1, u64> neg_a;
18 BitField<45, 2, BooleanOp> bop;
19 BitField<44, 1, u64> abs_a;
20 BitField<47, 2, Swizzle> swizzle_a;
21 } const hset2{insn};
22
23 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)};
24 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
25
26 if (lhs_a.Type() != lhs_b.Type()) {
27 if (lhs_a.Type() == IR::Type::F16) {
28 lhs_a = v.ir.FPConvert(32, lhs_a);
29 rhs_a = v.ir.FPConvert(32, rhs_a);
30 }
31 if (lhs_b.Type() == IR::Type::F16) {
32 lhs_b = v.ir.FPConvert(32, lhs_b);
33 rhs_b = v.ir.FPConvert(32, rhs_b);
34 }
35 }
36
37 lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
38 rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
39
40 lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
41 rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
42
43 const IR::FpControl control{
44 .no_contraction = false,
45 .rounding = IR::FpRounding::DontCare,
46 .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
47 };
48
49 IR::U1 pred{v.ir.GetPred(hset2.pred)};
50 if (hset2.neg_pred != 0) {
51 pred = v.ir.LogicalNot(pred);
52 }
53 const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
54 const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
55 const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hset2.bop)};
56 const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hset2.bop)};
57
58 const u32 true_value = bf ? 0x3c00 : 0xffff;
59 const IR::U32 true_val_lhs{v.ir.Imm32(true_value)};
60 const IR::U32 true_val_rhs{v.ir.Imm32(true_value << 16)};
61 const IR::U32 fail_result{v.ir.Imm32(0)};
62 const IR::U32 result_lhs{v.ir.Select(bop_result_lhs, true_val_lhs, fail_result)};
63 const IR::U32 result_rhs{v.ir.Select(bop_result_rhs, true_val_rhs, fail_result)};
64
65 v.X(hset2.dest_reg, IR::U32{v.ir.BitwiseOr(result_lhs, result_rhs)});
66}
67} // Anonymous namespace
68
69void TranslatorVisitor::HSET2_reg(u64 insn) {
70 union {
71 u64 insn;
72 BitField<30, 1, u64> abs_b;
73 BitField<49, 1, u64> bf;
74 BitField<31, 1, u64> neg_b;
75 BitField<50, 1, u64> ftz;
76 BitField<35, 4, FPCompareOp> compare_op;
77 BitField<28, 2, Swizzle> swizzle_b;
78 } const hset2{insn};
79
80 HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0,
81 hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b);
82}
83
84void TranslatorVisitor::HSET2_cbuf(u64 insn) {
85 union {
86 u64 insn;
87 BitField<53, 1, u64> bf;
88 BitField<56, 1, u64> neg_b;
89 BitField<54, 1, u64> ftz;
90 BitField<49, 4, FPCompareOp> compare_op;
91 } const hset2{insn};
92
93 HSET2(*this, insn, GetCbuf(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, false,
94 hset2.compare_op, Swizzle::F32);
95}
96
97void TranslatorVisitor::HSET2_imm(u64 insn) {
98 union {
99 u64 insn;
100 BitField<53, 1, u64> bf;
101 BitField<54, 1, u64> ftz;
102 BitField<49, 4, FPCompareOp> compare_op;
103 BitField<56, 1, u64> neg_high;
104 BitField<30, 9, u64> high;
105 BitField<29, 1, u64> neg_low;
106 BitField<20, 9, u64> low;
107 } const hset2{insn};
108
109 const u32 imm{
110 static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) |
111 static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)};
112
113 HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op,
114 Swizzle::H1_H0);
115}
116
117} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
new file mode 100644
index 000000000..b3931dae3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
@@ -0,0 +1,118 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b,
10 Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) {
11 union {
12 u64 insn;
13 BitField<8, 8, IR::Reg> src_a_reg;
14 BitField<3, 3, IR::Pred> dest_pred_a;
15 BitField<0, 3, IR::Pred> dest_pred_b;
16 BitField<39, 3, IR::Pred> pred;
17 BitField<42, 1, u64> neg_pred;
18 BitField<43, 1, u64> neg_a;
19 BitField<45, 2, BooleanOp> bop;
20 BitField<44, 1, u64> abs_a;
21 BitField<6, 1, u64> ftz;
22 BitField<47, 2, Swizzle> swizzle_a;
23 } const hsetp2{insn};
24
25 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)};
26 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
27
28 if (lhs_a.Type() != lhs_b.Type()) {
29 if (lhs_a.Type() == IR::Type::F16) {
30 lhs_a = v.ir.FPConvert(32, lhs_a);
31 rhs_a = v.ir.FPConvert(32, rhs_a);
32 }
33 if (lhs_b.Type() == IR::Type::F16) {
34 lhs_b = v.ir.FPConvert(32, lhs_b);
35 rhs_b = v.ir.FPConvert(32, rhs_b);
36 }
37 }
38
39 lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
40 rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
41
42 lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
43 rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
44
45 const IR::FpControl control{
46 .no_contraction = false,
47 .rounding = IR::FpRounding::DontCare,
48 .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
49 };
50
51 IR::U1 pred{v.ir.GetPred(hsetp2.pred)};
52 if (hsetp2.neg_pred != 0) {
53 pred = v.ir.LogicalNot(pred);
54 }
55 const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
56 const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
57 const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hsetp2.bop)};
58 const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hsetp2.bop)};
59
60 if (h_and) {
61 auto result = v.ir.LogicalAnd(bop_result_lhs, bop_result_rhs);
62 v.ir.SetPred(hsetp2.dest_pred_a, result);
63 v.ir.SetPred(hsetp2.dest_pred_b, v.ir.LogicalNot(result));
64 } else {
65 v.ir.SetPred(hsetp2.dest_pred_a, bop_result_lhs);
66 v.ir.SetPred(hsetp2.dest_pred_b, bop_result_rhs);
67 }
68}
69} // Anonymous namespace
70
71void TranslatorVisitor::HSETP2_reg(u64 insn) {
72 union {
73 u64 insn;
74 BitField<30, 1, u64> abs_b;
75 BitField<49, 1, u64> h_and;
76 BitField<31, 1, u64> neg_b;
77 BitField<35, 4, FPCompareOp> compare_op;
78 BitField<28, 2, Swizzle> swizzle_b;
79 } const hsetp2{insn};
80 HSETP2(*this, insn, GetReg20(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, hsetp2.swizzle_b,
81 hsetp2.compare_op, hsetp2.h_and != 0);
82}
83
84void TranslatorVisitor::HSETP2_cbuf(u64 insn) {
85 union {
86 u64 insn;
87 BitField<53, 1, u64> h_and;
88 BitField<54, 1, u64> abs_b;
89 BitField<56, 1, u64> neg_b;
90 BitField<49, 4, FPCompareOp> compare_op;
91 } const hsetp2{insn};
92
93 HSETP2(*this, insn, GetCbuf(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, Swizzle::F32,
94 hsetp2.compare_op, hsetp2.h_and != 0);
95}
96
97void TranslatorVisitor::HSETP2_imm(u64 insn) {
98 union {
99 u64 insn;
100 BitField<53, 1, u64> h_and;
101 BitField<54, 1, u64> ftz;
102 BitField<49, 4, FPCompareOp> compare_op;
103 BitField<56, 1, u64> neg_high;
104 BitField<30, 9, u64> high;
105 BitField<29, 1, u64> neg_low;
106 BitField<20, 9, u64> low;
107 } const hsetp2{insn};
108
109 const u32 imm{static_cast<u32>(hsetp2.low << 6) |
110 static_cast<u32>((hsetp2.neg_low != 0 ? 1 : 0) << 15) |
111 static_cast<u32>(hsetp2.high << 22) |
112 static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)};
113
114 HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op,
115 hsetp2.h_and != 0);
116}
117
118} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
new file mode 100644
index 000000000..b446aae0e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -0,0 +1,272 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "shader_recompiler/frontend/ir/ir_emitter.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11[[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding,
12 u32 offset) {
13 if (unaligned) {
14 return ir.Imm32(0);
15 }
16 return ir.GetCbuf(binding, IR::U32{IR::Value{offset}});
17}
18} // Anonymous namespace
19
20IR::U32 TranslatorVisitor::X(IR::Reg reg) {
21 return ir.GetReg(reg);
22}
23
24IR::U64 TranslatorVisitor::L(IR::Reg reg) {
25 if (!IR::IsAligned(reg, 2)) {
26 throw NotImplementedException("Unaligned source register {}", reg);
27 }
28 return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
29}
30
31IR::F32 TranslatorVisitor::F(IR::Reg reg) {
32 return ir.BitCast<IR::F32>(X(reg));
33}
34
35IR::F64 TranslatorVisitor::D(IR::Reg reg) {
36 if (!IR::IsAligned(reg, 2)) {
37 throw NotImplementedException("Unaligned source register {}", reg);
38 }
39 return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
40}
41
42void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
43 ir.SetReg(dest_reg, value);
44}
45
46void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) {
47 if (!IR::IsAligned(dest_reg, 2)) {
48 throw NotImplementedException("Unaligned destination register {}", dest_reg);
49 }
50 const IR::Value result{ir.UnpackUint2x32(value)};
51 for (int i = 0; i < 2; i++) {
52 X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
53 }
54}
55
56void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) {
57 X(dest_reg, ir.BitCast<IR::U32>(value));
58}
59
60void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) {
61 if (!IR::IsAligned(dest_reg, 2)) {
62 throw NotImplementedException("Unaligned destination register {}", dest_reg);
63 }
64 const IR::Value result{ir.UnpackDouble2x32(value)};
65 for (int i = 0; i < 2; i++) {
66 X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
67 }
68}
69
70IR::U32 TranslatorVisitor::GetReg8(u64 insn) {
71 union {
72 u64 raw;
73 BitField<8, 8, IR::Reg> index;
74 } const reg{insn};
75 return X(reg.index);
76}
77
78IR::U32 TranslatorVisitor::GetReg20(u64 insn) {
79 union {
80 u64 raw;
81 BitField<20, 8, IR::Reg> index;
82 } const reg{insn};
83 return X(reg.index);
84}
85
86IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
87 union {
88 u64 raw;
89 BitField<39, 8, IR::Reg> index;
90 } const reg{insn};
91 return X(reg.index);
92}
93
94IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) {
95 return ir.BitCast<IR::F32>(GetReg8(insn));
96}
97
98IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) {
99 return ir.BitCast<IR::F32>(GetReg20(insn));
100}
101
102IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) {
103 return ir.BitCast<IR::F32>(GetReg39(insn));
104}
105
106IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) {
107 union {
108 u64 raw;
109 BitField<20, 8, IR::Reg> index;
110 } const reg{insn};
111 return D(reg.index);
112}
113
114IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) {
115 union {
116 u64 raw;
117 BitField<39, 8, IR::Reg> index;
118 } const reg{insn};
119 return D(reg.index);
120}
121
122static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) {
123 union {
124 u64 raw;
125 BitField<20, 14, u64> offset;
126 BitField<34, 5, u64> binding;
127 } const cbuf{insn};
128
129 if (cbuf.binding >= 18) {
130 throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
131 }
132 if (cbuf.offset >= 0x10'000) {
133 throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset);
134 }
135 const IR::Value binding{static_cast<u32>(cbuf.binding)};
136 const IR::Value byte_offset{static_cast<u32>(cbuf.offset) * 4};
137 return {IR::U32{binding}, IR::U32{byte_offset}};
138}
139
140IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
141 const auto [binding, byte_offset]{CbufAddr(insn)};
142 return ir.GetCbuf(binding, byte_offset);
143}
144
145IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) {
146 const auto [binding, byte_offset]{CbufAddr(insn)};
147 return ir.GetFloatCbuf(binding, byte_offset);
148}
149
150IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) {
151 union {
152 u64 raw;
153 BitField<20, 1, u64> unaligned;
154 } const cbuf{insn};
155
156 const auto [binding, offset_value]{CbufAddr(insn)};
157 const bool unaligned{cbuf.unaligned != 0};
158 const u32 offset{offset_value.U32()};
159 const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u};
160
161 const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})};
162 const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)};
163 return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value));
164}
165
166IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) {
167 union {
168 u64 raw;
169 BitField<20, 1, u64> unaligned;
170 } const cbuf{insn};
171
172 if (cbuf.unaligned != 0) {
173 throw NotImplementedException("Unaligned packed constant buffer read");
174 }
175 const auto [binding, lower_offset]{CbufAddr(insn)};
176 const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)};
177 const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)};
178 const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)};
179 return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value));
180}
181
182IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
183 union {
184 u64 raw;
185 BitField<20, 19, u64> value;
186 BitField<56, 1, u64> is_negative;
187 } const imm{insn};
188
189 if (imm.is_negative != 0) {
190 const s64 raw{static_cast<s64>(imm.value)};
191 return ir.Imm32(static_cast<s32>(-(1LL << 19) + raw));
192 } else {
193 return ir.Imm32(static_cast<u32>(imm.value));
194 }
195}
196
197IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) {
198 union {
199 u64 raw;
200 BitField<20, 19, u64> value;
201 BitField<56, 1, u64> is_negative;
202 } const imm{insn};
203 const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)};
204 const u32 value{static_cast<u32>(imm.value) << 12};
205 return ir.Imm32(Common::BitCast<f32>(value | sign_bit));
206}
207
208IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) {
209 union {
210 u64 raw;
211 BitField<20, 19, u64> value;
212 BitField<56, 1, u64> is_negative;
213 } const imm{insn};
214 const u64 sign_bit{imm.is_negative != 0 ? (1ULL << 63) : 0};
215 const u64 value{imm.value << 44};
216 return ir.Imm64(Common::BitCast<f64>(value | sign_bit));
217}
218
219IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) {
220 const s64 value{GetImm20(insn).U32()};
221 return ir.Imm64(static_cast<u64>(static_cast<s64>(value) << 32));
222}
223
224IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
225 union {
226 u64 raw;
227 BitField<20, 32, u64> value;
228 } const imm{insn};
229 return ir.Imm32(static_cast<u32>(imm.value));
230}
231
232IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) {
233 union {
234 u64 raw;
235 BitField<20, 32, u64> value;
236 } const imm{insn};
237 return ir.Imm32(Common::BitCast<f32>(static_cast<u32>(imm.value)));
238}
239
240void TranslatorVisitor::SetZFlag(const IR::U1& value) {
241 ir.SetZFlag(value);
242}
243
244void TranslatorVisitor::SetSFlag(const IR::U1& value) {
245 ir.SetSFlag(value);
246}
247
248void TranslatorVisitor::SetCFlag(const IR::U1& value) {
249 ir.SetCFlag(value);
250}
251
252void TranslatorVisitor::SetOFlag(const IR::U1& value) {
253 ir.SetOFlag(value);
254}
255
256void TranslatorVisitor::ResetZero() {
257 SetZFlag(ir.Imm1(false));
258}
259
260void TranslatorVisitor::ResetSFlag() {
261 SetSFlag(ir.Imm1(false));
262}
263
264void TranslatorVisitor::ResetCFlag() {
265 SetCFlag(ir.Imm1(false));
266}
267
268void TranslatorVisitor::ResetOFlag() {
269 SetOFlag(ir.Imm1(false));
270}
271
272} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
new file mode 100644
index 000000000..335e4f24f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -0,0 +1,387 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "shader_recompiler/environment.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9#include "shader_recompiler/frontend/ir/ir_emitter.h"
10#include "shader_recompiler/frontend/maxwell/instruction.h"
11
12namespace Shader::Maxwell {
13
14enum class CompareOp : u64 {
15 False,
16 LessThan,
17 Equal,
18 LessThanEqual,
19 GreaterThan,
20 NotEqual,
21 GreaterThanEqual,
22 True,
23};
24
25enum class BooleanOp : u64 {
26 AND,
27 OR,
28 XOR,
29};
30
31enum class PredicateOp : u64 {
32 False,
33 True,
34 Zero,
35 NonZero,
36};
37
38enum class FPCompareOp : u64 {
39 F,
40 LT,
41 EQ,
42 LE,
43 GT,
44 NE,
45 GE,
46 NUM,
47 Nan,
48 LTU,
49 EQU,
50 LEU,
51 GTU,
52 NEU,
53 GEU,
54 T,
55};
56
57class TranslatorVisitor {
58public:
59 explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {}
60
61 Environment& env;
62 IR::IREmitter ir;
63
64 void AL2P(u64 insn);
65 void ALD(u64 insn);
66 void AST(u64 insn);
67 void ATOM_cas(u64 insn);
68 void ATOM(u64 insn);
69 void ATOMS_cas(u64 insn);
70 void ATOMS(u64 insn);
71 void B2R(u64 insn);
72 void BAR(u64 insn);
73 void BFE_reg(u64 insn);
74 void BFE_cbuf(u64 insn);
75 void BFE_imm(u64 insn);
76 void BFI_reg(u64 insn);
77 void BFI_rc(u64 insn);
78 void BFI_cr(u64 insn);
79 void BFI_imm(u64 insn);
80 void BPT(u64 insn);
81 void BRA(u64 insn);
82 void BRK(u64 insn);
83 void BRX(u64 insn);
84 void CAL();
85 void CCTL(u64 insn);
86 void CCTLL(u64 insn);
87 void CONT(u64 insn);
88 void CS2R(u64 insn);
89 void CSET(u64 insn);
90 void CSETP(u64 insn);
91 void DADD_reg(u64 insn);
92 void DADD_cbuf(u64 insn);
93 void DADD_imm(u64 insn);
94 void DEPBAR();
95 void DFMA_reg(u64 insn);
96 void DFMA_rc(u64 insn);
97 void DFMA_cr(u64 insn);
98 void DFMA_imm(u64 insn);
99 void DMNMX_reg(u64 insn);
100 void DMNMX_cbuf(u64 insn);
101 void DMNMX_imm(u64 insn);
102 void DMUL_reg(u64 insn);
103 void DMUL_cbuf(u64 insn);
104 void DMUL_imm(u64 insn);
105 void DSET_reg(u64 insn);
106 void DSET_cbuf(u64 insn);
107 void DSET_imm(u64 insn);
108 void DSETP_reg(u64 insn);
109 void DSETP_cbuf(u64 insn);
110 void DSETP_imm(u64 insn);
111 void EXIT();
112 void F2F_reg(u64 insn);
113 void F2F_cbuf(u64 insn);
114 void F2F_imm(u64 insn);
115 void F2I_reg(u64 insn);
116 void F2I_cbuf(u64 insn);
117 void F2I_imm(u64 insn);
118 void FADD_reg(u64 insn);
119 void FADD_cbuf(u64 insn);
120 void FADD_imm(u64 insn);
121 void FADD32I(u64 insn);
122 void FCHK_reg(u64 insn);
123 void FCHK_cbuf(u64 insn);
124 void FCHK_imm(u64 insn);
125 void FCMP_reg(u64 insn);
126 void FCMP_rc(u64 insn);
127 void FCMP_cr(u64 insn);
128 void FCMP_imm(u64 insn);
129 void FFMA_reg(u64 insn);
130 void FFMA_rc(u64 insn);
131 void FFMA_cr(u64 insn);
132 void FFMA_imm(u64 insn);
133 void FFMA32I(u64 insn);
134 void FLO_reg(u64 insn);
135 void FLO_cbuf(u64 insn);
136 void FLO_imm(u64 insn);
137 void FMNMX_reg(u64 insn);
138 void FMNMX_cbuf(u64 insn);
139 void FMNMX_imm(u64 insn);
140 void FMUL_reg(u64 insn);
141 void FMUL_cbuf(u64 insn);
142 void FMUL_imm(u64 insn);
143 void FMUL32I(u64 insn);
144 void FSET_reg(u64 insn);
145 void FSET_cbuf(u64 insn);
146 void FSET_imm(u64 insn);
147 void FSETP_reg(u64 insn);
148 void FSETP_cbuf(u64 insn);
149 void FSETP_imm(u64 insn);
150 void FSWZADD(u64 insn);
151 void GETCRSPTR(u64 insn);
152 void GETLMEMBASE(u64 insn);
153 void HADD2_reg(u64 insn);
154 void HADD2_cbuf(u64 insn);
155 void HADD2_imm(u64 insn);
156 void HADD2_32I(u64 insn);
157 void HFMA2_reg(u64 insn);
158 void HFMA2_rc(u64 insn);
159 void HFMA2_cr(u64 insn);
160 void HFMA2_imm(u64 insn);
161 void HFMA2_32I(u64 insn);
162 void HMUL2_reg(u64 insn);
163 void HMUL2_cbuf(u64 insn);
164 void HMUL2_imm(u64 insn);
165 void HMUL2_32I(u64 insn);
166 void HSET2_reg(u64 insn);
167 void HSET2_cbuf(u64 insn);
168 void HSET2_imm(u64 insn);
169 void HSETP2_reg(u64 insn);
170 void HSETP2_cbuf(u64 insn);
171 void HSETP2_imm(u64 insn);
172 void I2F_reg(u64 insn);
173 void I2F_cbuf(u64 insn);
174 void I2F_imm(u64 insn);
175 void I2I_reg(u64 insn);
176 void I2I_cbuf(u64 insn);
177 void I2I_imm(u64 insn);
178 void IADD_reg(u64 insn);
179 void IADD_cbuf(u64 insn);
180 void IADD_imm(u64 insn);
181 void IADD3_reg(u64 insn);
182 void IADD3_cbuf(u64 insn);
183 void IADD3_imm(u64 insn);
184 void IADD32I(u64 insn);
185 void ICMP_reg(u64 insn);
186 void ICMP_rc(u64 insn);
187 void ICMP_cr(u64 insn);
188 void ICMP_imm(u64 insn);
189 void IDE(u64 insn);
190 void IDP_reg(u64 insn);
191 void IDP_imm(u64 insn);
192 void IMAD_reg(u64 insn);
193 void IMAD_rc(u64 insn);
194 void IMAD_cr(u64 insn);
195 void IMAD_imm(u64 insn);
196 void IMAD32I(u64 insn);
197 void IMADSP_reg(u64 insn);
198 void IMADSP_rc(u64 insn);
199 void IMADSP_cr(u64 insn);
200 void IMADSP_imm(u64 insn);
201 void IMNMX_reg(u64 insn);
202 void IMNMX_cbuf(u64 insn);
203 void IMNMX_imm(u64 insn);
204 void IMUL_reg(u64 insn);
205 void IMUL_cbuf(u64 insn);
206 void IMUL_imm(u64 insn);
207 void IMUL32I(u64 insn);
208 void IPA(u64 insn);
209 void ISBERD(u64 insn);
210 void ISCADD_reg(u64 insn);
211 void ISCADD_cbuf(u64 insn);
212 void ISCADD_imm(u64 insn);
213 void ISCADD32I(u64 insn);
214 void ISET_reg(u64 insn);
215 void ISET_cbuf(u64 insn);
216 void ISET_imm(u64 insn);
217 void ISETP_reg(u64 insn);
218 void ISETP_cbuf(u64 insn);
219 void ISETP_imm(u64 insn);
220 void JCAL(u64 insn);
221 void JMP(u64 insn);
222 void JMX(u64 insn);
223 void KIL();
224 void LD(u64 insn);
225 void LDC(u64 insn);
226 void LDG(u64 insn);
227 void LDL(u64 insn);
228 void LDS(u64 insn);
229 void LEA_hi_reg(u64 insn);
230 void LEA_hi_cbuf(u64 insn);
231 void LEA_lo_reg(u64 insn);
232 void LEA_lo_cbuf(u64 insn);
233 void LEA_lo_imm(u64 insn);
234 void LEPC(u64 insn);
235 void LONGJMP(u64 insn);
236 void LOP_reg(u64 insn);
237 void LOP_cbuf(u64 insn);
238 void LOP_imm(u64 insn);
239 void LOP3_reg(u64 insn);
240 void LOP3_cbuf(u64 insn);
241 void LOP3_imm(u64 insn);
242 void LOP32I(u64 insn);
243 void MEMBAR(u64 insn);
244 void MOV_reg(u64 insn);
245 void MOV_cbuf(u64 insn);
246 void MOV_imm(u64 insn);
247 void MOV32I(u64 insn);
248 void MUFU(u64 insn);
249 void NOP(u64 insn);
250 void OUT_reg(u64 insn);
251 void OUT_cbuf(u64 insn);
252 void OUT_imm(u64 insn);
253 void P2R_reg(u64 insn);
254 void P2R_cbuf(u64 insn);
255 void P2R_imm(u64 insn);
256 void PBK();
257 void PCNT();
258 void PEXIT(u64 insn);
259 void PIXLD(u64 insn);
260 void PLONGJMP(u64 insn);
261 void POPC_reg(u64 insn);
262 void POPC_cbuf(u64 insn);
263 void POPC_imm(u64 insn);
264 void PRET(u64 insn);
265 void PRMT_reg(u64 insn);
266 void PRMT_rc(u64 insn);
267 void PRMT_cr(u64 insn);
268 void PRMT_imm(u64 insn);
269 void PSET(u64 insn);
270 void PSETP(u64 insn);
271 void R2B(u64 insn);
272 void R2P_reg(u64 insn);
273 void R2P_cbuf(u64 insn);
274 void R2P_imm(u64 insn);
275 void RAM(u64 insn);
276 void RED(u64 insn);
277 void RET(u64 insn);
278 void RRO_reg(u64 insn);
279 void RRO_cbuf(u64 insn);
280 void RRO_imm(u64 insn);
281 void RTT(u64 insn);
282 void S2R(u64 insn);
283 void SAM(u64 insn);
284 void SEL_reg(u64 insn);
285 void SEL_cbuf(u64 insn);
286 void SEL_imm(u64 insn);
287 void SETCRSPTR(u64 insn);
288 void SETLMEMBASE(u64 insn);
289 void SHF_l_reg(u64 insn);
290 void SHF_l_imm(u64 insn);
291 void SHF_r_reg(u64 insn);
292 void SHF_r_imm(u64 insn);
293 void SHFL(u64 insn);
294 void SHL_reg(u64 insn);
295 void SHL_cbuf(u64 insn);
296 void SHL_imm(u64 insn);
297 void SHR_reg(u64 insn);
298 void SHR_cbuf(u64 insn);
299 void SHR_imm(u64 insn);
300 void SSY();
301 void ST(u64 insn);
302 void STG(u64 insn);
303 void STL(u64 insn);
304 void STP(u64 insn);
305 void STS(u64 insn);
306 void SUATOM(u64 insn);
307 void SUATOM_cas(u64 insn);
308 void SULD(u64 insn);
309 void SURED(u64 insn);
310 void SUST(u64 insn);
311 void SYNC(u64 insn);
312 void TEX(u64 insn);
313 void TEX_b(u64 insn);
314 void TEXS(u64 insn);
315 void TLD(u64 insn);
316 void TLD_b(u64 insn);
317 void TLD4(u64 insn);
318 void TLD4_b(u64 insn);
319 void TLD4S(u64 insn);
320 void TLDS(u64 insn);
321 void TMML(u64 insn);
322 void TMML_b(u64 insn);
323 void TXA(u64 insn);
324 void TXD(u64 insn);
325 void TXD_b(u64 insn);
326 void TXQ(u64 insn);
327 void TXQ_b(u64 insn);
328 void VABSDIFF(u64 insn);
329 void VABSDIFF4(u64 insn);
330 void VADD(u64 insn);
331 void VMAD(u64 insn);
332 void VMNMX(u64 insn);
333 void VOTE(u64 insn);
334 void VOTE_vtg(u64 insn);
335 void VSET(u64 insn);
336 void VSETP(u64 insn);
337 void VSHL(u64 insn);
338 void VSHR(u64 insn);
339 void XMAD_reg(u64 insn);
340 void XMAD_rc(u64 insn);
341 void XMAD_cr(u64 insn);
342 void XMAD_imm(u64 insn);
343
344 [[nodiscard]] IR::U32 X(IR::Reg reg);
345 [[nodiscard]] IR::U64 L(IR::Reg reg);
346 [[nodiscard]] IR::F32 F(IR::Reg reg);
347 [[nodiscard]] IR::F64 D(IR::Reg reg);
348
349 void X(IR::Reg dest_reg, const IR::U32& value);
350 void L(IR::Reg dest_reg, const IR::U64& value);
351 void F(IR::Reg dest_reg, const IR::F32& value);
352 void D(IR::Reg dest_reg, const IR::F64& value);
353
354 [[nodiscard]] IR::U32 GetReg8(u64 insn);
355 [[nodiscard]] IR::U32 GetReg20(u64 insn);
356 [[nodiscard]] IR::U32 GetReg39(u64 insn);
357 [[nodiscard]] IR::F32 GetFloatReg8(u64 insn);
358 [[nodiscard]] IR::F32 GetFloatReg20(u64 insn);
359 [[nodiscard]] IR::F32 GetFloatReg39(u64 insn);
360 [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn);
361 [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn);
362
363 [[nodiscard]] IR::U32 GetCbuf(u64 insn);
364 [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);
365 [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn);
366 [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn);
367
368 [[nodiscard]] IR::U32 GetImm20(u64 insn);
369 [[nodiscard]] IR::F32 GetFloatImm20(u64 insn);
370 [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn);
371 [[nodiscard]] IR::U64 GetPackedImm20(u64 insn);
372
373 [[nodiscard]] IR::U32 GetImm32(u64 insn);
374 [[nodiscard]] IR::F32 GetFloatImm32(u64 insn);
375
376 void SetZFlag(const IR::U1& value);
377 void SetSFlag(const IR::U1& value);
378 void SetCFlag(const IR::U1& value);
379 void SetOFlag(const IR::U1& value);
380
381 void ResetZero();
382 void ResetSFlag();
383 void ResetCFlag();
384 void ResetOFlag();
385};
386
387} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
new file mode 100644
index 000000000..8ffd84867
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
@@ -0,0 +1,105 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x,
12 bool cc) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_a;
17 } const iadd{insn};
18
19 if (sat) {
20 throw NotImplementedException("IADD SAT");
21 }
22 if (x && po) {
23 throw NotImplementedException("IADD X+PO");
24 }
25 // Operand A is always read from here, negated if needed
26 IR::U32 op_a{v.X(iadd.src_a)};
27 if (neg_a) {
28 op_a = v.ir.INeg(op_a);
29 }
30 // Add both operands
31 IR::U32 result{v.ir.IAdd(op_a, op_b)};
32 if (x) {
33 const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
34 result = v.ir.IAdd(result, carry);
35 }
36 if (po) {
37 // .PO adds one to the result
38 result = v.ir.IAdd(result, v.ir.Imm32(1));
39 }
40 if (cc) {
41 // Store flags
42 // TODO: Does this grab the result pre-PO or after?
43 if (po) {
44 throw NotImplementedException("IADD CC+PO");
45 }
46 // TODO: How does CC behave when X is set?
47 if (x) {
48 throw NotImplementedException("IADD X+CC");
49 }
50 v.SetZFlag(v.ir.GetZeroFromOp(result));
51 v.SetSFlag(v.ir.GetSignFromOp(result));
52 v.SetCFlag(v.ir.GetCarryFromOp(result));
53 v.SetOFlag(v.ir.GetOverflowFromOp(result));
54 }
55 // Store result
56 v.X(iadd.dest_reg, result);
57}
58
59void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
60 union {
61 u64 insn;
62 BitField<43, 1, u64> x;
63 BitField<47, 1, u64> cc;
64 BitField<48, 2, u64> three_for_po;
65 BitField<48, 1, u64> neg_b;
66 BitField<49, 1, u64> neg_a;
67 BitField<50, 1, u64> sat;
68 } const iadd{insn};
69
70 const bool po{iadd.three_for_po == 3};
71 if (!po && iadd.neg_b != 0) {
72 op_b = v.ir.INeg(op_b);
73 }
74 IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0);
75}
76} // Anonymous namespace
77
78void TranslatorVisitor::IADD_reg(u64 insn) {
79 IADD(*this, insn, GetReg20(insn));
80}
81
82void TranslatorVisitor::IADD_cbuf(u64 insn) {
83 IADD(*this, insn, GetCbuf(insn));
84}
85
86void TranslatorVisitor::IADD_imm(u64 insn) {
87 IADD(*this, insn, GetImm20(insn));
88}
89
90void TranslatorVisitor::IADD32I(u64 insn) {
91 union {
92 u64 raw;
93 BitField<52, 1, u64> cc;
94 BitField<53, 1, u64> x;
95 BitField<54, 1, u64> sat;
96 BitField<55, 2, u64> three_for_po;
97 BitField<56, 1, u64> neg_a;
98 } const iadd32i{insn};
99
100 const bool po{iadd32i.three_for_po == 3};
101 const bool neg_a{!po && iadd32i.neg_a != 0};
102 IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0);
103}
104
105} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp
new file mode 100644
index 000000000..040cfc10f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp
@@ -0,0 +1,122 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Shift : u64 {
12 None,
13 Right,
14 Left,
15};
16enum class Half : u64 {
17 All,
18 Lower,
19 Upper,
20};
21
22[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) {
23 constexpr bool is_signed{false};
24 switch (half) {
25 case Half::All:
26 return value;
27 case Half::Lower:
28 return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed);
29 case Half::Upper:
30 return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed);
31 }
32 throw NotImplementedException("Invalid half");
33}
34
35[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) {
36 switch (shift) {
37 case Shift::None:
38 return value;
39 case Shift::Right: {
40 // 33-bit RS IADD3 edge case
41 const IR::U1 edge_case{ir.GetCarryFromOp(value)};
42 const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))};
43 return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)};
44 }
45 case Shift::Left:
46 return ir.ShiftLeftLogical(value, ir.Imm32(16));
47 }
48 throw NotImplementedException("Invalid shift");
49}
50
51void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c,
52 Shift shift = Shift::None) {
53 union {
54 u64 insn;
55 BitField<0, 8, IR::Reg> dest_reg;
56 BitField<47, 1, u64> cc;
57 BitField<48, 1, u64> x;
58 BitField<49, 1, u64> neg_c;
59 BitField<50, 1, u64> neg_b;
60 BitField<51, 1, u64> neg_a;
61 } iadd3{insn};
62
63 if (iadd3.neg_a != 0) {
64 op_a = v.ir.INeg(op_a);
65 }
66 if (iadd3.neg_b != 0) {
67 op_b = v.ir.INeg(op_b);
68 }
69 if (iadd3.neg_c != 0) {
70 op_c = v.ir.INeg(op_c);
71 }
72 IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)};
73 if (iadd3.x != 0) {
74 // TODO: How does RS behave when X is set?
75 if (shift == Shift::Right) {
76 throw NotImplementedException("IADD3 X+RS");
77 }
78 const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
79 lhs_1 = v.ir.IAdd(lhs_1, carry);
80 }
81 const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, shift)};
82 const IR::U32 result{v.ir.IAdd(lhs_2, op_c)};
83
84 v.X(iadd3.dest_reg, result);
85 if (iadd3.cc != 0) {
86 // TODO: How does CC behave when X is set?
87 if (iadd3.x != 0) {
88 throw NotImplementedException("IADD3 X+CC");
89 }
90 v.SetZFlag(v.ir.GetZeroFromOp(result));
91 v.SetSFlag(v.ir.GetSignFromOp(result));
92 v.SetCFlag(v.ir.GetCarryFromOp(result));
93 const IR::U1 of_1{v.ir.ILessThan(lhs_1, op_a, false)};
94 v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1));
95 }
96}
97} // Anonymous namespace
98
99void TranslatorVisitor::IADD3_reg(u64 insn) {
100 union {
101 u64 insn;
102 BitField<37, 2, Shift> shift;
103 BitField<35, 2, Half> half_a;
104 BitField<33, 2, Half> half_b;
105 BitField<31, 2, Half> half_c;
106 } const iadd3{insn};
107
108 const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)};
109 const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)};
110 const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)};
111 IADD3(*this, insn, op_a, op_b, op_c, iadd3.shift);
112}
113
114void TranslatorVisitor::IADD3_cbuf(u64 insn) {
115 IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn));
116}
117
118void TranslatorVisitor::IADD3_imm(u64 insn) {
119 IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn));
120}
121
122} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp
new file mode 100644
index 000000000..ba6e01926
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp
@@ -0,0 +1,48 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_reg;
17 BitField<48, 1, u64> is_signed;
18 BitField<49, 3, CompareOp> compare_op;
19 } const icmp{insn};
20
21 const IR::U32 zero{v.ir.Imm32(0)};
22 const bool is_signed{icmp.is_signed != 0};
23 const IR::U1 cmp_result{IntegerCompare(v.ir, operand, zero, icmp.compare_op, is_signed)};
24
25 const IR::U32 src_reg{v.X(icmp.src_reg)};
26 const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
27
28 v.X(icmp.dest_reg, result);
29}
30} // Anonymous namespace
31
32void TranslatorVisitor::ICMP_reg(u64 insn) {
33 ICMP(*this, insn, GetReg20(insn), GetReg39(insn));
34}
35
36void TranslatorVisitor::ICMP_rc(u64 insn) {
37 ICMP(*this, insn, GetReg39(insn), GetCbuf(insn));
38}
39
40void TranslatorVisitor::ICMP_cr(u64 insn) {
41 ICMP(*this, insn, GetCbuf(insn), GetReg39(insn));
42}
43
44void TranslatorVisitor::ICMP_imm(u64 insn) {
45 ICMP(*this, insn, GetImm20(insn), GetReg39(insn));
46}
47
48} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
new file mode 100644
index 000000000..8ce1aee04
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
@@ -0,0 +1,80 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
13 CompareOp compare_op, bool is_signed, bool x) {
14 return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed)
15 : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
16}
17
18void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
19 union {
20 u64 insn;
21 BitField<0, 8, IR::Reg> dest_reg;
22 BitField<8, 8, IR::Reg> src_reg;
23 BitField<39, 3, IR::Pred> pred;
24 BitField<42, 1, u64> neg_pred;
25 BitField<43, 1, u64> x;
26 BitField<44, 1, u64> bf;
27 BitField<45, 2, BooleanOp> bop;
28 BitField<47, 1, u64> cc;
29 BitField<48, 1, u64> is_signed;
30 BitField<49, 3, CompareOp> compare_op;
31 } const iset{insn};
32
33 const IR::U32 src_a{v.X(iset.src_reg)};
34 const bool is_signed{iset.is_signed != 0};
35 const IR::U32 zero{v.ir.Imm32(0)};
36 const bool x{iset.x != 0};
37 const IR::U1 cmp_result{IsetCompare(v.ir, src_a, src_b, iset.compare_op, is_signed, x)};
38
39 IR::U1 pred{v.ir.GetPred(iset.pred)};
40 if (iset.neg_pred != 0) {
41 pred = v.ir.LogicalNot(pred);
42 }
43 const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)};
44
45 const IR::U32 one_mask{v.ir.Imm32(-1)};
46 const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
47 const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one};
48 const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
49
50 v.X(iset.dest_reg, result);
51 if (iset.cc != 0) {
52 if (x) {
53 throw NotImplementedException("ISET.CC + X");
54 }
55 const IR::U1 is_zero{v.ir.IEqual(result, zero)};
56 v.SetZFlag(is_zero);
57 if (iset.bf != 0) {
58 v.ResetSFlag();
59 } else {
60 v.SetSFlag(v.ir.LogicalNot(is_zero));
61 }
62 v.ResetCFlag();
63 v.ResetOFlag();
64 }
65}
66} // Anonymous namespace
67
68void TranslatorVisitor::ISET_reg(u64 insn) {
69 ISET(*this, insn, GetReg20(insn));
70}
71
72void TranslatorVisitor::ISET_cbuf(u64 insn) {
73 ISET(*this, insn, GetCbuf(insn));
74}
75
76void TranslatorVisitor::ISET_imm(u64 insn) {
77 ISET(*this, insn, GetImm20(insn));
78}
79
80} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
new file mode 100644
index 000000000..0b8119ddd
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
@@ -0,0 +1,182 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12enum class FloatFormat : u64 {
13 F16 = 1,
14 F32 = 2,
15 F64 = 3,
16};
17
18enum class IntFormat : u64 {
19 U8 = 0,
20 U16 = 1,
21 U32 = 2,
22 U64 = 3,
23};
24
25union Encoding {
26 u64 raw;
27 BitField<0, 8, IR::Reg> dest_reg;
28 BitField<8, 2, FloatFormat> float_format;
29 BitField<10, 2, IntFormat> int_format;
30 BitField<13, 1, u64> is_signed;
31 BitField<39, 2, FpRounding> fp_rounding;
32 BitField<41, 2, u64> selector;
33 BitField<47, 1, u64> cc;
34 BitField<45, 1, u64> neg;
35 BitField<49, 1, u64> abs;
36};
37
38bool Is64(u64 insn) {
39 return Encoding{insn}.int_format == IntFormat::U64;
40}
41
42int BitSize(FloatFormat format) {
43 switch (format) {
44 case FloatFormat::F16:
45 return 16;
46 case FloatFormat::F32:
47 return 32;
48 case FloatFormat::F64:
49 return 64;
50 }
51 throw NotImplementedException("Invalid float format {}", format);
52}
53
54IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) {
55 const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))};
56 const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))};
57 const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)};
58 const IR::U1 is_least{v.ir.IEqual(value, least_value)};
59 return IR::U32{v.ir.Select(is_least, value, absolute)};
60}
61
62void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
63 const Encoding i2f{insn};
64 if (i2f.cc != 0) {
65 throw NotImplementedException("I2F CC");
66 }
67 const bool is_signed{i2f.is_signed != 0};
68 int src_bitsize{};
69 switch (i2f.int_format) {
70 case IntFormat::U8:
71 src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
72 v.ir.Imm32(8), is_signed);
73 if (i2f.abs != 0) {
74 src = SmallAbs(v, src, 8);
75 }
76 src_bitsize = 8;
77 break;
78 case IntFormat::U16:
79 if (i2f.selector == 1 || i2f.selector == 3) {
80 throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value());
81 }
82 src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
83 v.ir.Imm32(16), is_signed);
84 if (i2f.abs != 0) {
85 src = SmallAbs(v, src, 16);
86 }
87 src_bitsize = 16;
88 break;
89 case IntFormat::U32:
90 case IntFormat::U64:
91 if (i2f.selector != 0) {
92 throw NotImplementedException("Unexpected selector {}", i2f.selector.Value());
93 }
94 if (i2f.abs != 0 && is_signed) {
95 src = v.ir.IAbs(src);
96 }
97 src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32;
98 break;
99 }
100 const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32};
101 const int dst_bitsize{BitSize(i2f.float_format)};
102 const IR::FpControl fp_control{
103 .no_contraction = false,
104 .rounding = CastFpRounding(i2f.fp_rounding),
105 .fmz_mode = IR::FmzMode::DontCare,
106 };
107 auto value{v.ir.ConvertIToF(static_cast<size_t>(dst_bitsize),
108 static_cast<size_t>(conversion_src_bitsize), is_signed, src,
109 fp_control)};
110 if (i2f.neg != 0) {
111 if (i2f.abs != 0 || !is_signed) {
112 // We know the value is positive
113 value = v.ir.FPNeg(value);
114 } else {
115 // Only negate if the input isn't the lowest value
116 IR::U1 is_least;
117 if (src_bitsize == 64) {
118 is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min()));
119 } else if (src_bitsize == 32) {
120 is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits<s32>::min()));
121 } else {
122 const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))};
123 is_least = v.ir.IEqual(src, least_value);
124 }
125 value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))};
126 }
127 }
128 switch (i2f.float_format) {
129 case FloatFormat::F16: {
130 const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
131 v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero)));
132 break;
133 }
134 case FloatFormat::F32:
135 v.F(i2f.dest_reg, value);
136 break;
137 case FloatFormat::F64: {
138 if (!IR::IsAligned(i2f.dest_reg, 2)) {
139 throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value());
140 }
141 const IR::Value vector{v.ir.UnpackDouble2x32(value)};
142 for (int i = 0; i < 2; ++i) {
143 v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))});
144 }
145 break;
146 }
147 default:
148 throw NotImplementedException("Invalid float format {}", i2f.float_format.Value());
149 }
150}
151} // Anonymous namespace
152
153void TranslatorVisitor::I2F_reg(u64 insn) {
154 if (Is64(insn)) {
155 union {
156 u64 raw;
157 BitField<20, 8, IR::Reg> reg;
158 } const value{insn};
159 const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))};
160 I2F(*this, insn, ir.PackUint2x32(regs));
161 } else {
162 I2F(*this, insn, GetReg20(insn));
163 }
164}
165
166void TranslatorVisitor::I2F_cbuf(u64 insn) {
167 if (Is64(insn)) {
168 I2F(*this, insn, GetPackedCbuf(insn));
169 } else {
170 I2F(*this, insn, GetCbuf(insn));
171 }
172}
173
174void TranslatorVisitor::I2F_imm(u64 insn) {
175 if (Is64(insn)) {
176 I2F(*this, insn, GetPackedImm20(insn));
177 } else {
178 I2F(*this, insn, GetImm20(insn));
179 }
180}
181
182} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
new file mode 100644
index 000000000..5feefc0ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
@@ -0,0 +1,82 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class MaxShift : u64 {
12 U32,
13 Undefined,
14 U64,
15 S64,
16};
17
18IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift,
19 bool right_shift, bool is_signed) {
20 if (!right_shift) {
21 return ir.ShiftLeftLogical(packed_int, safe_shift);
22 }
23 if (is_signed) {
24 return ir.ShiftRightArithmetic(packed_int, safe_shift);
25 }
26 return ir.ShiftRightLogical(packed_int, safe_shift);
27}
28
29void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits,
30 bool right_shift) {
31 union {
32 u64 insn;
33 BitField<0, 8, IR::Reg> dest_reg;
34 BitField<0, 8, IR::Reg> lo_bits_reg;
35 BitField<37, 2, MaxShift> max_shift;
36 BitField<47, 1, u64> cc;
37 BitField<48, 2, u64> x_mode;
38 BitField<50, 1, u64> wrap;
39 } const shf{insn};
40
41 if (shf.cc != 0) {
42 throw NotImplementedException("SHF CC");
43 }
44 if (shf.x_mode != 0) {
45 throw NotImplementedException("SHF X Mode");
46 }
47 if (shf.max_shift == MaxShift::Undefined) {
48 throw NotImplementedException("SHF Use of undefined MaxShift value");
49 }
50 const IR::U32 low_bits{v.X(shf.lo_bits_reg)};
51 const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))};
52 const IR::U32 max_shift{shf.max_shift == MaxShift::U32 ? v.ir.Imm32(32) : v.ir.Imm32(63)};
53 const IR::U32 safe_shift{shf.wrap != 0
54 ? v.ir.BitwiseAnd(shift, v.ir.ISub(max_shift, v.ir.Imm32(1)))
55 : v.ir.UMin(shift, max_shift)};
56
57 const bool is_signed{shf.max_shift == MaxShift::S64};
58 const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)};
59 const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)};
60
61 const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 0 : 1)};
62 v.X(shf.dest_reg, result);
63}
64} // Anonymous namespace
65
66void TranslatorVisitor::SHF_l_reg(u64 insn) {
67 SHF(*this, insn, GetReg20(insn), GetReg39(insn), false);
68}
69
70void TranslatorVisitor::SHF_l_imm(u64 insn) {
71 SHF(*this, insn, GetImm20(insn), GetReg39(insn), false);
72}
73
74void TranslatorVisitor::SHF_r_reg(u64 insn) {
75 SHF(*this, insn, GetReg20(insn), GetReg39(insn), true);
76}
77
78void TranslatorVisitor::SHF_r_imm(u64 insn) {
79 SHF(*this, insn, GetImm20(insn), GetReg39(insn), true);
80}
81
82} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
new file mode 100644
index 000000000..1badbacc4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
@@ -0,0 +1,64 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_reg;
16 BitField<39, 3, IR::Pred> pred;
17 BitField<42, 1, u64> neg_pred;
18 BitField<43, 2, u64> mode;
19 BitField<47, 1, u64> cc;
20 BitField<48, 1, u64> is_signed;
21 } const imnmx{insn};
22
23 if (imnmx.cc != 0) {
24 throw NotImplementedException("IMNMX CC");
25 }
26
27 if (imnmx.mode != 0) {
28 throw NotImplementedException("IMNMX.MODE");
29 }
30
31 const IR::U1 pred{v.ir.GetPred(imnmx.pred)};
32 const IR::U32 op_a{v.X(imnmx.src_reg)};
33 IR::U32 min;
34 IR::U32 max;
35
36 if (imnmx.is_signed != 0) {
37 min = IR::U32{v.ir.SMin(op_a, op_b)};
38 max = IR::U32{v.ir.SMax(op_a, op_b)};
39 } else {
40 min = IR::U32{v.ir.UMin(op_a, op_b)};
41 max = IR::U32{v.ir.UMax(op_a, op_b)};
42 }
43 if (imnmx.neg_pred != 0) {
44 std::swap(min, max);
45 }
46
47 const IR::U32 result{v.ir.Select(pred, min, max)};
48 v.X(imnmx.dest_reg, result);
49}
50} // Anonymous namespace
51
52void TranslatorVisitor::IMNMX_reg(u64 insn) {
53 IMNMX(*this, insn, GetReg20(insn));
54}
55
56void TranslatorVisitor::IMNMX_cbuf(u64 insn) {
57 IMNMX(*this, insn, GetCbuf(insn));
58}
59
60void TranslatorVisitor::IMNMX_imm(u64 insn) {
61 IMNMX(*this, insn, GetImm20(insn));
62}
63
64} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp
new file mode 100644
index 000000000..5ece7678d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp
@@ -0,0 +1,36 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<40, 1, u64> tilde;
16 } const popc{insn};
17
18 const IR::U32 operand = popc.tilde == 0 ? src : v.ir.BitwiseNot(src);
19 const IR::U32 result = v.ir.BitCount(operand);
20 v.X(popc.dest_reg, result);
21}
22} // Anonymous namespace
23
24void TranslatorVisitor::POPC_reg(u64 insn) {
25 POPC(*this, insn, GetReg20(insn));
26}
27
28void TranslatorVisitor::POPC_cbuf(u64 insn) {
29 POPC(*this, insn, GetCbuf(insn));
30}
31
32void TranslatorVisitor::POPC_imm(u64 insn) {
33 POPC(*this, insn, GetImm20(insn));
34}
35
36} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
new file mode 100644
index 000000000..044671943
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
@@ -0,0 +1,86 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b,
12 u64 scale_imm) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> op_a;
17 } const iscadd{insn};
18
19 const bool po{neg_a && neg_b};
20 IR::U32 op_a{v.X(iscadd.op_a)};
21 if (po) {
22 // When PO is present, add one
23 op_b = v.ir.IAdd(op_b, v.ir.Imm32(1));
24 } else {
25 // When PO is not present, the bits are interpreted as negation
26 if (neg_a) {
27 op_a = v.ir.INeg(op_a);
28 }
29 if (neg_b) {
30 op_b = v.ir.INeg(op_b);
31 }
32 }
33 // With the operands already processed, scale A
34 const IR::U32 scale{v.ir.Imm32(static_cast<u32>(scale_imm))};
35 const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)};
36
37 const IR::U32 result{v.ir.IAdd(scaled_a, op_b)};
38 v.X(iscadd.dest_reg, result);
39
40 if (cc) {
41 v.SetZFlag(v.ir.GetZeroFromOp(result));
42 v.SetSFlag(v.ir.GetSignFromOp(result));
43 const IR::U1 carry{v.ir.GetCarryFromOp(result)};
44 const IR::U1 overflow{v.ir.GetOverflowFromOp(result)};
45 v.SetCFlag(po ? v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)) : carry);
46 v.SetOFlag(po ? v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)) : overflow);
47 }
48}
49
50void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
51 union {
52 u64 raw;
53 BitField<47, 1, u64> cc;
54 BitField<48, 1, u64> neg_b;
55 BitField<49, 1, u64> neg_a;
56 BitField<39, 5, u64> scale;
57 } const iscadd{insn};
58
59 ISCADD(v, insn, op_b, iscadd.cc != 0, iscadd.neg_a != 0, iscadd.neg_b != 0, iscadd.scale);
60}
61
62} // Anonymous namespace
63
64void TranslatorVisitor::ISCADD_reg(u64 insn) {
65 ISCADD(*this, insn, GetReg20(insn));
66}
67
68void TranslatorVisitor::ISCADD_cbuf(u64 insn) {
69 ISCADD(*this, insn, GetCbuf(insn));
70}
71
72void TranslatorVisitor::ISCADD_imm(u64 insn) {
73 ISCADD(*this, insn, GetImm20(insn));
74}
75
76void TranslatorVisitor::ISCADD32I(u64 insn) {
77 union {
78 u64 raw;
79 BitField<52, 1, u64> cc;
80 BitField<53, 5, u64> scale;
81 } const iscadd{insn};
82
83 return ISCADD(*this, insn, GetImm32(insn), iscadd.cc != 0, false, false, iscadd.scale);
84}
85
86} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
new file mode 100644
index 000000000..bee10e5b9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
@@ -0,0 +1,58 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12IR::U1 IsetpCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
13 CompareOp compare_op, bool is_signed, bool x) {
14 return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed)
15 : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
16}
17
18void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
19 union {
20 u64 raw;
21 BitField<0, 3, IR::Pred> dest_pred_b;
22 BitField<3, 3, IR::Pred> dest_pred_a;
23 BitField<8, 8, IR::Reg> src_reg_a;
24 BitField<39, 3, IR::Pred> bop_pred;
25 BitField<42, 1, u64> neg_bop_pred;
26 BitField<43, 1, u64> x;
27 BitField<45, 2, BooleanOp> bop;
28 BitField<48, 1, u64> is_signed;
29 BitField<49, 3, CompareOp> compare_op;
30 } const isetp{insn};
31
32 const bool is_signed{isetp.is_signed != 0};
33 const bool x{isetp.x != 0};
34 const BooleanOp bop{isetp.bop};
35 const CompareOp compare_op{isetp.compare_op};
36 const IR::U32 op_a{v.X(isetp.src_reg_a)};
37 const IR::U1 comparison{IsetpCompare(v.ir, op_a, op_b, compare_op, is_signed, x)};
38 const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)};
39 const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
40 const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
41 v.ir.SetPred(isetp.dest_pred_a, result_a);
42 v.ir.SetPred(isetp.dest_pred_b, result_b);
43}
44} // Anonymous namespace
45
46void TranslatorVisitor::ISETP_reg(u64 insn) {
47 ISETP(*this, insn, GetReg20(insn));
48}
49
50void TranslatorVisitor::ISETP_cbuf(u64 insn) {
51 ISETP(*this, insn, GetCbuf(insn));
52}
53
54void TranslatorVisitor::ISETP_imm(u64 insn) {
55 ISETP(*this, insn, GetImm20(insn));
56}
57
58} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
new file mode 100644
index 000000000..20af68852
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
@@ -0,0 +1,71 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_reg_a;
16 BitField<39, 1, u64> w;
17 BitField<43, 1, u64> x;
18 BitField<47, 1, u64> cc;
19 } const shl{insn};
20
21 if (shl.x != 0) {
22 throw NotImplementedException("SHL.X");
23 }
24 if (shl.cc != 0) {
25 throw NotImplementedException("SHL.CC");
26 }
27 const IR::U32 base{v.X(shl.src_reg_a)};
28 IR::U32 result;
29 if (shl.w != 0) {
30 // When .W is set, the shift value is wrapped
31 // To emulate this we just have to wrap it ourselves.
32 const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))};
33 result = v.ir.ShiftLeftLogical(base, shift);
34 } else {
35 // When .W is not set, the shift value is clamped between 0 and 32.
36 // To emulate this we have to have in mind the special shift of 32, that evaluates as 0.
37 // We can safely evaluate an out of bounds shift according to the SPIR-V specification:
38 //
39 // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical
40 // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than
41 // or equal to the bit width of the components of Base."
42 //
43 // And on the GLASM specification it is also safe to evaluate out of bounds:
44 //
45 // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt
46 // "The results of a shift operation ("<<") are undefined if the value of the second operand
47 // is negative, or greater than or equal to the number of bits in the first operand."
48 //
49 // Emphasis on undefined results in contrast to undefined behavior.
50 //
51 const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)};
52 const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)};
53 result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))};
54 }
55 v.X(shl.dest_reg, result);
56}
57} // Anonymous namespace
58
59void TranslatorVisitor::SHL_reg(u64 insn) {
60 SHL(*this, insn, GetReg20(insn));
61}
62
63void TranslatorVisitor::SHL_cbuf(u64 insn) {
64 SHL(*this, insn, GetCbuf(insn));
65}
66
67void TranslatorVisitor::SHL_imm(u64 insn) {
68 SHL(*this, insn, GetImm20(insn));
69}
70
71} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp
new file mode 100644
index 000000000..be00bb605
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp
@@ -0,0 +1,66 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_reg_a;
16 BitField<39, 1, u64> is_wrapped;
17 BitField<40, 1, u64> brev;
18 BitField<43, 1, u64> xmode;
19 BitField<47, 1, u64> cc;
20 BitField<48, 1, u64> is_signed;
21 } const shr{insn};
22
23 if (shr.xmode != 0) {
24 throw NotImplementedException("SHR.XMODE");
25 }
26 if (shr.cc != 0) {
27 throw NotImplementedException("SHR.CC");
28 }
29
30 IR::U32 base{v.X(shr.src_reg_a)};
31 if (shr.brev == 1) {
32 base = v.ir.BitReverse(base);
33 }
34 IR::U32 result;
35 const IR::U32 safe_shift = shr.is_wrapped == 0 ? shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31));
36 if (shr.is_signed == 1) {
37 result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)};
38 } else {
39 result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)};
40 }
41
42 if (shr.is_wrapped == 0) {
43 const IR::U32 zero{v.ir.Imm32(0)};
44 const IR::U32 safe_bits{v.ir.Imm32(32)};
45
46 const IR::U1 is_negative{v.ir.ILessThan(result, zero, true)};
47 const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)};
48 const IR::U32 clamped_value{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
49 result = IR::U32{v.ir.Select(is_safe, result, clamped_value)};
50 }
51 v.X(shr.dest_reg, result);
52}
53} // Anonymous namespace
54
55void TranslatorVisitor::SHR_reg(u64 insn) {
56 SHR(*this, insn, GetReg20(insn));
57}
58
59void TranslatorVisitor::SHR_cbuf(u64 insn) {
60 SHR(*this, insn, GetCbuf(insn));
61}
62
63void TranslatorVisitor::SHR_imm(u64 insn) {
64 SHR(*this, insn, GetImm20(insn));
65}
66} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
new file mode 100644
index 000000000..2932cdc42
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
@@ -0,0 +1,135 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class SelectMode : u64 {
12 Default,
13 CLO,
14 CHI,
15 CSFU,
16 CBCC,
17};
18
19enum class Half : u64 {
20 H0, // Least-significant bits (15:0)
21 H1, // Most-significant bits (31:16)
22};
23
24IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) {
25 const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)};
26 return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed);
27}
28
29void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c,
30 SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) {
31 union {
32 u64 raw;
33 BitField<0, 8, IR::Reg> dest_reg;
34 BitField<8, 8, IR::Reg> src_reg_a;
35 BitField<47, 1, u64> cc;
36 BitField<48, 1, u64> is_a_signed;
37 BitField<49, 1, u64> is_b_signed;
38 BitField<53, 1, Half> half_a;
39 } const xmad{insn};
40
41 if (x) {
42 throw NotImplementedException("XMAD X");
43 }
44 const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)};
45 const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)};
46
47 IR::U32 product{v.ir.IMul(op_a, op_b)};
48 if (psl) {
49 // .PSL shifts the product 16 bits
50 product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16));
51 }
52 const IR::U32 op_c{[&]() -> IR::U32 {
53 switch (select_mode) {
54 case SelectMode::Default:
55 return src_c;
56 case SelectMode::CLO:
57 return ExtractHalf(v, src_c, Half::H0, false);
58 case SelectMode::CHI:
59 return ExtractHalf(v, src_c, Half::H1, false);
60 case SelectMode::CBCC:
61 return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c);
62 case SelectMode::CSFU:
63 throw NotImplementedException("XMAD CSFU");
64 }
65 throw NotImplementedException("Invalid XMAD select mode {}", select_mode);
66 }()};
67 IR::U32 result{v.ir.IAdd(product, op_c)};
68 if (mrg) {
69 // .MRG inserts src_b [15:0] into result's [31:16].
70 const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)};
71 result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16));
72 }
73 if (xmad.cc) {
74 throw NotImplementedException("XMAD CC");
75 }
76 // Store result
77 v.X(xmad.dest_reg, result);
78}
79} // Anonymous namespace
80
81void TranslatorVisitor::XMAD_reg(u64 insn) {
82 union {
83 u64 raw;
84 BitField<35, 1, Half> half_b;
85 BitField<36, 1, u64> psl;
86 BitField<37, 1, u64> mrg;
87 BitField<38, 1, u64> x;
88 BitField<50, 3, SelectMode> select_mode;
89 } const xmad{insn};
90
91 XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
92 xmad.mrg != 0, xmad.x != 0);
93}
94
95void TranslatorVisitor::XMAD_rc(u64 insn) {
96 union {
97 u64 raw;
98 BitField<50, 2, SelectMode> select_mode;
99 BitField<52, 1, Half> half_b;
100 BitField<54, 1, u64> x;
101 } const xmad{insn};
102
103 XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false,
104 xmad.x != 0);
105}
106
107void TranslatorVisitor::XMAD_cr(u64 insn) {
108 union {
109 u64 raw;
110 BitField<50, 2, SelectMode> select_mode;
111 BitField<52, 1, Half> half_b;
112 BitField<54, 1, u64> x;
113 BitField<55, 1, u64> psl;
114 BitField<56, 1, u64> mrg;
115 } const xmad{insn};
116
117 XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
118 xmad.mrg != 0, xmad.x != 0);
119}
120
121void TranslatorVisitor::XMAD_imm(u64 insn) {
122 union {
123 u64 raw;
124 BitField<20, 16, u64> src_b;
125 BitField<36, 1, u64> psl;
126 BitField<37, 1, u64> mrg;
127 BitField<38, 1, u64> x;
128 BitField<50, 3, SelectMode> select_mode;
129 } const xmad{insn};
130
131 XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode,
132 Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0);
133}
134
135} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
new file mode 100644
index 000000000..53e8d8923
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
@@ -0,0 +1,126 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class IntegerWidth : u64 {
12 Byte,
13 Short,
14 Word,
15};
16
17[[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) {
18 switch (width) {
19 case IntegerWidth::Byte:
20 return ir.Imm32(8);
21 case IntegerWidth::Short:
22 return ir.Imm32(16);
23 case IntegerWidth::Word:
24 return ir.Imm32(32);
25 default:
26 throw NotImplementedException("Invalid width {}", width);
27 }
28}
29
30[[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src,
31 IntegerWidth dst_width) {
32 const IR::U32 zero{ir.Imm32(0)};
33 const IR::U32 count{WidthSize(ir, dst_width)};
34 return ir.BitFieldExtract(src, zero, count, false);
35}
36
37[[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width,
38 bool dst_signed, bool src_signed) {
39 IR::U32 min{};
40 IR::U32 max{};
41 const IR::U32 zero{ir.Imm32(0)};
42 switch (dst_width) {
43 case IntegerWidth::Byte:
44 min = dst_signed && src_signed ? ir.Imm32(0xffffff80) : zero;
45 max = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff);
46 break;
47 case IntegerWidth::Short:
48 min = dst_signed && src_signed ? ir.Imm32(0xffff8000) : zero;
49 max = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff);
50 break;
51 case IntegerWidth::Word:
52 min = dst_signed && src_signed ? ir.Imm32(0x80000000) : zero;
53 max = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff);
54 break;
55 default:
56 throw NotImplementedException("Invalid width {}", dst_width);
57 }
58 const IR::U32 value{!dst_signed && src_signed ? ir.SMax(zero, src) : src};
59 return dst_signed && src_signed ? ir.SClamp(value, min, max) : ir.UClamp(value, min, max);
60}
61
62void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) {
63 union {
64 u64 insn;
65 BitField<0, 8, IR::Reg> dest_reg;
66 BitField<8, 2, IntegerWidth> dst_fmt;
67 BitField<12, 1, u64> dst_fmt_sign;
68 BitField<10, 2, IntegerWidth> src_fmt;
69 BitField<13, 1, u64> src_fmt_sign;
70 BitField<41, 3, u64> selector;
71 BitField<45, 1, u64> neg;
72 BitField<47, 1, u64> cc;
73 BitField<49, 1, u64> abs;
74 BitField<50, 1, u64> sat;
75 } const i2i{insn};
76
77 if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) {
78 throw NotImplementedException("16-bit source format incompatible with selector {}",
79 i2i.selector);
80 }
81 if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) {
82 throw NotImplementedException("32-bit source format incompatible with selector {}",
83 i2i.selector);
84 }
85
86 const s32 selector{static_cast<s32>(i2i.selector)};
87 const IR::U32 offset{v.ir.Imm32(selector * 8)};
88 const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)};
89 const bool src_signed{i2i.src_fmt_sign != 0};
90 const bool dst_signed{i2i.dst_fmt_sign != 0};
91 const bool sat{i2i.sat != 0};
92
93 IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, src_signed)};
94 if (i2i.abs != 0) {
95 src_values = v.ir.IAbs(src_values);
96 }
97 if (i2i.neg != 0) {
98 src_values = v.ir.INeg(src_values);
99 }
100 const IR::U32 result{
101 sat ? SaturateInteger(v.ir, src_values, i2i.dst_fmt, dst_signed, src_signed)
102 : ConvertInteger(v.ir, src_values, i2i.dst_fmt)};
103
104 v.X(i2i.dest_reg, result);
105 if (i2i.cc != 0) {
106 v.SetZFlag(v.ir.GetZeroFromOp(result));
107 v.SetSFlag(v.ir.GetSignFromOp(result));
108 v.ResetCFlag();
109 v.ResetOFlag();
110 }
111}
112} // Anonymous namespace
113
114void TranslatorVisitor::I2I_reg(u64 insn) {
115 I2I(*this, insn, GetReg20(insn));
116}
117
118void TranslatorVisitor::I2I_cbuf(u64 insn) {
119 I2I(*this, insn, GetCbuf(insn));
120}
121
122void TranslatorVisitor::I2I_imm(u64 insn) {
123 I2I(*this, insn, GetImm20(insn));
124}
125
126} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
new file mode 100644
index 000000000..9b85f8059
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
@@ -0,0 +1,53 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 Default,
13 Patch,
14 Prim,
15 Attr,
16};
17
18enum class Shift : u64 {
19 Default,
20 U16,
21 B32,
22};
23
24} // Anonymous namespace
25
26void TranslatorVisitor::ISBERD(u64 insn) {
27 union {
28 u64 raw;
29 BitField<0, 8, IR::Reg> dest_reg;
30 BitField<8, 8, IR::Reg> src_reg;
31 BitField<31, 1, u64> skew;
32 BitField<32, 1, u64> o;
33 BitField<33, 2, Mode> mode;
34 BitField<47, 2, Shift> shift;
35 } const isberd{insn};
36
37 if (isberd.skew != 0) {
38 throw NotImplementedException("SKEW");
39 }
40 if (isberd.o != 0) {
41 throw NotImplementedException("O");
42 }
43 if (isberd.mode != Mode::Default) {
44 throw NotImplementedException("Mode {}", isberd.mode.Value());
45 }
46 if (isberd.shift != Shift::Default) {
47 throw NotImplementedException("Shift {}", isberd.shift.Value());
48 }
49 LOG_WARNING(Shader, "(STUBBED) called");
50 X(isberd.dest_reg, X(isberd.src_reg));
51}
52
53} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
new file mode 100644
index 000000000..2300088e3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
9
10namespace Shader::Maxwell {
11using namespace LDC;
12namespace {
13std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index,
14 const IR::U32& reg, const IR::U32& imm) {
15 switch (mode) {
16 case Mode::Default:
17 return {imm_index, ir.IAdd(reg, imm)};
18 default:
19 break;
20 }
21 throw NotImplementedException("Mode {}", mode);
22}
23} // Anonymous namespace
24
25void TranslatorVisitor::LDC(u64 insn) {
26 const Encoding ldc{insn};
27 const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))};
28 const IR::U32 reg{X(ldc.src_reg)};
29 const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))};
30 const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)};
31 switch (ldc.size) {
32 case Size::U8:
33 X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, false)});
34 break;
35 case Size::S8:
36 X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, true)});
37 break;
38 case Size::U16:
39 X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, false)});
40 break;
41 case Size::S16:
42 X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, true)});
43 break;
44 case Size::B32:
45 X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 32, false)});
46 break;
47 case Size::B64: {
48 if (!IR::IsAligned(ldc.dest_reg, 2)) {
49 throw NotImplementedException("Unaligned destination register");
50 }
51 const IR::Value vector{ir.GetCbuf(index, offset, 64, false)};
52 for (int i = 0; i < 2; ++i) {
53 X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
54 }
55 break;
56 }
57 default:
58 throw NotImplementedException("Invalid size {}", ldc.size.Value());
59 }
60}
61
62} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
new file mode 100644
index 000000000..3074ea0e3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
@@ -0,0 +1,39 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/reg.h"
10
11namespace Shader::Maxwell::LDC {
12
13enum class Mode : u64 {
14 Default,
15 IL,
16 IS,
17 ISL,
18};
19
20enum class Size : u64 {
21 U8,
22 S8,
23 U16,
24 S16,
25 B32,
26 B64,
27};
28
29union Encoding {
30 u64 raw;
31 BitField<0, 8, IR::Reg> dest_reg;
32 BitField<8, 8, IR::Reg> src_reg;
33 BitField<20, 16, s64> offset;
34 BitField<36, 5, u64> index;
35 BitField<44, 2, Mode> mode;
36 BitField<48, 3, Size> size;
37};
38
39} // namespace Shader::Maxwell::LDC
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
new file mode 100644
index 000000000..4a0f04e47
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
@@ -0,0 +1,108 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale,
12 bool neg, bool x) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> offset_lo_reg;
17 BitField<47, 1, u64> cc;
18 BitField<48, 3, IR::Pred> pred;
19 } const lea{insn};
20
21 if (x) {
22 throw NotImplementedException("LEA.HI X");
23 }
24 if (lea.pred != IR::Pred::PT) {
25 throw NotImplementedException("LEA.HI Pred");
26 }
27 if (lea.cc != 0) {
28 throw NotImplementedException("LEA.HI CC");
29 }
30
31 const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
32 const IR::U64 packed_offset{v.ir.PackUint2x32(v.ir.CompositeConstruct(offset_lo, offset_hi))};
33 const IR::U64 offset{neg ? IR::U64{v.ir.INeg(packed_offset)} : packed_offset};
34
35 const s32 hi_scale{32 - static_cast<s32>(scale)};
36 const IR::U64 scaled_offset{v.ir.ShiftRightLogical(offset, v.ir.Imm32(hi_scale))};
37 const IR::U32 scaled_offset_w0{v.ir.CompositeExtract(v.ir.UnpackUint2x32(scaled_offset), 0)};
38
39 IR::U32 result{v.ir.IAdd(base, scaled_offset_w0)};
40 v.X(lea.dest_reg, result);
41}
42
43void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) {
44 union {
45 u64 insn;
46 BitField<0, 8, IR::Reg> dest_reg;
47 BitField<8, 8, IR::Reg> offset_lo_reg;
48 BitField<39, 5, u64> scale;
49 BitField<45, 1, u64> neg;
50 BitField<46, 1, u64> x;
51 BitField<47, 1, u64> cc;
52 BitField<48, 3, IR::Pred> pred;
53 } const lea{insn};
54 if (lea.x != 0) {
55 throw NotImplementedException("LEA.LO X");
56 }
57 if (lea.pred != IR::Pred::PT) {
58 throw NotImplementedException("LEA.LO Pred");
59 }
60 if (lea.cc != 0) {
61 throw NotImplementedException("LEA.LO CC");
62 }
63
64 const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
65 const s32 scale{static_cast<s32>(lea.scale)};
66 const IR::U32 offset{lea.neg != 0 ? IR::U32{v.ir.INeg(offset_lo)} : offset_lo};
67 const IR::U32 scaled_offset{v.ir.ShiftLeftLogical(offset, v.ir.Imm32(scale))};
68
69 IR::U32 result{v.ir.IAdd(base, scaled_offset)};
70 v.X(lea.dest_reg, result);
71}
72} // Anonymous namespace
73
74void TranslatorVisitor::LEA_hi_reg(u64 insn) {
75 union {
76 u64 insn;
77 BitField<28, 5, u64> scale;
78 BitField<37, 1, u64> neg;
79 BitField<38, 1, u64> x;
80 } const lea{insn};
81
82 LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0);
83}
84
85void TranslatorVisitor::LEA_hi_cbuf(u64 insn) {
86 union {
87 u64 insn;
88 BitField<51, 5, u64> scale;
89 BitField<56, 1, u64> neg;
90 BitField<57, 1, u64> x;
91 } const lea{insn};
92
93 LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0);
94}
95
96void TranslatorVisitor::LEA_lo_reg(u64 insn) {
97 LEA_lo(*this, insn, GetReg20(insn));
98}
99
100void TranslatorVisitor::LEA_lo_cbuf(u64 insn) {
101 LEA_lo(*this, insn, GetCbuf(insn));
102}
103
104void TranslatorVisitor::LEA_lo_imm(u64 insn) {
105 LEA_lo(*this, insn, GetImm20(insn));
106}
107
108} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
new file mode 100644
index 000000000..924fb7a40
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
@@ -0,0 +1,196 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/ir/ir_emitter.h"
9#include "shader_recompiler/frontend/maxwell/opcodes.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Size : u64 {
15 B32,
16 B64,
17 B96,
18 B128,
19};
20
21enum class InterpolationMode : u64 {
22 Pass,
23 Multiply,
24 Constant,
25 Sc,
26};
27
28enum class SampleMode : u64 {
29 Default,
30 Centroid,
31 Offset,
32};
33
34u32 NumElements(Size size) {
35 switch (size) {
36 case Size::B32:
37 return 1;
38 case Size::B64:
39 return 2;
40 case Size::B96:
41 return 3;
42 case Size::B128:
43 return 4;
44 }
45 throw InvalidArgument("Invalid size {}", size);
46}
47
48template <typename F>
49void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) {
50 const IR::U32 index_value{v.X(index_reg)};
51 for (u32 element = 0; element < num_elements; ++element) {
52 const IR::U32 final_offset{
53 element == 0 ? index_value : IR::U32{v.ir.IAdd(index_value, v.ir.Imm32(element * 4U))}};
54 f(element, final_offset);
55 }
56}
57
58} // Anonymous namespace
59
60void TranslatorVisitor::ALD(u64 insn) {
61 union {
62 u64 raw;
63 BitField<0, 8, IR::Reg> dest_reg;
64 BitField<8, 8, IR::Reg> index_reg;
65 BitField<20, 10, u64> absolute_offset;
66 BitField<20, 11, s64> relative_offset;
67 BitField<39, 8, IR::Reg> vertex_reg;
68 BitField<32, 1, u64> o;
69 BitField<31, 1, u64> patch;
70 BitField<47, 2, Size> size;
71 } const ald{insn};
72
73 const u64 offset{ald.absolute_offset.Value()};
74 if (offset % 4 != 0) {
75 throw NotImplementedException("Unaligned absolute offset {}", offset);
76 }
77 const IR::U32 vertex{X(ald.vertex_reg)};
78 const u32 num_elements{NumElements(ald.size)};
79 if (ald.index_reg == IR::Reg::RZ) {
80 for (u32 element = 0; element < num_elements; ++element) {
81 if (ald.patch != 0) {
82 const IR::Patch patch{offset / 4 + element};
83 F(ald.dest_reg + static_cast<int>(element), ir.GetPatch(patch));
84 } else {
85 const IR::Attribute attr{offset / 4 + element};
86 F(ald.dest_reg + static_cast<int>(element), ir.GetAttribute(attr, vertex));
87 }
88 }
89 return;
90 }
91 if (ald.patch != 0) {
92 throw NotImplementedException("Indirect patch read");
93 }
94 HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
95 F(ald.dest_reg + static_cast<int>(element), ir.GetAttributeIndexed(final_offset, vertex));
96 });
97}
98
99void TranslatorVisitor::AST(u64 insn) {
100 union {
101 u64 raw;
102 BitField<0, 8, IR::Reg> src_reg;
103 BitField<8, 8, IR::Reg> index_reg;
104 BitField<20, 10, u64> absolute_offset;
105 BitField<20, 11, s64> relative_offset;
106 BitField<31, 1, u64> patch;
107 BitField<39, 8, IR::Reg> vertex_reg;
108 BitField<47, 2, Size> size;
109 } const ast{insn};
110
111 if (ast.index_reg != IR::Reg::RZ) {
112 throw NotImplementedException("Indexed store");
113 }
114 const u64 offset{ast.absolute_offset.Value()};
115 if (offset % 4 != 0) {
116 throw NotImplementedException("Unaligned absolute offset {}", offset);
117 }
118 const IR::U32 vertex{X(ast.vertex_reg)};
119 const u32 num_elements{NumElements(ast.size)};
120 if (ast.index_reg == IR::Reg::RZ) {
121 for (u32 element = 0; element < num_elements; ++element) {
122 if (ast.patch != 0) {
123 const IR::Patch patch{offset / 4 + element};
124 ir.SetPatch(patch, F(ast.src_reg + static_cast<int>(element)));
125 } else {
126 const IR::Attribute attr{offset / 4 + element};
127 ir.SetAttribute(attr, F(ast.src_reg + static_cast<int>(element)), vertex);
128 }
129 }
130 return;
131 }
132 if (ast.patch != 0) {
133 throw NotImplementedException("Indexed tessellation patch store");
134 }
135 HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
136 ir.SetAttributeIndexed(final_offset, F(ast.src_reg + static_cast<int>(element)), vertex);
137 });
138}
139
140void TranslatorVisitor::IPA(u64 insn) {
141 // IPA is the instruction used to read varyings from a fragment shader.
142 // gl_FragCoord is mapped to the gl_Position attribute.
143 // It yields unknown results when used outside of the fragment shader stage.
144 union {
145 u64 raw;
146 BitField<0, 8, IR::Reg> dest_reg;
147 BitField<8, 8, IR::Reg> index_reg;
148 BitField<20, 8, IR::Reg> multiplier;
149 BitField<30, 8, IR::Attribute> attribute;
150 BitField<38, 1, u64> idx;
151 BitField<51, 1, u64> sat;
152 BitField<52, 2, SampleMode> sample_mode;
153 BitField<54, 2, InterpolationMode> interpolation_mode;
154 } const ipa{insn};
155
156 // Indexed IPAs are used for indexed varyings.
157 // For example:
158 //
159 // in vec4 colors[4];
160 // uniform int idx;
161 // void main() {
162 // gl_FragColor = colors[idx];
163 // }
164 const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ};
165 const IR::Attribute attribute{ipa.attribute};
166 IR::F32 value{is_indexed ? ir.GetAttributeIndexed(X(ipa.index_reg))
167 : ir.GetAttribute(attribute)};
168 if (IR::IsGeneric(attribute)) {
169 const ProgramHeader& sph{env.SPH()};
170 const u32 attr_index{IR::GenericAttributeIndex(attribute)};
171 const u32 element{static_cast<u32>(attribute) % 4};
172 const std::array input_map{sph.ps.GenericInputMap(attr_index)};
173 const bool is_perspective{input_map[element] == Shader::PixelImap::Perspective};
174 if (is_perspective) {
175 const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)};
176 value = ir.FPMul(value, position_w);
177 }
178 }
179 if (ipa.interpolation_mode == InterpolationMode::Multiply) {
180 value = ir.FPMul(value, F(ipa.multiplier));
181 }
182
183 // Saturated IPAs are generally generated out of clamped varyings.
184 // For example: clamp(some_varying, 0.0, 1.0)
185 const bool is_saturated{ipa.sat != 0};
186 if (is_saturated) {
187 if (attribute == IR::Attribute::FrontFace) {
188 throw NotImplementedException("IPA.SAT on FrontFace");
189 }
190 value = ir.FPSaturate(value);
191 }
192
193 F(ipa.dest_reg, value);
194}
195
196} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
new file mode 100644
index 000000000..d2a1dbf61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
@@ -0,0 +1,218 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Size : u64 {
12 U8,
13 S8,
14 U16,
15 S16,
16 B32,
17 B64,
18 B128,
19};
20
21IR::U32 Offset(TranslatorVisitor& v, u64 insn) {
22 union {
23 u64 raw;
24 BitField<8, 8, IR::Reg> offset_reg;
25 BitField<20, 24, u64> absolute_offset;
26 BitField<20, 24, s64> relative_offset;
27 } const encoding{insn};
28
29 if (encoding.offset_reg == IR::Reg::RZ) {
30 return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset));
31 } else {
32 const s32 relative{static_cast<s32>(encoding.relative_offset.Value())};
33 return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
34 }
35}
36
37std::pair<IR::U32, IR::U32> WordOffset(TranslatorVisitor& v, u64 insn) {
38 const IR::U32 offset{Offset(v, insn)};
39 if (offset.IsImmediate()) {
40 return {v.ir.Imm32(offset.U32() / 4), offset};
41 } else {
42 return {v.ir.ShiftRightArithmetic(offset, v.ir.Imm32(2)), offset};
43 }
44}
45
46std::pair<int, bool> GetSize(u64 insn) {
47 union {
48 u64 raw;
49 BitField<48, 3, Size> size;
50 } const encoding{insn};
51
52 switch (encoding.size) {
53 case Size::U8:
54 return {8, false};
55 case Size::S8:
56 return {8, true};
57 case Size::U16:
58 return {16, false};
59 case Size::S16:
60 return {16, true};
61 case Size::B32:
62 return {32, false};
63 case Size::B64:
64 return {64, false};
65 case Size::B128:
66 return {128, false};
67 default:
68 throw NotImplementedException("Invalid size {}", encoding.size.Value());
69 }
70}
71
72IR::Reg Reg(u64 insn) {
73 union {
74 u64 raw;
75 BitField<0, 8, IR::Reg> reg;
76 } const encoding{insn};
77
78 return encoding.reg;
79}
80
81IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) {
82 return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24));
83}
84
85IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) {
86 return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16));
87}
88
89IR::U32 LoadLocal(TranslatorVisitor& v, const IR::U32& word_offset, const IR::U32& offset) {
90 const IR::U32 local_memory_size{v.ir.Imm32(v.env.LocalMemorySize())};
91 const IR::U1 in_bounds{v.ir.ILessThan(offset, local_memory_size, false)};
92 return IR::U32{v.ir.Select(in_bounds, v.ir.LoadLocal(word_offset), v.ir.Imm32(0))};
93}
94} // Anonymous namespace
95
96void TranslatorVisitor::LDL(u64 insn) {
97 const auto [word_offset, offset]{WordOffset(*this, insn)};
98 const IR::U32 word{LoadLocal(*this, word_offset, offset)};
99 const IR::Reg dest{Reg(insn)};
100 const auto [bit_size, is_signed]{GetSize(insn)};
101 switch (bit_size) {
102 case 8: {
103 const IR::U32 bit{ByteOffset(ir, offset)};
104 X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(8), is_signed));
105 break;
106 }
107 case 16: {
108 const IR::U32 bit{ShortOffset(ir, offset)};
109 X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(16), is_signed));
110 break;
111 }
112 case 32:
113 case 64:
114 case 128:
115 if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
116 throw NotImplementedException("Unaligned destination register {}", dest);
117 }
118 X(dest, word);
119 for (int i = 1; i < bit_size / 32; ++i) {
120 const IR::U32 sub_word_offset{ir.IAdd(word_offset, ir.Imm32(i))};
121 const IR::U32 sub_offset{ir.IAdd(offset, ir.Imm32(i * 4))};
122 X(dest + i, LoadLocal(*this, sub_word_offset, sub_offset));
123 }
124 break;
125 }
126}
127
128void TranslatorVisitor::LDS(u64 insn) {
129 const IR::U32 offset{Offset(*this, insn)};
130 const IR::Reg dest{Reg(insn)};
131 const auto [bit_size, is_signed]{GetSize(insn)};
132 const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)};
133 switch (bit_size) {
134 case 8:
135 case 16:
136 case 32:
137 X(dest, IR::U32{value});
138 break;
139 case 64:
140 case 128:
141 if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
142 throw NotImplementedException("Unaligned destination register {}", dest);
143 }
144 for (int element = 0; element < bit_size / 32; ++element) {
145 X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))});
146 }
147 break;
148 }
149}
150
151void TranslatorVisitor::STL(u64 insn) {
152 const auto [word_offset, offset]{WordOffset(*this, insn)};
153 if (offset.IsImmediate()) {
154 // TODO: Support storing out of bounds at runtime
155 if (offset.U32() >= env.LocalMemorySize()) {
156 LOG_WARNING(Shader, "Storing local memory at 0x{:x} with a size of 0x{:x}, dropping",
157 offset.U32(), env.LocalMemorySize());
158 return;
159 }
160 }
161 const IR::Reg reg{Reg(insn)};
162 const IR::U32 src{X(reg)};
163 const int bit_size{GetSize(insn).first};
164 switch (bit_size) {
165 case 8: {
166 const IR::U32 bit{ByteOffset(ir, offset)};
167 const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))};
168 ir.WriteLocal(word_offset, value);
169 break;
170 }
171 case 16: {
172 const IR::U32 bit{ShortOffset(ir, offset)};
173 const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))};
174 ir.WriteLocal(word_offset, value);
175 break;
176 }
177 case 32:
178 case 64:
179 case 128:
180 if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) {
181 throw NotImplementedException("Unaligned source register");
182 }
183 ir.WriteLocal(word_offset, src);
184 for (int i = 1; i < bit_size / 32; ++i) {
185 ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i));
186 }
187 break;
188 }
189}
190
191void TranslatorVisitor::STS(u64 insn) {
192 const IR::U32 offset{Offset(*this, insn)};
193 const IR::Reg reg{Reg(insn)};
194 const int bit_size{GetSize(insn).first};
195 switch (bit_size) {
196 case 8:
197 case 16:
198 case 32:
199 ir.WriteShared(bit_size, offset, X(reg));
200 break;
201 case 64:
202 if (!IR::IsAligned(reg, 2)) {
203 throw NotImplementedException("Unaligned source register {}", reg);
204 }
205 ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1)));
206 break;
207 case 128: {
208 if (!IR::IsAligned(reg, 2)) {
209 throw NotImplementedException("Unaligned source register {}", reg);
210 }
211 const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))};
212 ir.WriteShared(128, offset, vector);
213 break;
214 }
215 }
216}
217
218} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
new file mode 100644
index 000000000..36c5cff2f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
@@ -0,0 +1,184 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13enum class LoadSize : u64 {
14 U8, // Zero-extend
15 S8, // Sign-extend
16 U16, // Zero-extend
17 S16, // Sign-extend
18 B32,
19 B64,
20 B128,
21 U128, // ???
22};
23
24enum class StoreSize : u64 {
25 U8, // Zero-extend
26 S8, // Sign-extend
27 U16, // Zero-extend
28 S16, // Sign-extend
29 B32,
30 B64,
31 B128,
32};
33
34// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
35enum class LoadCache : u64 {
36 CA, // Cache at all levels, likely to be accessed again
37 CG, // Cache at global level (cache in L2 and below, not L1)
38 CI, // ???
39 CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again)
40};
41
42// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
43enum class StoreCache : u64 {
44 WB, // Cache write-back all coherent levels
45 CG, // Cache at global level
46 CS, // Cache streaming, likely to be accessed once
47 WT, // Cache write-through (to system memory)
48};
49
50IR::U64 Address(TranslatorVisitor& v, u64 insn) {
51 union {
52 u64 raw;
53 BitField<8, 8, IR::Reg> addr_reg;
54 BitField<20, 24, s64> addr_offset;
55 BitField<20, 24, u64> rz_addr_offset;
56 BitField<45, 1, u64> e;
57 } const mem{insn};
58
59 const IR::U64 address{[&]() -> IR::U64 {
60 if (mem.e == 0) {
61 // LDG/STG without .E uses a 32-bit pointer, zero-extend it
62 return v.ir.UConvert(64, v.X(mem.addr_reg));
63 }
64 if (!IR::IsAligned(mem.addr_reg, 2)) {
65 throw NotImplementedException("Unaligned address register");
66 }
67 // Pack two registers to build the 64-bit address
68 return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1)));
69 }()};
70 const u64 addr_offset{[&]() -> u64 {
71 if (mem.addr_reg == IR::Reg::RZ) {
72 // When RZ is used, the address is an absolute address
73 return static_cast<u64>(mem.rz_addr_offset.Value());
74 } else {
75 return static_cast<u64>(mem.addr_offset.Value());
76 }
77 }()};
78 // Apply the offset
79 return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
80}
81} // Anonymous namespace
82
83void TranslatorVisitor::LDG(u64 insn) {
84 // LDG loads global memory into registers
85 union {
86 u64 raw;
87 BitField<0, 8, IR::Reg> dest_reg;
88 BitField<46, 2, LoadCache> cache;
89 BitField<48, 3, LoadSize> size;
90 } const ldg{insn};
91
92 // Pointer to load data from
93 const IR::U64 address{Address(*this, insn)};
94 const IR::Reg dest_reg{ldg.dest_reg};
95 switch (ldg.size) {
96 case LoadSize::U8:
97 X(dest_reg, ir.LoadGlobalU8(address));
98 break;
99 case LoadSize::S8:
100 X(dest_reg, ir.LoadGlobalS8(address));
101 break;
102 case LoadSize::U16:
103 X(dest_reg, ir.LoadGlobalU16(address));
104 break;
105 case LoadSize::S16:
106 X(dest_reg, ir.LoadGlobalS16(address));
107 break;
108 case LoadSize::B32:
109 X(dest_reg, ir.LoadGlobal32(address));
110 break;
111 case LoadSize::B64: {
112 if (!IR::IsAligned(dest_reg, 2)) {
113 throw NotImplementedException("Unaligned data registers");
114 }
115 const IR::Value vector{ir.LoadGlobal64(address)};
116 for (int i = 0; i < 2; ++i) {
117 X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
118 }
119 break;
120 }
121 case LoadSize::B128:
122 case LoadSize::U128: {
123 if (!IR::IsAligned(dest_reg, 4)) {
124 throw NotImplementedException("Unaligned data registers");
125 }
126 const IR::Value vector{ir.LoadGlobal128(address)};
127 for (int i = 0; i < 4; ++i) {
128 X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
129 }
130 break;
131 }
132 default:
133 throw NotImplementedException("Invalid LDG size {}", ldg.size.Value());
134 }
135}
136
137void TranslatorVisitor::STG(u64 insn) {
138 // STG stores registers into global memory.
139 union {
140 u64 raw;
141 BitField<0, 8, IR::Reg> data_reg;
142 BitField<46, 2, StoreCache> cache;
143 BitField<48, 3, StoreSize> size;
144 } const stg{insn};
145
146 // Pointer to store data into
147 const IR::U64 address{Address(*this, insn)};
148 const IR::Reg data_reg{stg.data_reg};
149 switch (stg.size) {
150 case StoreSize::U8:
151 ir.WriteGlobalU8(address, X(data_reg));
152 break;
153 case StoreSize::S8:
154 ir.WriteGlobalS8(address, X(data_reg));
155 break;
156 case StoreSize::U16:
157 ir.WriteGlobalU16(address, X(data_reg));
158 break;
159 case StoreSize::S16:
160 ir.WriteGlobalS16(address, X(data_reg));
161 break;
162 case StoreSize::B32:
163 ir.WriteGlobal32(address, X(data_reg));
164 break;
165 case StoreSize::B64: {
166 if (!IR::IsAligned(data_reg, 2)) {
167 throw NotImplementedException("Unaligned data registers");
168 }
169 const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))};
170 ir.WriteGlobal64(address, vector);
171 break;
172 }
173 case StoreSize::B128:
174 if (!IR::IsAligned(data_reg, 4)) {
175 throw NotImplementedException("Unaligned data registers");
176 }
177 const IR::Value vector{
178 ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))};
179 ir.WriteGlobal128(address, vector);
180 break;
181 }
182}
183
184} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
new file mode 100644
index 000000000..92cd27ed4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
@@ -0,0 +1,116 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12enum class LogicalOp : u64 {
13 AND,
14 OR,
15 XOR,
16 PASS_B,
17};
18
19[[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1,
20 const IR::U32& operand_2, LogicalOp op) {
21 switch (op) {
22 case LogicalOp::AND:
23 return ir.BitwiseAnd(operand_1, operand_2);
24 case LogicalOp::OR:
25 return ir.BitwiseOr(operand_1, operand_2);
26 case LogicalOp::XOR:
27 return ir.BitwiseXor(operand_1, operand_2);
28 case LogicalOp::PASS_B:
29 return operand_2;
30 default:
31 throw NotImplementedException("Invalid Logical operation {}", op);
32 }
33}
34
35void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b,
36 LogicalOp bit_op, std::optional<PredicateOp> pred_op = std::nullopt,
37 IR::Pred dest_pred = IR::Pred::PT) {
38 union {
39 u64 insn;
40 BitField<0, 8, IR::Reg> dest_reg;
41 BitField<8, 8, IR::Reg> src_reg;
42 } const lop{insn};
43
44 if (x) {
45 throw NotImplementedException("X");
46 }
47 IR::U32 op_a{v.X(lop.src_reg)};
48 if (inv_a != 0) {
49 op_a = v.ir.BitwiseNot(op_a);
50 }
51 if (inv_b != 0) {
52 op_b = v.ir.BitwiseNot(op_b);
53 }
54
55 const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)};
56 if (pred_op) {
57 const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)};
58 v.ir.SetPred(dest_pred, pred_result);
59 }
60 if (cc) {
61 if (bit_op == LogicalOp::PASS_B) {
62 v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0)));
63 v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true));
64 } else {
65 v.SetZFlag(v.ir.GetZeroFromOp(result));
66 v.SetSFlag(v.ir.GetSignFromOp(result));
67 }
68 v.ResetCFlag();
69 v.ResetOFlag();
70 }
71 v.X(lop.dest_reg, result);
72}
73
74void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
75 union {
76 u64 insn;
77 BitField<39, 1, u64> inv_a;
78 BitField<40, 1, u64> inv_b;
79 BitField<41, 2, LogicalOp> bit_op;
80 BitField<43, 1, u64> x;
81 BitField<44, 2, PredicateOp> pred_op;
82 BitField<47, 1, u64> cc;
83 BitField<48, 3, IR::Pred> dest_pred;
84 } const lop{insn};
85
86 LOP(v, insn, op_b, lop.x != 0, lop.cc != 0, lop.inv_a != 0, lop.inv_b != 0, lop.bit_op,
87 lop.pred_op, lop.dest_pred);
88}
89} // Anonymous namespace
90
91void TranslatorVisitor::LOP_reg(u64 insn) {
92 LOP(*this, insn, GetReg20(insn));
93}
94
95void TranslatorVisitor::LOP_cbuf(u64 insn) {
96 LOP(*this, insn, GetCbuf(insn));
97}
98
99void TranslatorVisitor::LOP_imm(u64 insn) {
100 LOP(*this, insn, GetImm20(insn));
101}
102
103void TranslatorVisitor::LOP32I(u64 insn) {
104 union {
105 u64 raw;
106 BitField<53, 2, LogicalOp> bit_op;
107 BitField<57, 1, u64> x;
108 BitField<52, 1, u64> cc;
109 BitField<55, 1, u64> inv_a;
110 BitField<56, 1, u64> inv_b;
111 } const lop32i{insn};
112
113 LOP(*this, insn, GetImm32(insn), lop32i.x != 0, lop32i.cc != 0, lop32i.inv_a != 0,
114 lop32i.inv_b != 0, lop32i.bit_op);
115}
116} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
new file mode 100644
index 000000000..e0fe47912
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
@@ -0,0 +1,122 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12// https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651
13// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
14IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
15 u64 ttbl) {
16 IR::U32 r{ir.Imm32(0)};
17 const IR::U32 not_a{ir.BitwiseNot(a)};
18 const IR::U32 not_b{ir.BitwiseNot(b)};
19 const IR::U32 not_c{ir.BitwiseNot(c)};
20 if (ttbl & 0x01) {
21 // r |= ~a & ~b & ~c;
22 const auto lhs{ir.BitwiseAnd(not_a, not_b)};
23 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
24 r = ir.BitwiseOr(r, rhs);
25 }
26 if (ttbl & 0x02) {
27 // r |= ~a & ~b & c;
28 const auto lhs{ir.BitwiseAnd(not_a, not_b)};
29 const auto rhs{ir.BitwiseAnd(lhs, c)};
30 r = ir.BitwiseOr(r, rhs);
31 }
32 if (ttbl & 0x04) {
33 // r |= ~a & b & ~c;
34 const auto lhs{ir.BitwiseAnd(not_a, b)};
35 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
36 r = ir.BitwiseOr(r, rhs);
37 }
38 if (ttbl & 0x08) {
39 // r |= ~a & b & c;
40 const auto lhs{ir.BitwiseAnd(not_a, b)};
41 const auto rhs{ir.BitwiseAnd(lhs, c)};
42 r = ir.BitwiseOr(r, rhs);
43 }
44 if (ttbl & 0x10) {
45 // r |= a & ~b & ~c;
46 const auto lhs{ir.BitwiseAnd(a, not_b)};
47 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
48 r = ir.BitwiseOr(r, rhs);
49 }
50 if (ttbl & 0x20) {
51 // r |= a & ~b & c;
52 const auto lhs{ir.BitwiseAnd(a, not_b)};
53 const auto rhs{ir.BitwiseAnd(lhs, c)};
54 r = ir.BitwiseOr(r, rhs);
55 }
56 if (ttbl & 0x40) {
57 // r |= a & b & ~c;
58 const auto lhs{ir.BitwiseAnd(a, b)};
59 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
60 r = ir.BitwiseOr(r, rhs);
61 }
62 if (ttbl & 0x80) {
63 // r |= a & b & c;
64 const auto lhs{ir.BitwiseAnd(a, b)};
65 const auto rhs{ir.BitwiseAnd(lhs, c)};
66 r = ir.BitwiseOr(r, rhs);
67 }
68 return r;
69}
70
71IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {
72 union {
73 u64 insn;
74 BitField<0, 8, IR::Reg> dest_reg;
75 BitField<8, 8, IR::Reg> src_reg;
76 BitField<47, 1, u64> cc;
77 } const lop3{insn};
78
79 if (lop3.cc != 0) {
80 throw NotImplementedException("LOP3 CC");
81 }
82
83 const IR::U32 op_a{v.X(lop3.src_reg)};
84 const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)};
85 v.X(lop3.dest_reg, result);
86 return result;
87}
88
89u64 GetLut48(u64 insn) {
90 union {
91 u64 raw;
92 BitField<48, 8, u64> lut;
93 } const lut{insn};
94 return lut.lut;
95}
96} // Anonymous namespace
97
98void TranslatorVisitor::LOP3_reg(u64 insn) {
99 union {
100 u64 insn;
101 BitField<28, 8, u64> lut;
102 BitField<38, 1, u64> x;
103 BitField<36, 2, PredicateOp> pred_op;
104 BitField<48, 3, IR::Pred> pred;
105 } const lop3{insn};
106
107 if (lop3.x != 0) {
108 throw NotImplementedException("LOP3 X");
109 }
110 const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), lop3.lut)};
111 const IR::U1 pred_result{PredicateOperation(ir, result, lop3.pred_op)};
112 ir.SetPred(lop3.pred, pred_result);
113}
114
115void TranslatorVisitor::LOP3_cbuf(u64 insn) {
116 LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn));
117}
118
119void TranslatorVisitor::LOP3_imm(u64 insn) {
120 LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn));
121}
122} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp
new file mode 100644
index 000000000..4324fd443
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp
@@ -0,0 +1,66 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 PR,
13 CC,
14};
15} // Anonymous namespace
16
17void TranslatorVisitor::P2R_reg(u64) {
18 throw NotImplementedException("P2R (reg)");
19}
20
21void TranslatorVisitor::P2R_cbuf(u64) {
22 throw NotImplementedException("P2R (cbuf)");
23}
24
25void TranslatorVisitor::P2R_imm(u64 insn) {
26 union {
27 u64 raw;
28 BitField<0, 8, IR::Reg> dest_reg;
29 BitField<8, 8, IR::Reg> src;
30 BitField<40, 1, Mode> mode;
31 BitField<41, 2, u64> byte_selector;
32 } const p2r{insn};
33
34 const u32 mask{GetImm20(insn).U32()};
35 const bool pr_mode{p2r.mode == Mode::PR};
36 const u32 num_items{pr_mode ? 7U : 4U};
37 const u32 offset{static_cast<u32>(p2r.byte_selector) * 8};
38 IR::U32 insert{ir.Imm32(0)};
39 for (u32 index = 0; index < num_items; ++index) {
40 if (((mask >> index) & 1) == 0) {
41 continue;
42 }
43 const IR::U1 cond{[this, index, pr_mode] {
44 if (pr_mode) {
45 return ir.GetPred(IR::Pred{index});
46 }
47 switch (index) {
48 case 0:
49 return ir.GetZFlag();
50 case 1:
51 return ir.GetSFlag();
52 case 2:
53 return ir.GetCFlag();
54 case 3:
55 return ir.GetOFlag();
56 }
57 throw LogicError("Unreachable P2R index");
58 }()};
59 const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))};
60 insert = ir.BitwiseOr(insert, bit);
61 }
62 const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))};
63 X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert));
64}
65
66} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
new file mode 100644
index 000000000..6bb08db8a
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
@@ -0,0 +1,44 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<39, 4, u64> mask;
18 BitField<12, 4, u64> mov32i_mask;
19 } const mov{insn};
20
21 if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) {
22 throw NotImplementedException("Non-full move mask");
23 }
24 v.X(mov.dest_reg, src);
25}
26} // Anonymous namespace
27
28void TranslatorVisitor::MOV_reg(u64 insn) {
29 MOV(*this, insn, GetReg20(insn));
30}
31
32void TranslatorVisitor::MOV_cbuf(u64 insn) {
33 MOV(*this, insn, GetCbuf(insn));
34}
35
36void TranslatorVisitor::MOV_imm(u64 insn) {
37 MOV(*this, insn, GetImm20(insn));
38}
39
40void TranslatorVisitor::MOV32I(u64 insn) {
41 MOV(*this, insn, GetImm32(insn), true);
42}
43
44} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp
new file mode 100644
index 000000000..eda5f177b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp
@@ -0,0 +1,71 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 PR,
13 CC,
14};
15
16void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) {
17 switch (index) {
18 case 0:
19 return ir.SetZFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetZFlag(), src_bit)});
20 case 1:
21 return ir.SetSFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetSFlag(), src_bit)});
22 case 2:
23 return ir.SetCFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetCFlag(), src_bit)});
24 case 3:
25 return ir.SetOFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetOFlag(), src_bit)});
26 default:
27 throw LogicError("Unreachable R2P index");
28 }
29}
30
31void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) {
32 union {
33 u64 raw;
34 BitField<8, 8, IR::Reg> src_reg;
35 BitField<40, 1, Mode> mode;
36 BitField<41, 2, u64> byte_selector;
37 } const r2p{insn};
38 const IR::U32 src{v.X(r2p.src_reg)};
39 const IR::U32 count{v.ir.Imm32(1)};
40 const bool pr_mode{r2p.mode == Mode::PR};
41 const u32 num_items{pr_mode ? 7U : 4U};
42 const u32 offset_base{static_cast<u32>(r2p.byte_selector) * 8};
43 for (u32 index = 0; index < num_items; ++index) {
44 const IR::U32 offset{v.ir.Imm32(offset_base + index)};
45 const IR::U1 src_zero{v.ir.GetZeroFromOp(v.ir.BitFieldExtract(src, offset, count, false))};
46 const IR::U1 src_bit{v.ir.LogicalNot(src_zero)};
47 const IR::U32 mask_bfe{v.ir.BitFieldExtract(mask, v.ir.Imm32(index), count, false)};
48 const IR::U1 inv_mask_bit{v.ir.GetZeroFromOp(mask_bfe)};
49 if (pr_mode) {
50 const IR::Pred pred{index};
51 v.ir.SetPred(pred, IR::U1{v.ir.Select(inv_mask_bit, v.ir.GetPred(pred), src_bit)});
52 } else {
53 SetFlag(v.ir, inv_mask_bit, src_bit, index);
54 }
55 }
56}
57} // Anonymous namespace
58
59void TranslatorVisitor::R2P_reg(u64 insn) {
60 R2P(*this, insn, GetReg20(insn));
61}
62
63void TranslatorVisitor::R2P_cbuf(u64 insn) {
64 R2P(*this, insn, GetCbuf(insn));
65}
66
67void TranslatorVisitor::R2P_imm(u64 insn) {
68 R2P(*this, insn, GetImm20(insn));
69}
70
71} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
new file mode 100644
index 000000000..20cb2674e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
@@ -0,0 +1,181 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class SpecialRegister : u64 {
12 SR_LANEID = 0,
13 SR_CLOCK = 1,
14 SR_VIRTCFG = 2,
15 SR_VIRTID = 3,
16 SR_PM0 = 4,
17 SR_PM1 = 5,
18 SR_PM2 = 6,
19 SR_PM3 = 7,
20 SR_PM4 = 8,
21 SR_PM5 = 9,
22 SR_PM6 = 10,
23 SR_PM7 = 11,
24 SR12 = 12,
25 SR13 = 13,
26 SR14 = 14,
27 SR_ORDERING_TICKET = 15,
28 SR_PRIM_TYPE = 16,
29 SR_INVOCATION_ID = 17,
30 SR_Y_DIRECTION = 18,
31 SR_THREAD_KILL = 19,
32 SM_SHADER_TYPE = 20,
33 SR_DIRECTCBEWRITEADDRESSLOW = 21,
34 SR_DIRECTCBEWRITEADDRESSHIGH = 22,
35 SR_DIRECTCBEWRITEENABLE = 23,
36 SR_MACHINE_ID_0 = 24,
37 SR_MACHINE_ID_1 = 25,
38 SR_MACHINE_ID_2 = 26,
39 SR_MACHINE_ID_3 = 27,
40 SR_AFFINITY = 28,
41 SR_INVOCATION_INFO = 29,
42 SR_WSCALEFACTOR_XY = 30,
43 SR_WSCALEFACTOR_Z = 31,
44 SR_TID = 32,
45 SR_TID_X = 33,
46 SR_TID_Y = 34,
47 SR_TID_Z = 35,
48 SR_CTA_PARAM = 36,
49 SR_CTAID_X = 37,
50 SR_CTAID_Y = 38,
51 SR_CTAID_Z = 39,
52 SR_NTID = 40,
53 SR_CirQueueIncrMinusOne = 41,
54 SR_NLATC = 42,
55 SR43 = 43,
56 SR_SM_SPA_VERSION = 44,
57 SR_MULTIPASSSHADERINFO = 45,
58 SR_LWINHI = 46,
59 SR_SWINHI = 47,
60 SR_SWINLO = 48,
61 SR_SWINSZ = 49,
62 SR_SMEMSZ = 50,
63 SR_SMEMBANKS = 51,
64 SR_LWINLO = 52,
65 SR_LWINSZ = 53,
66 SR_LMEMLOSZ = 54,
67 SR_LMEMHIOFF = 55,
68 SR_EQMASK = 56,
69 SR_LTMASK = 57,
70 SR_LEMASK = 58,
71 SR_GTMASK = 59,
72 SR_GEMASK = 60,
73 SR_REGALLOC = 61,
74 SR_BARRIERALLOC = 62,
75 SR63 = 63,
76 SR_GLOBALERRORSTATUS = 64,
77 SR65 = 65,
78 SR_WARPERRORSTATUS = 66,
79 SR_WARPERRORSTATUSCLEAR = 67,
80 SR68 = 68,
81 SR69 = 69,
82 SR70 = 70,
83 SR71 = 71,
84 SR_PM_HI0 = 72,
85 SR_PM_HI1 = 73,
86 SR_PM_HI2 = 74,
87 SR_PM_HI3 = 75,
88 SR_PM_HI4 = 76,
89 SR_PM_HI5 = 77,
90 SR_PM_HI6 = 78,
91 SR_PM_HI7 = 79,
92 SR_CLOCKLO = 80,
93 SR_CLOCKHI = 81,
94 SR_GLOBALTIMERLO = 82,
95 SR_GLOBALTIMERHI = 83,
96 SR84 = 84,
97 SR85 = 85,
98 SR86 = 86,
99 SR87 = 87,
100 SR88 = 88,
101 SR89 = 89,
102 SR90 = 90,
103 SR91 = 91,
104 SR92 = 92,
105 SR93 = 93,
106 SR94 = 94,
107 SR95 = 95,
108 SR_HWTASKID = 96,
109 SR_CIRCULARQUEUEENTRYINDEX = 97,
110 SR_CIRCULARQUEUEENTRYADDRESSLOW = 98,
111 SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99,
112};
113
114[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) {
115 switch (special_register) {
116 case SpecialRegister::SR_INVOCATION_ID:
117 return ir.InvocationId();
118 case SpecialRegister::SR_THREAD_KILL:
119 return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))};
120 case SpecialRegister::SR_INVOCATION_INFO:
121 LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO");
122 return ir.Imm32(0x00ff'0000);
123 case SpecialRegister::SR_TID: {
124 const IR::Value tid{ir.LocalInvocationId()};
125 return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)},
126 IR::U32{ir.CompositeExtract(tid, 1)},
127 ir.Imm32(16), ir.Imm32(8)),
128 IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6));
129 }
130 case SpecialRegister::SR_TID_X:
131 return ir.LocalInvocationIdX();
132 case SpecialRegister::SR_TID_Y:
133 return ir.LocalInvocationIdY();
134 case SpecialRegister::SR_TID_Z:
135 return ir.LocalInvocationIdZ();
136 case SpecialRegister::SR_CTAID_X:
137 return ir.WorkgroupIdX();
138 case SpecialRegister::SR_CTAID_Y:
139 return ir.WorkgroupIdY();
140 case SpecialRegister::SR_CTAID_Z:
141 return ir.WorkgroupIdZ();
142 case SpecialRegister::SR_WSCALEFACTOR_XY:
143 LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY");
144 return ir.Imm32(Common::BitCast<u32>(1.0f));
145 case SpecialRegister::SR_WSCALEFACTOR_Z:
146 LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_Z");
147 return ir.Imm32(Common::BitCast<u32>(1.0f));
148 case SpecialRegister::SR_LANEID:
149 return ir.LaneId();
150 case SpecialRegister::SR_EQMASK:
151 return ir.SubgroupEqMask();
152 case SpecialRegister::SR_LTMASK:
153 return ir.SubgroupLtMask();
154 case SpecialRegister::SR_LEMASK:
155 return ir.SubgroupLeMask();
156 case SpecialRegister::SR_GTMASK:
157 return ir.SubgroupGtMask();
158 case SpecialRegister::SR_GEMASK:
159 return ir.SubgroupGeMask();
160 case SpecialRegister::SR_Y_DIRECTION:
161 return ir.BitCast<IR::U32>(ir.YDirection());
162 case SpecialRegister::SR_AFFINITY:
163 LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY");
164 return ir.Imm32(0); // This is the default value hardware returns.
165 default:
166 throw NotImplementedException("S2R special register {}", special_register);
167 }
168}
169} // Anonymous namespace
170
171void TranslatorVisitor::S2R(u64 insn) {
172 union {
173 u64 raw;
174 BitField<0, 8, IR::Reg> dest_reg;
175 BitField<20, 8, SpecialRegister> src_reg;
176 } const s2r{insn};
177
178 X(s2r.dest_reg, Read(ir, s2r.src_reg));
179}
180
181} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
new file mode 100644
index 000000000..7e26ab359
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -0,0 +1,283 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/opcodes.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11
12[[noreturn]] static void ThrowNotImplemented(Opcode opcode) {
13 throw NotImplementedException("Instruction {} is not implemented", opcode);
14}
15
16void TranslatorVisitor::ATOM_cas(u64) {
17 ThrowNotImplemented(Opcode::ATOM_cas);
18}
19
20void TranslatorVisitor::ATOMS_cas(u64) {
21 ThrowNotImplemented(Opcode::ATOMS_cas);
22}
23
24void TranslatorVisitor::B2R(u64) {
25 ThrowNotImplemented(Opcode::B2R);
26}
27
28void TranslatorVisitor::BPT(u64) {
29 ThrowNotImplemented(Opcode::BPT);
30}
31
32void TranslatorVisitor::BRA(u64) {
33 ThrowNotImplemented(Opcode::BRA);
34}
35
36void TranslatorVisitor::BRK(u64) {
37 ThrowNotImplemented(Opcode::BRK);
38}
39
40void TranslatorVisitor::CAL() {
41 // CAL is a no-op
42}
43
44void TranslatorVisitor::CCTL(u64) {
45 ThrowNotImplemented(Opcode::CCTL);
46}
47
48void TranslatorVisitor::CCTLL(u64) {
49 ThrowNotImplemented(Opcode::CCTLL);
50}
51
52void TranslatorVisitor::CONT(u64) {
53 ThrowNotImplemented(Opcode::CONT);
54}
55
56void TranslatorVisitor::CS2R(u64) {
57 ThrowNotImplemented(Opcode::CS2R);
58}
59
60void TranslatorVisitor::FCHK_reg(u64) {
61 ThrowNotImplemented(Opcode::FCHK_reg);
62}
63
64void TranslatorVisitor::FCHK_cbuf(u64) {
65 ThrowNotImplemented(Opcode::FCHK_cbuf);
66}
67
68void TranslatorVisitor::FCHK_imm(u64) {
69 ThrowNotImplemented(Opcode::FCHK_imm);
70}
71
72void TranslatorVisitor::GETCRSPTR(u64) {
73 ThrowNotImplemented(Opcode::GETCRSPTR);
74}
75
76void TranslatorVisitor::GETLMEMBASE(u64) {
77 ThrowNotImplemented(Opcode::GETLMEMBASE);
78}
79
80void TranslatorVisitor::IDE(u64) {
81 ThrowNotImplemented(Opcode::IDE);
82}
83
84void TranslatorVisitor::IDP_reg(u64) {
85 ThrowNotImplemented(Opcode::IDP_reg);
86}
87
88void TranslatorVisitor::IDP_imm(u64) {
89 ThrowNotImplemented(Opcode::IDP_imm);
90}
91
92void TranslatorVisitor::IMAD_reg(u64) {
93 ThrowNotImplemented(Opcode::IMAD_reg);
94}
95
96void TranslatorVisitor::IMAD_rc(u64) {
97 ThrowNotImplemented(Opcode::IMAD_rc);
98}
99
100void TranslatorVisitor::IMAD_cr(u64) {
101 ThrowNotImplemented(Opcode::IMAD_cr);
102}
103
104void TranslatorVisitor::IMAD_imm(u64) {
105 ThrowNotImplemented(Opcode::IMAD_imm);
106}
107
108void TranslatorVisitor::IMAD32I(u64) {
109 ThrowNotImplemented(Opcode::IMAD32I);
110}
111
112void TranslatorVisitor::IMADSP_reg(u64) {
113 ThrowNotImplemented(Opcode::IMADSP_reg);
114}
115
116void TranslatorVisitor::IMADSP_rc(u64) {
117 ThrowNotImplemented(Opcode::IMADSP_rc);
118}
119
120void TranslatorVisitor::IMADSP_cr(u64) {
121 ThrowNotImplemented(Opcode::IMADSP_cr);
122}
123
124void TranslatorVisitor::IMADSP_imm(u64) {
125 ThrowNotImplemented(Opcode::IMADSP_imm);
126}
127
128void TranslatorVisitor::IMUL_reg(u64) {
129 ThrowNotImplemented(Opcode::IMUL_reg);
130}
131
132void TranslatorVisitor::IMUL_cbuf(u64) {
133 ThrowNotImplemented(Opcode::IMUL_cbuf);
134}
135
136void TranslatorVisitor::IMUL_imm(u64) {
137 ThrowNotImplemented(Opcode::IMUL_imm);
138}
139
140void TranslatorVisitor::IMUL32I(u64) {
141 ThrowNotImplemented(Opcode::IMUL32I);
142}
143
144void TranslatorVisitor::JCAL(u64) {
145 ThrowNotImplemented(Opcode::JCAL);
146}
147
148void TranslatorVisitor::JMP(u64) {
149 ThrowNotImplemented(Opcode::JMP);
150}
151
152void TranslatorVisitor::KIL() {
153 // KIL is a no-op
154}
155
156void TranslatorVisitor::LD(u64) {
157 ThrowNotImplemented(Opcode::LD);
158}
159
160void TranslatorVisitor::LEPC(u64) {
161 ThrowNotImplemented(Opcode::LEPC);
162}
163
164void TranslatorVisitor::LONGJMP(u64) {
165 ThrowNotImplemented(Opcode::LONGJMP);
166}
167
168void TranslatorVisitor::NOP(u64) {
169 // NOP is No-Op.
170}
171
172void TranslatorVisitor::PBK() {
173 // PBK is a no-op
174}
175
176void TranslatorVisitor::PCNT() {
177 // PCNT is a no-op
178}
179
180void TranslatorVisitor::PEXIT(u64) {
181 ThrowNotImplemented(Opcode::PEXIT);
182}
183
184void TranslatorVisitor::PLONGJMP(u64) {
185 ThrowNotImplemented(Opcode::PLONGJMP);
186}
187
188void TranslatorVisitor::PRET(u64) {
189 ThrowNotImplemented(Opcode::PRET);
190}
191
192void TranslatorVisitor::PRMT_reg(u64) {
193 ThrowNotImplemented(Opcode::PRMT_reg);
194}
195
196void TranslatorVisitor::PRMT_rc(u64) {
197 ThrowNotImplemented(Opcode::PRMT_rc);
198}
199
200void TranslatorVisitor::PRMT_cr(u64) {
201 ThrowNotImplemented(Opcode::PRMT_cr);
202}
203
204void TranslatorVisitor::PRMT_imm(u64) {
205 ThrowNotImplemented(Opcode::PRMT_imm);
206}
207
208void TranslatorVisitor::R2B(u64) {
209 ThrowNotImplemented(Opcode::R2B);
210}
211
212void TranslatorVisitor::RAM(u64) {
213 ThrowNotImplemented(Opcode::RAM);
214}
215
216void TranslatorVisitor::RET(u64) {
217 ThrowNotImplemented(Opcode::RET);
218}
219
220void TranslatorVisitor::RTT(u64) {
221 ThrowNotImplemented(Opcode::RTT);
222}
223
224void TranslatorVisitor::SAM(u64) {
225 ThrowNotImplemented(Opcode::SAM);
226}
227
228void TranslatorVisitor::SETCRSPTR(u64) {
229 ThrowNotImplemented(Opcode::SETCRSPTR);
230}
231
232void TranslatorVisitor::SETLMEMBASE(u64) {
233 ThrowNotImplemented(Opcode::SETLMEMBASE);
234}
235
236void TranslatorVisitor::SSY() {
237 // SSY is a no-op
238}
239
240void TranslatorVisitor::ST(u64) {
241 ThrowNotImplemented(Opcode::ST);
242}
243
244void TranslatorVisitor::STP(u64) {
245 ThrowNotImplemented(Opcode::STP);
246}
247
248void TranslatorVisitor::SUATOM_cas(u64) {
249 ThrowNotImplemented(Opcode::SUATOM_cas);
250}
251
252void TranslatorVisitor::SYNC(u64) {
253 ThrowNotImplemented(Opcode::SYNC);
254}
255
256void TranslatorVisitor::TXA(u64) {
257 ThrowNotImplemented(Opcode::TXA);
258}
259
260void TranslatorVisitor::VABSDIFF(u64) {
261 ThrowNotImplemented(Opcode::VABSDIFF);
262}
263
264void TranslatorVisitor::VABSDIFF4(u64) {
265 ThrowNotImplemented(Opcode::VABSDIFF4);
266}
267
268void TranslatorVisitor::VADD(u64) {
269 ThrowNotImplemented(Opcode::VADD);
270}
271
272void TranslatorVisitor::VSET(u64) {
273 ThrowNotImplemented(Opcode::VSET);
274}
275void TranslatorVisitor::VSHL(u64) {
276 ThrowNotImplemented(Opcode::VSHL);
277}
278
279void TranslatorVisitor::VSHR(u64) {
280 ThrowNotImplemented(Opcode::VSHR);
281}
282
283} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
new file mode 100644
index 000000000..01cfad88d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
@@ -0,0 +1,45 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> output_reg; // Not needed on host
16 BitField<39, 1, u64> emit;
17 BitField<40, 1, u64> cut;
18 } const out{insn};
19
20 stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11));
21
22 if (out.emit != 0) {
23 v.ir.EmitVertex(stream_index);
24 }
25 if (out.cut != 0) {
26 v.ir.EndPrimitive(stream_index);
27 }
28 // Host doesn't need the output register, but we can write to it to avoid undefined reads
29 v.X(out.dest_reg, v.ir.Imm32(0));
30}
31} // Anonymous namespace
32
33void TranslatorVisitor::OUT_reg(u64 insn) {
34 OUT(*this, insn, GetReg20(insn));
35}
36
37void TranslatorVisitor::OUT_cbuf(u64 insn) {
38 OUT(*this, insn, GetCbuf(insn));
39}
40
41void TranslatorVisitor::OUT_imm(u64 insn) {
42 OUT(*this, insn, GetImm20(insn));
43}
44
45} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp
new file mode 100644
index 000000000..b4767afb5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp
@@ -0,0 +1,46 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 Default,
13 CovMask,
14 Covered,
15 Offset,
16 CentroidOffset,
17 MyIndex,
18};
19} // Anonymous namespace
20
21void TranslatorVisitor::PIXLD(u64 insn) {
22 union {
23 u64 raw;
24 BitField<31, 3, Mode> mode;
25 BitField<0, 8, IR::Reg> dest_reg;
26 BitField<8, 8, IR::Reg> addr_reg;
27 BitField<20, 8, s64> addr_offset;
28 BitField<45, 3, IR::Pred> dest_pred;
29 } const pixld{insn};
30
31 if (pixld.dest_pred != IR::Pred::PT) {
32 throw NotImplementedException("Destination predicate");
33 }
34 if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) {
35 throw NotImplementedException("Non-zero source register");
36 }
37 switch (pixld.mode) {
38 case Mode::MyIndex:
39 X(pixld.dest_reg, ir.SampleId());
40 break;
41 default:
42 throw NotImplementedException("Mode {}", pixld.mode.Value());
43 }
44}
45
46} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp
new file mode 100644
index 000000000..75d1fa8c1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp
@@ -0,0 +1,38 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11void TranslatorVisitor::PSETP(u64 insn) {
12 union {
13 u64 raw;
14 BitField<0, 3, IR::Pred> dest_pred_b;
15 BitField<3, 3, IR::Pred> dest_pred_a;
16 BitField<12, 3, IR::Pred> pred_a;
17 BitField<15, 1, u64> neg_pred_a;
18 BitField<24, 2, BooleanOp> bop_1;
19 BitField<29, 3, IR::Pred> pred_b;
20 BitField<32, 1, u64> neg_pred_b;
21 BitField<39, 3, IR::Pred> pred_c;
22 BitField<42, 1, u64> neg_pred_c;
23 BitField<45, 2, BooleanOp> bop_2;
24 } const pset{insn};
25
26 const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)};
27 const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)};
28 const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)};
29
30 const IR::U1 lhs_a{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)};
31 const IR::U1 lhs_b{PredicateCombine(ir, ir.LogicalNot(pred_a), pred_b, pset.bop_1)};
32 const IR::U1 result_a{PredicateCombine(ir, lhs_a, pred_c, pset.bop_2)};
33 const IR::U1 result_b{PredicateCombine(ir, lhs_b, pred_c, pset.bop_2)};
34
35 ir.SetPred(pset.dest_pred_a, result_a);
36 ir.SetPred(pset.dest_pred_b, result_b);
37}
38} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp
new file mode 100644
index 000000000..b02789874
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp
@@ -0,0 +1,53 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11void TranslatorVisitor::PSET(u64 insn) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<12, 3, IR::Pred> pred_a;
16 BitField<15, 1, u64> neg_pred_a;
17 BitField<24, 2, BooleanOp> bop_1;
18 BitField<29, 3, IR::Pred> pred_b;
19 BitField<32, 1, u64> neg_pred_b;
20 BitField<39, 3, IR::Pred> pred_c;
21 BitField<42, 1, u64> neg_pred_c;
22 BitField<44, 1, u64> bf;
23 BitField<45, 2, BooleanOp> bop_2;
24 BitField<47, 1, u64> cc;
25 } const pset{insn};
26
27 const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)};
28 const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)};
29 const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)};
30
31 const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)};
32 const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)};
33
34 const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)};
35 const IR::U32 zero{ir.Imm32(0)};
36
37 const IR::U32 result{ir.Select(res_2, true_result, zero)};
38
39 X(pset.dest_reg, result);
40 if (pset.cc != 0) {
41 const IR::U1 is_zero{ir.IEqual(result, zero)};
42 SetZFlag(is_zero);
43 if (pset.bf != 0) {
44 ResetSFlag();
45 } else {
46 SetSFlag(ir.LogicalNot(is_zero));
47 }
48 ResetOFlag();
49 ResetCFlag();
50 }
51}
52
53} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp
new file mode 100644
index 000000000..93baa75a9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp
@@ -0,0 +1,44 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11
12void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_reg;
17 BitField<39, 3, IR::Pred> pred;
18 BitField<42, 1, u64> neg_pred;
19 } const sel{insn};
20
21 const IR::U1 pred = v.ir.GetPred(sel.pred);
22 IR::U32 op_a{v.X(sel.src_reg)};
23 IR::U32 op_b{src};
24 if (sel.neg_pred != 0) {
25 std::swap(op_a, op_b);
26 }
27 const IR::U32 result{v.ir.Select(pred, op_a, op_b)};
28
29 v.X(sel.dest_reg, result);
30}
31} // Anonymous namespace
32
33void TranslatorVisitor::SEL_reg(u64 insn) {
34 SEL(*this, insn, GetReg20(insn));
35}
36
37void TranslatorVisitor::SEL_cbuf(u64 insn) {
38 SEL(*this, insn, GetCbuf(insn));
39}
40
41void TranslatorVisitor::SEL_imm(u64 insn) {
42 SEL(*this, insn, GetImm20(insn));
43}
44} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp
new file mode 100644
index 000000000..63b588ad4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp
@@ -0,0 +1,205 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <bit>
7
8#include "common/bit_field.h"
9#include "common/common_types.h"
10#include "shader_recompiler/frontend/ir/modifiers.h"
11#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
12
13namespace Shader::Maxwell {
14namespace {
15enum class Type : u64 {
16 _1D,
17 BUFFER_1D,
18 ARRAY_1D,
19 _2D,
20 ARRAY_2D,
21 _3D,
22};
23
24enum class Size : u64 {
25 U32,
26 S32,
27 U64,
28 S64,
29 F32FTZRN,
30 F16x2FTZRN,
31 SD32,
32 SD64,
33};
34
35enum class AtomicOp : u64 {
36 ADD,
37 MIN,
38 MAX,
39 INC,
40 DEC,
41 AND,
42 OR,
43 XOR,
44 EXCH,
45};
46
47enum class Clamp : u64 {
48 IGN,
49 Default,
50 TRAP,
51};
52
53TextureType GetType(Type type) {
54 switch (type) {
55 case Type::_1D:
56 return TextureType::Color1D;
57 case Type::BUFFER_1D:
58 return TextureType::Buffer;
59 case Type::ARRAY_1D:
60 return TextureType::ColorArray1D;
61 case Type::_2D:
62 return TextureType::Color2D;
63 case Type::ARRAY_2D:
64 return TextureType::ColorArray2D;
65 case Type::_3D:
66 return TextureType::Color3D;
67 }
68 throw NotImplementedException("Invalid type {}", type);
69}
70
71IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
72 switch (type) {
73 case Type::_1D:
74 case Type::BUFFER_1D:
75 return v.X(reg);
76 case Type::_2D:
77 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
78 case Type::_3D:
79 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
80 default:
81 break;
82 }
83 throw NotImplementedException("Invalid type {}", type);
84}
85
86IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords,
87 const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op,
88 bool is_signed) {
89 switch (op) {
90 case AtomicOp::ADD:
91 return ir.ImageAtomicIAdd(handle, coords, op_b, info);
92 case AtomicOp::MIN:
93 return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info);
94 case AtomicOp::MAX:
95 return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info);
96 case AtomicOp::INC:
97 return ir.ImageAtomicInc(handle, coords, op_b, info);
98 case AtomicOp::DEC:
99 return ir.ImageAtomicDec(handle, coords, op_b, info);
100 case AtomicOp::AND:
101 return ir.ImageAtomicAnd(handle, coords, op_b, info);
102 case AtomicOp::OR:
103 return ir.ImageAtomicOr(handle, coords, op_b, info);
104 case AtomicOp::XOR:
105 return ir.ImageAtomicXor(handle, coords, op_b, info);
106 case AtomicOp::EXCH:
107 return ir.ImageAtomicExchange(handle, coords, op_b, info);
108 default:
109 throw NotImplementedException("Atomic Operation {}", op);
110 }
111}
112
113ImageFormat Format(Size size) {
114 switch (size) {
115 case Size::U32:
116 case Size::S32:
117 case Size::SD32:
118 return ImageFormat::R32_UINT;
119 default:
120 break;
121 }
122 throw NotImplementedException("Invalid size {}", size);
123}
124
125bool IsSizeInt32(Size size) {
126 switch (size) {
127 case Size::U32:
128 case Size::S32:
129 case Size::SD32:
130 return true;
131 default:
132 return false;
133 }
134}
135
136void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg,
137 IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type,
138 u64 bound_offset, bool is_bindless, bool write_result) {
139 if (clamp != Clamp::IGN) {
140 throw NotImplementedException("Clamp {}", clamp);
141 }
142 if (!IsSizeInt32(size)) {
143 throw NotImplementedException("Size {}", size);
144 }
145 const bool is_signed{size == Size::S32};
146 const ImageFormat format{Format(size)};
147 const TextureType tex_type{GetType(type)};
148 const IR::Value coords{MakeCoords(v, coord_reg, type)};
149
150 const IR::U32 handle{is_bindless != 0 ? v.X(bindless_reg)
151 : v.ir.Imm32(static_cast<u32>(bound_offset * 4))};
152 IR::TextureInstInfo info{};
153 info.type.Assign(tex_type);
154 info.image_format.Assign(format);
155
156 // TODO: float/64-bit operand
157 const IR::Value op_b{v.X(operand_reg)};
158 const IR::Value color{ApplyAtomicOp(v.ir, handle, coords, op_b, info, op, is_signed)};
159
160 if (write_result) {
161 v.X(dest_reg, IR::U32{color});
162 }
163}
164} // Anonymous namespace
165
166void TranslatorVisitor::SUATOM(u64 insn) {
167 union {
168 u64 raw;
169 BitField<54, 1, u64> is_bindless;
170 BitField<29, 4, AtomicOp> op;
171 BitField<33, 3, Type> type;
172 BitField<51, 3, Size> size;
173 BitField<49, 2, Clamp> clamp;
174 BitField<0, 8, IR::Reg> dest_reg;
175 BitField<8, 8, IR::Reg> coord_reg;
176 BitField<20, 8, IR::Reg> operand_reg;
177 BitField<36, 13, u64> bound_offset; // !is_bindless
178 BitField<39, 8, IR::Reg> bindless_reg; // is_bindless
179 } const suatom{insn};
180
181 ImageAtomOp(*this, suatom.dest_reg, suatom.operand_reg, suatom.coord_reg, suatom.bindless_reg,
182 suatom.op, suatom.clamp, suatom.size, suatom.type, suatom.bound_offset,
183 suatom.is_bindless != 0, true);
184}
185
186void TranslatorVisitor::SURED(u64 insn) {
187 // TODO: confirm offsets
188 union {
189 u64 raw;
190 BitField<51, 1, u64> is_bound;
191 BitField<21, 3, AtomicOp> op;
192 BitField<33, 3, Type> type;
193 BitField<20, 3, Size> size;
194 BitField<49, 2, Clamp> clamp;
195 BitField<0, 8, IR::Reg> operand_reg;
196 BitField<8, 8, IR::Reg> coord_reg;
197 BitField<36, 13, u64> bound_offset; // is_bound
198 BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
199 } const sured{insn};
200 ImageAtomOp(*this, IR::Reg::RZ, sured.operand_reg, sured.coord_reg, sured.bindless_reg,
201 sured.op, sured.clamp, sured.size, sured.type, sured.bound_offset,
202 sured.is_bound == 0, false);
203}
204
205} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
new file mode 100644
index 000000000..681220a8d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
@@ -0,0 +1,281 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <bit>
7
8#include "common/bit_field.h"
9#include "common/common_types.h"
10#include "shader_recompiler/frontend/ir/modifiers.h"
11#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
12
13namespace Shader::Maxwell {
14namespace {
15enum class Type : u64 {
16 _1D,
17 BUFFER_1D,
18 ARRAY_1D,
19 _2D,
20 ARRAY_2D,
21 _3D,
22};
23
// One bit per color component.
constexpr unsigned R = 0x1U;
constexpr unsigned G = 0x2U;
constexpr unsigned B = 0x4U;
constexpr unsigned A = 0x8U;

// Component mask for each 4-bit swizzle encoding, indexed by the encoding.
// Rows group the entries by which of B and A are present.
constexpr std::array<unsigned, 16> MASK{
    0U,    R,         G,         R | G,         // neither B nor A
    B,     R | B,     G | B,     R | G | B,     // B only
    A,     R | A,     G | A,     R | G | A,     // A only
    B | A, R | B | A, G | B | A, R | G | B | A, // B and A
};
47
48enum class Size : u64 {
49 U8,
50 S8,
51 U16,
52 S16,
53 B32,
54 B64,
55 B128,
56};
57
58enum class Clamp : u64 {
59 IGN,
60 Default,
61 TRAP,
62};
63
64// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators
65enum class LoadCache : u64 {
66 CA, // Cache at all levels, likely to be accessed again
67 CG, // Cache at global level (L2 and below, not L1)
68 CI, // ???
69 CV, // Don't cache and fetch again (volatile)
70};
71
72enum class StoreCache : u64 {
73 WB, // Cache write-back all coherent levels
74 CG, // Cache at global level (L2 and below, not L1)
75 CS, // Cache streaming, likely to be accessed once
76 WT, // Cache write-through (to system memory, volatile?)
77};
78
79ImageFormat Format(Size size) {
80 switch (size) {
81 case Size::U8:
82 return ImageFormat::R8_UINT;
83 case Size::S8:
84 return ImageFormat::R8_SINT;
85 case Size::U16:
86 return ImageFormat::R16_UINT;
87 case Size::S16:
88 return ImageFormat::R16_SINT;
89 case Size::B32:
90 return ImageFormat::R32_UINT;
91 case Size::B64:
92 return ImageFormat::R32G32_UINT;
93 case Size::B128:
94 return ImageFormat::R32G32B32A32_UINT;
95 }
96 throw NotImplementedException("Invalid size {}", size);
97}
98
99int SizeInRegs(Size size) {
100 switch (size) {
101 case Size::U8:
102 case Size::S8:
103 case Size::U16:
104 case Size::S16:
105 case Size::B32:
106 return 1;
107 case Size::B64:
108 return 2;
109 case Size::B128:
110 return 4;
111 }
112 throw NotImplementedException("Invalid size {}", size);
113}
114
115TextureType GetType(Type type) {
116 switch (type) {
117 case Type::_1D:
118 return TextureType::Color1D;
119 case Type::BUFFER_1D:
120 return TextureType::Buffer;
121 case Type::ARRAY_1D:
122 return TextureType::ColorArray1D;
123 case Type::_2D:
124 return TextureType::Color2D;
125 case Type::ARRAY_2D:
126 return TextureType::ColorArray2D;
127 case Type::_3D:
128 return TextureType::Color3D;
129 }
130 throw NotImplementedException("Invalid type {}", type);
131}
132
133IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
134 const auto array{[&](int index) {
135 return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16));
136 }};
137 switch (type) {
138 case Type::_1D:
139 case Type::BUFFER_1D:
140 return v.X(reg);
141 case Type::ARRAY_1D:
142 return v.ir.CompositeConstruct(v.X(reg), array(1));
143 case Type::_2D:
144 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
145 case Type::ARRAY_2D:
146 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2));
147 case Type::_3D:
148 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
149 }
150 throw NotImplementedException("Invalid type {}", type);
151}
152
153unsigned SwizzleMask(u64 swizzle) {
154 if (swizzle == 0 || swizzle >= MASK.size()) {
155 throw NotImplementedException("Invalid swizzle {}", swizzle);
156 }
157 return MASK[swizzle];
158}
159
160IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) {
161 std::array<IR::U32, 4> colors;
162 for (int i = 0; i < num_regs; ++i) {
163 colors[static_cast<size_t>(i)] = ir.GetReg(reg + i);
164 }
165 for (int i = num_regs; i < 4; ++i) {
166 colors[static_cast<size_t>(i)] = ir.Imm32(0);
167 }
168 return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]);
169}
170} // Anonymous namespace
171
172void TranslatorVisitor::SULD(u64 insn) {
173 union {
174 u64 raw;
175 BitField<51, 1, u64> is_bound;
176 BitField<52, 1, u64> d;
177 BitField<23, 1, u64> ba;
178 BitField<33, 3, Type> type;
179 BitField<24, 2, LoadCache> cache;
180 BitField<20, 3, Size> size; // .D
181 BitField<20, 4, u64> swizzle; // .P
182 BitField<49, 2, Clamp> clamp;
183 BitField<0, 8, IR::Reg> dest_reg;
184 BitField<8, 8, IR::Reg> coord_reg;
185 BitField<36, 13, u64> bound_offset; // is_bound
186 BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
187 } const suld{insn};
188
189 if (suld.clamp != Clamp::IGN) {
190 throw NotImplementedException("Clamp {}", suld.clamp.Value());
191 }
192 if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) {
193 throw NotImplementedException("Cache {}", suld.cache.Value());
194 }
195 const bool is_typed{suld.d != 0};
196 if (is_typed && suld.ba != 0) {
197 throw NotImplementedException("BA");
198 }
199
200 const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless};
201 const TextureType type{GetType(suld.type)};
202 const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)};
203 const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast<u32>(suld.bound_offset * 4))
204 : X(suld.bindless_reg)};
205 IR::TextureInstInfo info{};
206 info.type.Assign(type);
207 info.image_format.Assign(format);
208
209 const IR::Value result{ir.ImageRead(handle, coords, info)};
210 IR::Reg dest_reg{suld.dest_reg};
211 if (is_typed) {
212 const int num_regs{SizeInRegs(suld.size)};
213 for (int i = 0; i < num_regs; ++i) {
214 X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
215 }
216 } else {
217 const unsigned mask{SwizzleMask(suld.swizzle)};
218 const int bits{std::popcount(mask)};
219 if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : static_cast<size_t>(bits))) {
220 throw NotImplementedException("Unaligned destination register");
221 }
222 for (unsigned component = 0; component < 4; ++component) {
223 if (((mask >> component) & 1) == 0) {
224 continue;
225 }
226 X(dest_reg, IR::U32{ir.CompositeExtract(result, component)});
227 ++dest_reg;
228 }
229 }
230}
231
232void TranslatorVisitor::SUST(u64 insn) {
233 union {
234 u64 raw;
235 BitField<51, 1, u64> is_bound;
236 BitField<52, 1, u64> d;
237 BitField<23, 1, u64> ba;
238 BitField<33, 3, Type> type;
239 BitField<24, 2, StoreCache> cache;
240 BitField<20, 3, Size> size; // .D
241 BitField<20, 4, u64> swizzle; // .P
242 BitField<49, 2, Clamp> clamp;
243 BitField<0, 8, IR::Reg> data_reg;
244 BitField<8, 8, IR::Reg> coord_reg;
245 BitField<36, 13, u64> bound_offset; // is_bound
246 BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
247 } const sust{insn};
248
249 if (sust.clamp != Clamp::IGN) {
250 throw NotImplementedException("Clamp {}", sust.clamp.Value());
251 }
252 if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) {
253 throw NotImplementedException("Cache {}", sust.cache.Value());
254 }
255 const bool is_typed{sust.d != 0};
256 if (is_typed && sust.ba != 0) {
257 throw NotImplementedException("BA");
258 }
259 const ImageFormat format{is_typed ? Format(sust.size) : ImageFormat::Typeless};
260 const TextureType type{GetType(sust.type)};
261 const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)};
262 const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast<u32>(sust.bound_offset * 4))
263 : X(sust.bindless_reg)};
264 IR::TextureInstInfo info{};
265 info.type.Assign(type);
266 info.image_format.Assign(format);
267
268 IR::Value color;
269 if (is_typed) {
270 color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size));
271 } else {
272 const unsigned mask{SwizzleMask(sust.swizzle)};
273 if (mask != 0xf) {
274 throw NotImplementedException("Non-full mask");
275 }
276 color = MakeColor(ir, sust.data_reg, 4);
277 }
278 ir.ImageWrite(handle, coords, color, info);
279}
280
281} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
new file mode 100644
index 000000000..0046b5edd
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
@@ -0,0 +1,236 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Blod : u64 {
15 None,
16 LZ,
17 LB,
18 LL,
19 INVALIDBLOD4,
20 INVALIDBLOD5,
21 LBA,
22 LLA,
23};
24
25enum class TextureType : u64 {
26 _1D,
27 ARRAY_1D,
28 _2D,
29 ARRAY_2D,
30 _3D,
31 ARRAY_3D,
32 CUBE,
33 ARRAY_CUBE,
34};
35
36Shader::TextureType GetType(TextureType type) {
37 switch (type) {
38 case TextureType::_1D:
39 return Shader::TextureType::Color1D;
40 case TextureType::ARRAY_1D:
41 return Shader::TextureType::ColorArray1D;
42 case TextureType::_2D:
43 return Shader::TextureType::Color2D;
44 case TextureType::ARRAY_2D:
45 return Shader::TextureType::ColorArray2D;
46 case TextureType::_3D:
47 return Shader::TextureType::Color3D;
48 case TextureType::ARRAY_3D:
49 throw NotImplementedException("3D array texture type");
50 case TextureType::CUBE:
51 return Shader::TextureType::ColorCube;
52 case TextureType::ARRAY_CUBE:
53 return Shader::TextureType::ColorArrayCube;
54 }
55 throw NotImplementedException("Invalid texture type {}", type);
56}
57
58IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
59 const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
60 switch (type) {
61 case TextureType::_1D:
62 return v.F(reg);
63 case TextureType::ARRAY_1D:
64 return v.ir.CompositeConstruct(v.F(reg + 1), read_array());
65 case TextureType::_2D:
66 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
67 case TextureType::ARRAY_2D:
68 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array());
69 case TextureType::_3D:
70 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
71 case TextureType::ARRAY_3D:
72 throw NotImplementedException("3D array texture type");
73 case TextureType::CUBE:
74 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
75 case TextureType::ARRAY_CUBE:
76 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array());
77 }
78 throw NotImplementedException("Invalid texture type {}", type);
79}
80
81IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) {
82 switch (blod) {
83 case Blod::None:
84 return v.ir.Imm32(0.0f);
85 case Blod::LZ:
86 return v.ir.Imm32(0.0f);
87 case Blod::LB:
88 case Blod::LL:
89 case Blod::LBA:
90 case Blod::LLA:
91 return v.F(reg++);
92 case Blod::INVALIDBLOD4:
93 case Blod::INVALIDBLOD5:
94 break;
95 }
96 throw NotImplementedException("Invalid blod {}", blod);
97}
98
99IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
100 const IR::U32 value{v.X(reg++)};
101 switch (type) {
102 case TextureType::_1D:
103 case TextureType::ARRAY_1D:
104 return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true);
105 case TextureType::_2D:
106 case TextureType::ARRAY_2D:
107 return v.ir.CompositeConstruct(
108 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
109 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
110 case TextureType::_3D:
111 case TextureType::ARRAY_3D:
112 return v.ir.CompositeConstruct(
113 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
114 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true),
115 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true));
116 case TextureType::CUBE:
117 case TextureType::ARRAY_CUBE:
118 throw NotImplementedException("Illegal offset on CUBE sample");
119 }
120 throw NotImplementedException("Invalid texture type {}", type);
121}
122
123bool HasExplicitLod(Blod blod) {
124 switch (blod) {
125 case Blod::LL:
126 case Blod::LLA:
127 case Blod::LZ:
128 return true;
129 default:
130 return false;
131 }
132}
133
134void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
135 std::optional<u32> cbuf_offset) {
136 union {
137 u64 raw;
138 BitField<35, 1, u64> ndv;
139 BitField<49, 1, u64> nodep;
140 BitField<50, 1, u64> dc;
141 BitField<51, 3, IR::Pred> sparse_pred;
142 BitField<0, 8, IR::Reg> dest_reg;
143 BitField<8, 8, IR::Reg> coord_reg;
144 BitField<20, 8, IR::Reg> meta_reg;
145 BitField<28, 3, TextureType> type;
146 BitField<31, 4, u64> mask;
147 } const tex{insn};
148
149 if (lc) {
150 throw NotImplementedException("LC");
151 }
152 const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)};
153
154 IR::Reg meta_reg{tex.meta_reg};
155 IR::Value handle;
156 IR::Value offset;
157 IR::F32 dref;
158 IR::F32 lod_clamp;
159 if (cbuf_offset) {
160 handle = v.ir.Imm32(*cbuf_offset);
161 } else {
162 handle = v.X(meta_reg++);
163 }
164 const IR::F32 lod{MakeLod(v, meta_reg, blod)};
165 if (aoffi) {
166 offset = MakeOffset(v, meta_reg, tex.type);
167 }
168 if (tex.dc != 0) {
169 dref = v.F(meta_reg++);
170 }
171 IR::TextureInstInfo info{};
172 info.type.Assign(GetType(tex.type));
173 info.is_depth.Assign(tex.dc != 0 ? 1 : 0);
174 info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0);
175 info.has_lod_clamp.Assign(lc ? 1 : 0);
176
177 const IR::Value sample{[&]() -> IR::Value {
178 if (tex.dc == 0) {
179 if (HasExplicitLod(blod)) {
180 return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info);
181 } else {
182 return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info);
183 }
184 }
185 if (HasExplicitLod(blod)) {
186 return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info);
187 } else {
188 return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp,
189 info);
190 }
191 }()};
192
193 IR::Reg dest_reg{tex.dest_reg};
194 for (int element = 0; element < 4; ++element) {
195 if (((tex.mask >> element) & 1) == 0) {
196 continue;
197 }
198 IR::F32 value;
199 if (tex.dc != 0) {
200 value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f);
201 } else {
202 value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))};
203 }
204 v.F(dest_reg, value);
205 ++dest_reg;
206 }
207 if (tex.sparse_pred != IR::Pred::PT) {
208 v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
209 }
210}
211} // Anonymous namespace
212
213void TranslatorVisitor::TEX(u64 insn) {
214 union {
215 u64 raw;
216 BitField<54, 1, u64> aoffi;
217 BitField<55, 3, Blod> blod;
218 BitField<58, 1, u64> lc;
219 BitField<36, 13, u64> cbuf_offset;
220 } const tex{insn};
221
222 Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4));
223}
224
225void TranslatorVisitor::TEX_b(u64 insn) {
226 union {
227 u64 raw;
228 BitField<36, 1, u64> aoffi;
229 BitField<37, 3, Blod> blod;
230 BitField<40, 1, u64> lc;
231 } const tex{insn};
232
233 Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt);
234}
235
236} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
new file mode 100644
index 000000000..154e7f1a1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
@@ -0,0 +1,266 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <utility>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Precision : u64 {
15 F16,
16 F32,
17};
18
19union Encoding {
20 u64 raw;
21 BitField<59, 1, Precision> precision;
22 BitField<53, 4, u64> encoding;
23 BitField<49, 1, u64> nodep;
24 BitField<28, 8, IR::Reg> dest_reg_b;
25 BitField<0, 8, IR::Reg> dest_reg_a;
26 BitField<8, 8, IR::Reg> src_reg_a;
27 BitField<20, 8, IR::Reg> src_reg_b;
28 BitField<36, 13, u64> cbuf_offset;
29 BitField<50, 3, u64> swizzle;
30};
31
// One bit per color component.
constexpr unsigned R = 1U << 0;
constexpr unsigned G = 1U << 1;
constexpr unsigned B = 1U << 2;
constexpr unsigned A = 1U << 3;

// Component masks indexed by the swizzle field when the second destination
// register is RZ (one or two components written).
constexpr std::array<unsigned, 8> RG_LUT{
    R,     G,     B,     A,     // single component
    R | G, R | A, G | A, B | A, // two components
};

// Component masks indexed by the swizzle field when a second destination
// register is present (three or four components written).
constexpr std::array<unsigned, 5> RGBA_LUT{
    R | G | B, R | G | A, R | B | A, G | B | A, // three components
    R | G | B | A,                              // all four
};
55
56void CheckAlignment(IR::Reg reg, size_t alignment) {
57 if (!IR::IsAligned(reg, alignment)) {
58 throw NotImplementedException("Unaligned source register {}", reg);
59 }
60}
61
62template <typename... Args>
63IR::Value Composite(TranslatorVisitor& v, Args... regs) {
64 return v.ir.CompositeConstruct(v.F(regs)...);
65}
66
67IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) {
68 return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16)));
69}
70
71IR::Value Sample(TranslatorVisitor& v, u64 insn) {
72 const Encoding texs{insn};
73 const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))};
74 const IR::F32 zero{v.ir.Imm32(0.0f)};
75 const IR::Reg reg_a{texs.src_reg_a};
76 const IR::Reg reg_b{texs.src_reg_b};
77 IR::TextureInstInfo info{};
78 if (texs.precision == Precision::F16) {
79 info.relaxed_precision.Assign(1);
80 }
81 switch (texs.encoding) {
82 case 0: // 1D.LZ
83 info.type.Assign(TextureType::Color1D);
84 return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info);
85 case 1: // 2D
86 info.type.Assign(TextureType::Color2D);
87 return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info);
88 case 2: // 2D.LZ
89 info.type.Assign(TextureType::Color2D);
90 return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, info);
91 case 3: // 2D.LL
92 CheckAlignment(reg_a, 2);
93 info.type.Assign(TextureType::Color2D);
94 return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {},
95 info);
96 case 4: // 2D.DC
97 CheckAlignment(reg_a, 2);
98 info.type.Assign(TextureType::Color2D);
99 info.is_depth.Assign(1);
100 return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
101 {}, {}, {}, info);
102 case 5: // 2D.LL.DC
103 CheckAlignment(reg_a, 2);
104 CheckAlignment(reg_b, 2);
105 info.type.Assign(TextureType::Color2D);
106 info.is_depth.Assign(1);
107 return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1),
108 v.F(reg_b + 1), v.F(reg_b), {}, info);
109 case 6: // 2D.LZ.DC
110 CheckAlignment(reg_a, 2);
111 info.type.Assign(TextureType::Color2D);
112 info.is_depth.Assign(1);
113 return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
114 zero, {}, info);
115 case 7: // ARRAY_2D
116 CheckAlignment(reg_a, 2);
117 info.type.Assign(TextureType::ColorArray2D);
118 return v.ir.ImageSampleImplicitLod(
119 handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
120 {}, {}, {}, info);
121 case 8: // ARRAY_2D.LZ
122 CheckAlignment(reg_a, 2);
123 info.type.Assign(TextureType::ColorArray2D);
124 return v.ir.ImageSampleExplicitLod(
125 handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
126 zero, {}, info);
127 case 9: // ARRAY_2D.LZ.DC
128 CheckAlignment(reg_a, 2);
129 CheckAlignment(reg_b, 2);
130 info.type.Assign(TextureType::ColorArray2D);
131 info.is_depth.Assign(1);
132 return v.ir.ImageSampleDrefExplicitLod(
133 handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
134 v.F(reg_b + 1), zero, {}, info);
135 case 10: // 3D
136 CheckAlignment(reg_a, 2);
137 info.type.Assign(TextureType::Color3D);
138 return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
139 {}, info);
140 case 11: // 3D.LZ
141 CheckAlignment(reg_a, 2);
142 info.type.Assign(TextureType::Color3D);
143 return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {},
144 info);
145 case 12: // CUBE
146 CheckAlignment(reg_a, 2);
147 info.type.Assign(TextureType::ColorCube);
148 return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
149 {}, info);
150 case 13: // CUBE.LL
151 CheckAlignment(reg_a, 2);
152 CheckAlignment(reg_b, 2);
153 info.type.Assign(TextureType::ColorCube);
154 return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b),
155 v.F(reg_b + 1), {}, info);
156 default:
157 throw NotImplementedException("Illegal encoding {}", texs.encoding.Value());
158 }
159}
160
161unsigned Swizzle(u64 insn) {
162 const Encoding texs{insn};
163 const size_t encoding{texs.swizzle};
164 if (texs.dest_reg_b == IR::Reg::RZ) {
165 if (encoding >= RG_LUT.size()) {
166 throw NotImplementedException("Illegal RG encoding {}", encoding);
167 }
168 return RG_LUT[encoding];
169 } else {
170 if (encoding >= RGBA_LUT.size()) {
171 throw NotImplementedException("Illegal RGBA encoding {}", encoding);
172 }
173 return RGBA_LUT[encoding];
174 }
175}
176
177IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
178 const bool is_shadow{sample.Type() == IR::Type::F32};
179 if (is_shadow) {
180 const bool is_alpha{component == 3};
181 return is_alpha ? v.ir.Imm32(1.0f) : IR::F32{sample};
182 } else {
183 return IR::F32{v.ir.CompositeExtract(sample, component)};
184 }
185}
186
187IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
188 const Encoding texs{insn};
189 switch (index) {
190 case 0:
191 return texs.dest_reg_a;
192 case 1:
193 CheckAlignment(texs.dest_reg_a, 2);
194 return texs.dest_reg_a + 1;
195 case 2:
196 return texs.dest_reg_b;
197 case 3:
198 CheckAlignment(texs.dest_reg_b, 2);
199 return texs.dest_reg_b + 1;
200 }
201 throw LogicError("Invalid store index {}", index);
202}
203
204void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
205 const unsigned swizzle{Swizzle(insn)};
206 unsigned store_index{0};
207 for (unsigned component = 0; component < 4; ++component) {
208 if (((swizzle >> component) & 1) == 0) {
209 continue;
210 }
211 const IR::Reg dest{RegStoreComponent32(insn, store_index)};
212 v.F(dest, Extract(v, sample, component));
213 ++store_index;
214 }
215}
216
217IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
218 return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
219}
220
221void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
222 const unsigned swizzle{Swizzle(insn)};
223 unsigned store_index{0};
224 std::array<IR::F32, 4> swizzled;
225 for (unsigned component = 0; component < 4; ++component) {
226 if (((swizzle >> component) & 1) == 0) {
227 continue;
228 }
229 swizzled[store_index] = Extract(v, sample, component);
230 ++store_index;
231 }
232 const IR::F32 zero{v.ir.Imm32(0.0f)};
233 const Encoding texs{insn};
234 switch (store_index) {
235 case 1:
236 v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero));
237 break;
238 case 2:
239 case 3:
240 case 4:
241 v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
242 switch (store_index) {
243 case 2:
244 break;
245 case 3:
246 v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero));
247 break;
248 case 4:
249 v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
250 break;
251 }
252 break;
253 }
254}
255} // Anonymous namespace
256
257void TranslatorVisitor::TEXS(u64 insn) {
258 const IR::Value sample{Sample(*this, insn)};
259 if (Encoding{insn}.precision == Precision::F32) {
260 Store32(*this, insn, sample);
261 } else {
262 Store16(*this, insn, sample);
263 }
264}
265
266} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp
new file mode 100644
index 000000000..218cbc1a8
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp
@@ -0,0 +1,208 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14
15enum class TextureType : u64 {
16 _1D,
17 ARRAY_1D,
18 _2D,
19 ARRAY_2D,
20 _3D,
21 ARRAY_3D,
22 CUBE,
23 ARRAY_CUBE,
24};
25
26enum class OffsetType : u64 {
27 None = 0,
28 AOFFI,
29 PTP,
30 Invalid,
31};
32
33enum class ComponentType : u64 {
34 R = 0,
35 G = 1,
36 B = 2,
37 A = 3,
38};
39
40Shader::TextureType GetType(TextureType type) {
41 switch (type) {
42 case TextureType::_1D:
43 return Shader::TextureType::Color1D;
44 case TextureType::ARRAY_1D:
45 return Shader::TextureType::ColorArray1D;
46 case TextureType::_2D:
47 return Shader::TextureType::Color2D;
48 case TextureType::ARRAY_2D:
49 return Shader::TextureType::ColorArray2D;
50 case TextureType::_3D:
51 return Shader::TextureType::Color3D;
52 case TextureType::ARRAY_3D:
53 throw NotImplementedException("3D array texture type");
54 case TextureType::CUBE:
55 return Shader::TextureType::ColorCube;
56 case TextureType::ARRAY_CUBE:
57 return Shader::TextureType::ColorArrayCube;
58 }
59 throw NotImplementedException("Invalid texture type {}", type);
60}
61
62IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
63 const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
64 switch (type) {
65 case TextureType::_1D:
66 return v.F(reg);
67 case TextureType::ARRAY_1D:
68 return v.ir.CompositeConstruct(v.F(reg + 1), read_array());
69 case TextureType::_2D:
70 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
71 case TextureType::ARRAY_2D:
72 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array());
73 case TextureType::_3D:
74 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
75 case TextureType::ARRAY_3D:
76 throw NotImplementedException("3D array texture type");
77 case TextureType::CUBE:
78 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
79 case TextureType::ARRAY_CUBE:
80 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array());
81 }
82 throw NotImplementedException("Invalid texture type {}", type);
83}
84
85IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
86 const IR::U32 value{v.X(reg++)};
87 switch (type) {
88 case TextureType::_1D:
89 case TextureType::ARRAY_1D:
90 return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true);
91 case TextureType::_2D:
92 case TextureType::ARRAY_2D:
93 return v.ir.CompositeConstruct(
94 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
95 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
96 case TextureType::_3D:
97 case TextureType::ARRAY_3D:
98 return v.ir.CompositeConstruct(
99 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
100 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true),
101 v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true));
102 case TextureType::CUBE:
103 case TextureType::ARRAY_CUBE:
104 throw NotImplementedException("Illegal offset on CUBE sample");
105 }
106 throw NotImplementedException("Invalid texture type {}", type);
107}
108
109std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) {
110 const IR::U32 value1{v.X(reg++)};
111 const IR::U32 value2{v.X(reg++)};
112 const IR::U32 bitsize{v.ir.Imm32(6)};
113 const auto make_vector{[&v, &bitsize](const IR::U32& value) {
114 return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, true),
115 v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true),
116 v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true),
117 v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true));
118 }};
119 return {make_vector(value1), make_vector(value2)};
120}
121
122void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type,
123 bool is_bindless) {
124 union {
125 u64 raw;
126 BitField<35, 1, u64> ndv;
127 BitField<49, 1, u64> nodep;
128 BitField<50, 1, u64> dc;
129 BitField<51, 3, IR::Pred> sparse_pred;
130 BitField<0, 8, IR::Reg> dest_reg;
131 BitField<8, 8, IR::Reg> coord_reg;
132 BitField<20, 8, IR::Reg> meta_reg;
133 BitField<28, 3, TextureType> type;
134 BitField<31, 4, u64> mask;
135 BitField<36, 13, u64> cbuf_offset;
136 } const tld4{insn};
137
138 const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)};
139
140 IR::Reg meta_reg{tld4.meta_reg};
141 IR::Value handle;
142 IR::Value offset;
143 IR::Value offset2;
144 IR::F32 dref;
145 if (!is_bindless) {
146 handle = v.ir.Imm32(static_cast<u32>(tld4.cbuf_offset.Value() * 4));
147 } else {
148 handle = v.X(meta_reg++);
149 }
150 switch (offset_type) {
151 case OffsetType::None:
152 break;
153 case OffsetType::AOFFI:
154 offset = MakeOffset(v, meta_reg, tld4.type);
155 break;
156 case OffsetType::PTP:
157 std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg);
158 break;
159 default:
160 throw NotImplementedException("Invalid offset type {}", offset_type);
161 }
162 if (tld4.dc != 0) {
163 dref = v.F(meta_reg++);
164 }
165 IR::TextureInstInfo info{};
166 info.type.Assign(GetType(tld4.type));
167 info.is_depth.Assign(tld4.dc != 0 ? 1 : 0);
168 info.gather_component.Assign(static_cast<u32>(component_type));
169 const IR::Value sample{[&] {
170 if (tld4.dc == 0) {
171 return v.ir.ImageGather(handle, coords, offset, offset2, info);
172 }
173 return v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info);
174 }()};
175
176 IR::Reg dest_reg{tld4.dest_reg};
177 for (size_t element = 0; element < 4; ++element) {
178 if (((tld4.mask >> element) & 1) == 0) {
179 continue;
180 }
181 v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
182 ++dest_reg;
183 }
184 if (tld4.sparse_pred != IR::Pred::PT) {
185 v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
186 }
187}
188} // Anonymous namespace
189
190void TranslatorVisitor::TLD4(u64 insn) {
191 union {
192 u64 raw;
193 BitField<56, 2, ComponentType> component;
194 BitField<54, 2, OffsetType> offset;
195 } const tld4{insn};
196 Impl(*this, insn, tld4.component, tld4.offset, false);
197}
198
199void TranslatorVisitor::TLD4_b(u64 insn) {
200 union {
201 u64 raw;
202 BitField<38, 2, ComponentType> component;
203 BitField<36, 2, OffsetType> offset;
204 } const tld4{insn};
205 Impl(*this, insn, tld4.component, tld4.offset, true);
206}
207
208} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
new file mode 100644
index 000000000..34efa2d50
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
@@ -0,0 +1,134 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <utility>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Precision : u64 {
15 F32,
16 F16,
17};
18
19enum class ComponentType : u64 {
20 R = 0,
21 G = 1,
22 B = 2,
23 A = 3,
24};
25
26union Encoding {
27 u64 raw;
28 BitField<55, 1, Precision> precision;
29 BitField<52, 2, ComponentType> component_type;
30 BitField<51, 1, u64> aoffi;
31 BitField<50, 1, u64> dc;
32 BitField<49, 1, u64> nodep;
33 BitField<28, 8, IR::Reg> dest_reg_b;
34 BitField<0, 8, IR::Reg> dest_reg_a;
35 BitField<8, 8, IR::Reg> src_reg_a;
36 BitField<20, 8, IR::Reg> src_reg_b;
37 BitField<36, 13, u64> cbuf_offset;
38};
39
40void CheckAlignment(IR::Reg reg, size_t alignment) {
41 if (!IR::IsAligned(reg, alignment)) {
42 throw NotImplementedException("Unaligned source register {}", reg);
43 }
44}
45
46IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
47 const IR::U32 value{v.X(reg)};
48 return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
49 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
50}
51
52IR::Value Sample(TranslatorVisitor& v, u64 insn) {
53 const Encoding tld4s{insn};
54 const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tld4s.cbuf_offset * 4))};
55 const IR::Reg reg_a{tld4s.src_reg_a};
56 const IR::Reg reg_b{tld4s.src_reg_b};
57 IR::TextureInstInfo info{};
58 if (tld4s.precision == Precision::F16) {
59 info.relaxed_precision.Assign(1);
60 }
61 info.gather_component.Assign(static_cast<u32>(tld4s.component_type.Value()));
62 info.type.Assign(Shader::TextureType::Color2D);
63 info.is_depth.Assign(tld4s.dc != 0 ? 1 : 0);
64 IR::Value coords;
65 if (tld4s.aoffi != 0) {
66 CheckAlignment(reg_a, 2);
67 coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
68 IR::Value offset = MakeOffset(v, reg_b);
69 if (tld4s.dc != 0) {
70 CheckAlignment(reg_b, 2);
71 IR::F32 dref = v.F(reg_b + 1);
72 return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info);
73 }
74 return v.ir.ImageGather(handle, coords, offset, {}, info);
75 }
76 if (tld4s.dc != 0) {
77 CheckAlignment(reg_a, 2);
78 coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
79 IR::F32 dref = v.F(reg_b);
80 return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info);
81 }
82 coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b));
83 return v.ir.ImageGather(handle, coords, {}, {}, info);
84}
85
86IR::Reg RegStoreComponent32(u64 insn, size_t index) {
87 const Encoding tlds4{insn};
88 switch (index) {
89 case 0:
90 return tlds4.dest_reg_a;
91 case 1:
92 CheckAlignment(tlds4.dest_reg_a, 2);
93 return tlds4.dest_reg_a + 1;
94 case 2:
95 return tlds4.dest_reg_b;
96 case 3:
97 CheckAlignment(tlds4.dest_reg_b, 2);
98 return tlds4.dest_reg_b + 1;
99 }
100 throw LogicError("Invalid store index {}", index);
101}
102
103void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
104 for (size_t component = 0; component < 4; ++component) {
105 const IR::Reg dest{RegStoreComponent32(insn, component)};
106 v.F(dest, IR::F32{v.ir.CompositeExtract(sample, component)});
107 }
108}
109
110IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
111 return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
112}
113
114void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
115 std::array<IR::F32, 4> swizzled;
116 for (size_t component = 0; component < 4; ++component) {
117 swizzled[component] = IR::F32{v.ir.CompositeExtract(sample, component)};
118 }
119 const Encoding tld4s{insn};
120 v.X(tld4s.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
121 v.X(tld4s.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
122}
123} // Anonymous namespace
124
125void TranslatorVisitor::TLD4S(u64 insn) {
126 const IR::Value sample{Sample(*this, insn)};
127 if (Encoding{insn}.precision == Precision::F32) {
128 Store32(*this, insn, sample);
129 } else {
130 Store16(*this, insn, sample);
131 }
132}
133
134} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
new file mode 100644
index 000000000..c3fe3ffda
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
@@ -0,0 +1,182 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14
15enum class TextureType : u64 {
16 _1D,
17 ARRAY_1D,
18 _2D,
19 ARRAY_2D,
20 _3D,
21 ARRAY_3D,
22 CUBE,
23 ARRAY_CUBE,
24};
25
26Shader::TextureType GetType(TextureType type) {
27 switch (type) {
28 case TextureType::_1D:
29 return Shader::TextureType::Color1D;
30 case TextureType::ARRAY_1D:
31 return Shader::TextureType::ColorArray1D;
32 case TextureType::_2D:
33 return Shader::TextureType::Color2D;
34 case TextureType::ARRAY_2D:
35 return Shader::TextureType::ColorArray2D;
36 case TextureType::_3D:
37 return Shader::TextureType::Color3D;
38 case TextureType::ARRAY_3D:
39 throw NotImplementedException("3D array texture type");
40 case TextureType::CUBE:
41 return Shader::TextureType::ColorCube;
42 case TextureType::ARRAY_CUBE:
43 return Shader::TextureType::ColorArrayCube;
44 }
45 throw NotImplementedException("Invalid texture type {}", type);
46}
47
48IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) {
49 const IR::U32 value{v.X(reg)};
50 const u32 base{has_lod_clamp ? 12U : 16U};
51 return v.ir.CompositeConstruct(
52 v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true),
53 v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true));
54}
55
56void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
57 union {
58 u64 raw;
59 BitField<49, 1, u64> nodep;
60 BitField<35, 1, u64> aoffi;
61 BitField<50, 1, u64> lc;
62 BitField<51, 3, IR::Pred> sparse_pred;
63 BitField<0, 8, IR::Reg> dest_reg;
64 BitField<8, 8, IR::Reg> coord_reg;
65 BitField<20, 8, IR::Reg> derivate_reg;
66 BitField<28, 3, TextureType> type;
67 BitField<31, 4, u64> mask;
68 BitField<36, 13, u64> cbuf_offset;
69 } const txd{insn};
70
71 const bool has_lod_clamp = txd.lc != 0;
72 if (has_lod_clamp) {
73 throw NotImplementedException("TXD.LC - CLAMP is not implemented");
74 }
75
76 IR::Value coords;
77 u32 num_derivates{};
78 IR::Reg base_reg{txd.coord_reg};
79 IR::Reg last_reg;
80 IR::Value handle;
81 if (is_bindless) {
82 handle = v.X(base_reg++);
83 } else {
84 handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4));
85 }
86
87 const auto read_array{[&]() -> IR::F32 {
88 const IR::U32 base{v.ir.Imm32(0)};
89 const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 12 : 16)};
90 const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)};
91 return v.ir.ConvertUToF(32, 16, array_index);
92 }};
93 switch (txd.type) {
94 case TextureType::_1D: {
95 coords = v.F(base_reg);
96 num_derivates = 1;
97 last_reg = base_reg + 1;
98 break;
99 }
100 case TextureType::ARRAY_1D: {
101 last_reg = base_reg + 1;
102 coords = v.ir.CompositeConstruct(v.F(base_reg), read_array());
103 num_derivates = 1;
104 break;
105 }
106 case TextureType::_2D: {
107 last_reg = base_reg + 2;
108 coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1));
109 num_derivates = 2;
110 break;
111 }
112 case TextureType::ARRAY_2D: {
113 last_reg = base_reg + 2;
114 coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array());
115 num_derivates = 2;
116 break;
117 }
118 default:
119 throw NotImplementedException("Invalid texture type");
120 }
121
122 const IR::Reg derivate_reg{txd.derivate_reg};
123 IR::Value derivates;
124 switch (num_derivates) {
125 case 1: {
126 derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1));
127 break;
128 }
129 case 2: {
130 derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1),
131 v.F(derivate_reg + 2), v.F(derivate_reg + 3));
132 break;
133 }
134 default:
135 throw NotImplementedException("Invalid texture type");
136 }
137
138 IR::Value offset;
139 if (txd.aoffi != 0) {
140 offset = MakeOffset(v, last_reg, has_lod_clamp);
141 }
142
143 IR::F32 lod_clamp;
144 if (has_lod_clamp) {
145 // Lod Clamp is a Fixed Point 4.8, we need to transform it to float.
146 // to convert a fixed point, float(value) / float(1 << fixed_point)
147 // in this case the fixed_point is 8.
148 const IR::F32 conv4_8fixp_f{v.ir.Imm32(static_cast<f32>(1U << 8))};
149 const IR::F32 fixp_lc{v.ir.ConvertUToF(
150 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))};
151 lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f);
152 }
153
154 IR::TextureInstInfo info{};
155 info.type.Assign(GetType(txd.type));
156 info.num_derivates.Assign(num_derivates);
157 info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0);
158 const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)};
159
160 IR::Reg dest_reg{txd.dest_reg};
161 for (size_t element = 0; element < 4; ++element) {
162 if (((txd.mask >> element) & 1) == 0) {
163 continue;
164 }
165 v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
166 ++dest_reg;
167 }
168 if (txd.sparse_pred != IR::Pred::PT) {
169 v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
170 }
171}
172} // Anonymous namespace
173
174void TranslatorVisitor::TXD(u64 insn) {
175 Impl(*this, insn, false);
176}
177
178void TranslatorVisitor::TXD_b(u64 insn) {
179 Impl(*this, insn, true);
180}
181
182} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
new file mode 100644
index 000000000..983058303
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
@@ -0,0 +1,165 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14
15enum class TextureType : u64 {
16 _1D,
17 ARRAY_1D,
18 _2D,
19 ARRAY_2D,
20 _3D,
21 ARRAY_3D,
22 CUBE,
23 ARRAY_CUBE,
24};
25
26Shader::TextureType GetType(TextureType type) {
27 switch (type) {
28 case TextureType::_1D:
29 return Shader::TextureType::Color1D;
30 case TextureType::ARRAY_1D:
31 return Shader::TextureType::ColorArray1D;
32 case TextureType::_2D:
33 return Shader::TextureType::Color2D;
34 case TextureType::ARRAY_2D:
35 return Shader::TextureType::ColorArray2D;
36 case TextureType::_3D:
37 return Shader::TextureType::Color3D;
38 case TextureType::ARRAY_3D:
39 throw NotImplementedException("3D array texture type");
40 case TextureType::CUBE:
41 return Shader::TextureType::ColorCube;
42 case TextureType::ARRAY_CUBE:
43 return Shader::TextureType::ColorArrayCube;
44 }
45 throw NotImplementedException("Invalid texture type {}", type);
46}
47
48IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
49 const auto read_array{
50 [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }};
51 switch (type) {
52 case TextureType::_1D:
53 return v.X(reg);
54 case TextureType::ARRAY_1D:
55 return v.ir.CompositeConstruct(v.X(reg + 1), read_array());
56 case TextureType::_2D:
57 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
58 case TextureType::ARRAY_2D:
59 return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array());
60 case TextureType::_3D:
61 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
62 case TextureType::ARRAY_3D:
63 throw NotImplementedException("3D array texture type");
64 case TextureType::CUBE:
65 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
66 case TextureType::ARRAY_CUBE:
67 return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), read_array());
68 }
69 throw NotImplementedException("Invalid texture type {}", type);
70}
71
72IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
73 const IR::U32 value{v.X(reg++)};
74 switch (type) {
75 case TextureType::_1D:
76 case TextureType::ARRAY_1D:
77 return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true);
78 case TextureType::_2D:
79 case TextureType::ARRAY_2D:
80 return v.ir.CompositeConstruct(
81 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
82 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
83 case TextureType::_3D:
84 case TextureType::ARRAY_3D:
85 return v.ir.CompositeConstruct(
86 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
87 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true),
88 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true));
89 case TextureType::CUBE:
90 case TextureType::ARRAY_CUBE:
91 throw NotImplementedException("Illegal offset on CUBE sample");
92 }
93 throw NotImplementedException("Invalid texture type {}", type);
94}
95
96void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
97 union {
98 u64 raw;
99 BitField<49, 1, u64> nodep;
100 BitField<55, 1, u64> lod;
101 BitField<50, 1, u64> multisample;
102 BitField<35, 1, u64> aoffi;
103 BitField<54, 1, u64> clamp;
104 BitField<51, 3, IR::Pred> sparse_pred;
105 BitField<0, 8, IR::Reg> dest_reg;
106 BitField<8, 8, IR::Reg> coord_reg;
107 BitField<20, 8, IR::Reg> meta_reg;
108 BitField<28, 3, TextureType> type;
109 BitField<31, 4, u64> mask;
110 BitField<36, 13, u64> cbuf_offset;
111 } const tld{insn};
112
113 const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)};
114
115 IR::Reg meta_reg{tld.meta_reg};
116 IR::Value handle;
117 IR::Value offset;
118 IR::U32 lod;
119 IR::U32 multisample;
120 if (is_bindless) {
121 handle = v.X(meta_reg++);
122 } else {
123 handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4));
124 }
125 if (tld.lod != 0) {
126 lod = v.X(meta_reg++);
127 } else {
128 lod = v.ir.Imm32(0U);
129 }
130 if (tld.aoffi != 0) {
131 offset = MakeOffset(v, meta_reg, tld.type);
132 }
133 if (tld.multisample != 0) {
134 multisample = v.X(meta_reg++);
135 }
136 if (tld.clamp != 0) {
137 throw NotImplementedException("TLD.CL - CLAMP is not implmented");
138 }
139 IR::TextureInstInfo info{};
140 info.type.Assign(GetType(tld.type));
141 const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)};
142
143 IR::Reg dest_reg{tld.dest_reg};
144 for (size_t element = 0; element < 4; ++element) {
145 if (((tld.mask >> element) & 1) == 0) {
146 continue;
147 }
148 v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
149 ++dest_reg;
150 }
151 if (tld.sparse_pred != IR::Pred::PT) {
152 v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
153 }
154}
155} // Anonymous namespace
156
157void TranslatorVisitor::TLD(u64 insn) {
158 Impl(*this, insn, false);
159}
160
161void TranslatorVisitor::TLD_b(u64 insn) {
162 Impl(*this, insn, true);
163}
164
165} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
new file mode 100644
index 000000000..5dd7e31b2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
@@ -0,0 +1,242 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Precision : u64 {
15 F16,
16 F32,
17};
18
// One bit per colour component; the lookup tables below are bitmasks built from these.
constexpr unsigned R = 1U << 0;
constexpr unsigned G = 1U << 1;
constexpr unsigned B = 1U << 2;
constexpr unsigned A = 1U << 3;

// Component masks indexed by the swizzle field when dest_reg_b is RZ.
constexpr std::array<unsigned, 8> RG_LUT{
    R,     //
    G,     //
    B,     //
    A,     //
    R | G, //
    R | A, //
    G | A, //
    B | A, //
};

// Component masks indexed by the swizzle field when dest_reg_b is a real register.
constexpr std::array<unsigned, 5> RGBA_LUT{
    R | G | B,     //
    R | G | A,     //
    R | B | A,     //
    G | B | A,     //
    R | G | B | A, //
};
42
43union Encoding {
44 u64 raw;
45 BitField<59, 1, Precision> precision;
46 BitField<54, 1, u64> aoffi;
47 BitField<53, 1, u64> lod;
48 BitField<55, 1, u64> ms;
49 BitField<49, 1, u64> nodep;
50 BitField<28, 8, IR::Reg> dest_reg_b;
51 BitField<0, 8, IR::Reg> dest_reg_a;
52 BitField<8, 8, IR::Reg> src_reg_a;
53 BitField<20, 8, IR::Reg> src_reg_b;
54 BitField<36, 13, u64> cbuf_offset;
55 BitField<50, 3, u64> swizzle;
56 BitField<53, 4, u64> encoding;
57};
58
59void CheckAlignment(IR::Reg reg, size_t alignment) {
60 if (!IR::IsAligned(reg, alignment)) {
61 throw NotImplementedException("Unaligned source register {}", reg);
62 }
63}
64
65IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
66 const IR::U32 value{v.X(reg)};
67 return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
68 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
69}
70
71IR::Value Sample(TranslatorVisitor& v, u64 insn) {
72 const Encoding tlds{insn};
73 const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tlds.cbuf_offset * 4))};
74 const IR::Reg reg_a{tlds.src_reg_a};
75 const IR::Reg reg_b{tlds.src_reg_b};
76 IR::Value coords;
77 IR::U32 lod{v.ir.Imm32(0U)};
78 IR::Value offsets;
79 IR::U32 multisample;
80 Shader::TextureType texture_type{};
81 switch (tlds.encoding) {
82 case 0:
83 texture_type = Shader::TextureType::Color1D;
84 coords = v.X(reg_a);
85 break;
86 case 1:
87 texture_type = Shader::TextureType::Color1D;
88 coords = v.X(reg_a);
89 lod = v.X(reg_b);
90 break;
91 case 2:
92 texture_type = Shader::TextureType::Color2D;
93 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b));
94 break;
95 case 4:
96 CheckAlignment(reg_a, 2);
97 texture_type = Shader::TextureType::Color2D;
98 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
99 offsets = MakeOffset(v, reg_b);
100 break;
101 case 5:
102 CheckAlignment(reg_a, 2);
103 texture_type = Shader::TextureType::Color2D;
104 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
105 lod = v.X(reg_b);
106 break;
107 case 6:
108 CheckAlignment(reg_a, 2);
109 texture_type = Shader::TextureType::Color2D;
110 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
111 multisample = v.X(reg_b);
112 break;
113 case 7:
114 CheckAlignment(reg_a, 2);
115 texture_type = Shader::TextureType::Color3D;
116 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b));
117 break;
118 case 8: {
119 CheckAlignment(reg_b, 2);
120 const IR::U32 array{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))};
121 texture_type = Shader::TextureType::ColorArray2D;
122 coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array);
123 break;
124 }
125 case 12:
126 CheckAlignment(reg_a, 2);
127 CheckAlignment(reg_b, 2);
128 texture_type = Shader::TextureType::Color2D;
129 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
130 lod = v.X(reg_b);
131 offsets = MakeOffset(v, reg_b + 1);
132 break;
133 default:
134 throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value());
135 }
136 IR::TextureInstInfo info{};
137 if (tlds.precision == Precision::F16) {
138 info.relaxed_precision.Assign(1);
139 }
140 info.type.Assign(texture_type);
141 return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info);
142}
143
144unsigned Swizzle(u64 insn) {
145 const Encoding tlds{insn};
146 const size_t encoding{tlds.swizzle};
147 if (tlds.dest_reg_b == IR::Reg::RZ) {
148 if (encoding >= RG_LUT.size()) {
149 throw NotImplementedException("Illegal RG encoding {}", encoding);
150 }
151 return RG_LUT[encoding];
152 } else {
153 if (encoding >= RGBA_LUT.size()) {
154 throw NotImplementedException("Illegal RGBA encoding {}", encoding);
155 }
156 return RGBA_LUT[encoding];
157 }
158}
159
160IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
161 return IR::F32{v.ir.CompositeExtract(sample, component)};
162}
163
164IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
165 const Encoding tlds{insn};
166 switch (index) {
167 case 0:
168 return tlds.dest_reg_a;
169 case 1:
170 CheckAlignment(tlds.dest_reg_a, 2);
171 return tlds.dest_reg_a + 1;
172 case 2:
173 return tlds.dest_reg_b;
174 case 3:
175 CheckAlignment(tlds.dest_reg_b, 2);
176 return tlds.dest_reg_b + 1;
177 }
178 throw LogicError("Invalid store index {}", index);
179}
180
181void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
182 const unsigned swizzle{Swizzle(insn)};
183 unsigned store_index{0};
184 for (unsigned component = 0; component < 4; ++component) {
185 if (((swizzle >> component) & 1) == 0) {
186 continue;
187 }
188 const IR::Reg dest{RegStoreComponent32(insn, store_index)};
189 v.F(dest, Extract(v, sample, component));
190 ++store_index;
191 }
192}
193
194IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
195 return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
196}
197
198void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
199 const unsigned swizzle{Swizzle(insn)};
200 unsigned store_index{0};
201 std::array<IR::F32, 4> swizzled;
202 for (unsigned component = 0; component < 4; ++component) {
203 if (((swizzle >> component) & 1) == 0) {
204 continue;
205 }
206 swizzled[store_index] = Extract(v, sample, component);
207 ++store_index;
208 }
209 const IR::F32 zero{v.ir.Imm32(0.0f)};
210 const Encoding tlds{insn};
211 switch (store_index) {
212 case 1:
213 v.X(tlds.dest_reg_a, Pack(v, swizzled[0], zero));
214 break;
215 case 2:
216 case 3:
217 case 4:
218 v.X(tlds.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
219 switch (store_index) {
220 case 2:
221 break;
222 case 3:
223 v.X(tlds.dest_reg_b, Pack(v, swizzled[2], zero));
224 break;
225 case 4:
226 v.X(tlds.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
227 break;
228 }
229 break;
230 }
231}
232} // Anonymous namespace
233
234void TranslatorVisitor::TLDS(u64 insn) {
235 const IR::Value sample{Sample(*this, insn)};
236 if (Encoding{insn}.precision == Precision::F32) {
237 Store32(*this, insn, sample);
238 } else {
239 Store16(*this, insn, sample);
240 }
241}
242} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
new file mode 100644
index 000000000..aea3c0e62
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
@@ -0,0 +1,131 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14
15enum class TextureType : u64 {
16 _1D,
17 ARRAY_1D,
18 _2D,
19 ARRAY_2D,
20 _3D,
21 ARRAY_3D,
22 CUBE,
23 ARRAY_CUBE,
24};
25
26Shader::TextureType GetType(TextureType type) {
27 switch (type) {
28 case TextureType::_1D:
29 return Shader::TextureType::Color1D;
30 case TextureType::ARRAY_1D:
31 return Shader::TextureType::ColorArray1D;
32 case TextureType::_2D:
33 return Shader::TextureType::Color2D;
34 case TextureType::ARRAY_2D:
35 return Shader::TextureType::ColorArray2D;
36 case TextureType::_3D:
37 return Shader::TextureType::Color3D;
38 case TextureType::ARRAY_3D:
39 throw NotImplementedException("3D array texture type");
40 case TextureType::CUBE:
41 return Shader::TextureType::ColorCube;
42 case TextureType::ARRAY_CUBE:
43 return Shader::TextureType::ColorArrayCube;
44 }
45 throw NotImplementedException("Invalid texture type {}", type);
46}
47
48IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
49 // The ISA reads an array component here, but this is not needed on high level shading languages
50 // We are dropping this information.
51 switch (type) {
52 case TextureType::_1D:
53 return v.F(reg);
54 case TextureType::ARRAY_1D:
55 return v.F(reg + 1);
56 case TextureType::_2D:
57 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
58 case TextureType::ARRAY_2D:
59 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2));
60 case TextureType::_3D:
61 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
62 case TextureType::ARRAY_3D:
63 throw NotImplementedException("3D array texture type");
64 case TextureType::CUBE:
65 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
66 case TextureType::ARRAY_CUBE:
67 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3));
68 }
69 throw NotImplementedException("Invalid texture type {}", type);
70}
71
72void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
73 union {
74 u64 raw;
75 BitField<49, 1, u64> nodep;
76 BitField<35, 1, u64> ndv;
77 BitField<0, 8, IR::Reg> dest_reg;
78 BitField<8, 8, IR::Reg> coord_reg;
79 BitField<20, 8, IR::Reg> meta_reg;
80 BitField<28, 3, TextureType> type;
81 BitField<31, 4, u64> mask;
82 BitField<36, 13, u64> cbuf_offset;
83 } const tmml{insn};
84
85 if ((tmml.mask & 0b1100) != 0) {
86 throw NotImplementedException("TMML BA results are not implmented");
87 }
88 const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)};
89
90 IR::U32 handle;
91 IR::Reg meta_reg{tmml.meta_reg};
92 if (is_bindless) {
93 handle = v.X(meta_reg++);
94 } else {
95 handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4));
96 }
97 IR::TextureInstInfo info{};
98 info.type.Assign(GetType(tmml.type));
99 const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)};
100
101 IR::Reg dest_reg{tmml.dest_reg};
102 for (size_t element = 0; element < 4; ++element) {
103 if (((tmml.mask >> element) & 1) == 0) {
104 continue;
105 }
106 IR::F32 value{v.ir.CompositeExtract(sample, element)};
107 if (element < 2) {
108 IR::U32 casted_value;
109 if (element == 0) {
110 casted_value = v.ir.ConvertFToU(32, value);
111 } else {
112 casted_value = v.ir.ConvertFToS(16, value);
113 }
114 v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8)));
115 } else {
116 v.F(dest_reg, value);
117 }
118 ++dest_reg;
119 }
120}
121} // Anonymous namespace
122
123void TranslatorVisitor::TMML(u64 insn) {
124 Impl(*this, insn, false);
125}
126
127void TranslatorVisitor::TMML_b(u64 insn) {
128 Impl(*this, insn, true);
129}
130
131} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
new file mode 100644
index 000000000..0459e5473
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
@@ -0,0 +1,76 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Mode : u64 {
15 Dimension = 1,
16 TextureType = 2,
17 SamplePos = 5,
18};
19
20IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) {
21 switch (mode) {
22 case Mode::Dimension: {
23 const IR::U32 lod{v.X(src_reg)};
24 return v.ir.ImageQueryDimension(handle, lod);
25 }
26 case Mode::TextureType:
27 case Mode::SamplePos:
28 default:
29 throw NotImplementedException("Mode {}", mode);
30 }
31}
32
33void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) {
34 union {
35 u64 raw;
36 BitField<49, 1, u64> nodep;
37 BitField<0, 8, IR::Reg> dest_reg;
38 BitField<8, 8, IR::Reg> src_reg;
39 BitField<22, 3, Mode> mode;
40 BitField<31, 4, u64> mask;
41 } const txq{insn};
42
43 IR::Reg src_reg{txq.src_reg};
44 IR::U32 handle;
45 if (cbuf_offset) {
46 handle = v.ir.Imm32(*cbuf_offset);
47 } else {
48 handle = v.X(src_reg);
49 ++src_reg;
50 }
51 const IR::Value query{Query(v, handle, txq.mode, src_reg)};
52 IR::Reg dest_reg{txq.dest_reg};
53 for (int element = 0; element < 4; ++element) {
54 if (((txq.mask >> element) & 1) == 0) {
55 continue;
56 }
57 v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))});
58 ++dest_reg;
59 }
60}
61} // Anonymous namespace
62
63void TranslatorVisitor::TXQ(u64 insn) {
64 union {
65 u64 raw;
66 BitField<36, 13, u64> cbuf_offset;
67 } const txq{insn};
68
69 Impl(*this, insn, static_cast<u32>(txq.cbuf_offset * 4));
70}
71
72void TranslatorVisitor::TXQ_b(u64 insn) {
73 Impl(*this, insn, std::nullopt);
74}
75
76} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
new file mode 100644
index 000000000..e1f4174cf
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
@@ -0,0 +1,30 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/exception.h"
6#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
7
8namespace Shader::Maxwell {
9
10IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width,
11 u32 selector, bool is_signed) {
12 switch (width) {
13 case VideoWidth::Byte:
14 case VideoWidth::Unknown:
15 return ir.BitFieldExtract(value, ir.Imm32(selector * 8), ir.Imm32(8), is_signed);
16 case VideoWidth::Short:
17 return ir.BitFieldExtract(value, ir.Imm32(selector * 16), ir.Imm32(16), is_signed);
18 case VideoWidth::Word:
19 return value;
20 default:
21 throw NotImplementedException("Unknown VideoWidth {}", width);
22 }
23}
24
25VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) {
26 // immediates must be 16-bit format.
27 return is_immediate ? VideoWidth::Short : width;
28}
29
30} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
new file mode 100644
index 000000000..40c0b907c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
@@ -0,0 +1,23 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11enum class VideoWidth : u64 {
12 Byte,
13 Unknown,
14 Short,
15 Word,
16};
17
18[[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value,
19 VideoWidth width, u32 selector, bool is_signed);
20
21[[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate);
22
23} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
new file mode 100644
index 000000000..78869601f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
@@ -0,0 +1,92 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
9
10namespace Shader::Maxwell {
11namespace {
12enum class VideoMinMaxOps : u64 {
13 MRG_16H,
14 MRG_16L,
15 MRG_8B0,
16 MRG_8B2,
17 ACC,
18 MIN,
19 MAX,
20};
21
22[[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs,
23 VideoMinMaxOps op, bool is_signed) {
24 switch (op) {
25 case VideoMinMaxOps::MIN:
26 return ir.IMin(lhs, rhs, is_signed);
27 case VideoMinMaxOps::MAX:
28 return ir.IMax(lhs, rhs, is_signed);
29 default:
30 throw NotImplementedException("VMNMX op {}", op);
31 }
32}
33} // Anonymous namespace
34
35void TranslatorVisitor::VMNMX(u64 insn) {
36 union {
37 u64 raw;
38 BitField<0, 8, IR::Reg> dest_reg;
39 BitField<20, 16, u64> src_b_imm;
40 BitField<28, 2, u64> src_b_selector;
41 BitField<29, 2, VideoWidth> src_b_width;
42 BitField<36, 2, u64> src_a_selector;
43 BitField<37, 2, VideoWidth> src_a_width;
44 BitField<47, 1, u64> cc;
45 BitField<48, 1, u64> src_a_sign;
46 BitField<49, 1, u64> src_b_sign;
47 BitField<50, 1, u64> is_src_b_reg;
48 BitField<51, 3, VideoMinMaxOps> op;
49 BitField<54, 1, u64> dest_sign;
50 BitField<55, 1, u64> sat;
51 BitField<56, 1, u64> mx;
52 } const vmnmx{insn};
53
54 if (vmnmx.cc != 0) {
55 throw NotImplementedException("VMNMX CC");
56 }
57 if (vmnmx.sat != 0) {
58 throw NotImplementedException("VMNMX SAT");
59 }
60 // Selectors were shown to default to 2 in unit tests
61 if (vmnmx.src_a_selector != 2) {
62 throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value());
63 }
64 if (vmnmx.src_b_selector != 2) {
65 throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value());
66 }
67 if (vmnmx.src_a_width != VideoWidth::Word) {
68 throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value());
69 }
70
71 const bool is_b_imm{vmnmx.is_src_b_reg == 0};
72 const IR::U32 src_a{GetReg8(insn)};
73 const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmnmx.src_b_imm)) : GetReg20(insn)};
74 const IR::U32 src_c{GetReg39(insn)};
75
76 const VideoWidth a_width{vmnmx.src_a_width};
77 const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)};
78
79 const bool src_a_signed{vmnmx.src_a_sign != 0};
80 const bool src_b_signed{vmnmx.src_b_sign != 0};
81 const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)};
82 const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)};
83
84 // First operation's sign is only dependent on operand b's sign
85 const bool op_1_signed{src_b_signed};
86
87 const IR::U32 lhs{vmnmx.mx != 0 ? ir.IMax(op_a, op_b, op_1_signed)
88 : ir.IMin(op_a, op_b, op_1_signed)};
89 X(vmnmx.dest_reg, ApplyVideoMinMaxOp(ir, lhs, src_c, vmnmx.op, vmnmx.dest_sign != 0));
90}
91
92} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
new file mode 100644
index 000000000..cc2e6d6e6
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
@@ -0,0 +1,64 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
9
10namespace Shader::Maxwell {
11void TranslatorVisitor::VMAD(u64 insn) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<20, 16, u64> src_b_imm;
16 BitField<28, 2, u64> src_b_selector;
17 BitField<29, 2, VideoWidth> src_b_width;
18 BitField<36, 2, u64> src_a_selector;
19 BitField<37, 2, VideoWidth> src_a_width;
20 BitField<47, 1, u64> cc;
21 BitField<48, 1, u64> src_a_sign;
22 BitField<49, 1, u64> src_b_sign;
23 BitField<50, 1, u64> is_src_b_reg;
24 BitField<51, 2, u64> scale;
25 BitField<53, 1, u64> src_c_neg;
26 BitField<54, 1, u64> src_a_neg;
27 BitField<55, 1, u64> sat;
28 } const vmad{insn};
29
30 if (vmad.cc != 0) {
31 throw NotImplementedException("VMAD CC");
32 }
33 if (vmad.sat != 0) {
34 throw NotImplementedException("VMAD SAT");
35 }
36 if (vmad.scale != 0) {
37 throw NotImplementedException("VMAD SCALE");
38 }
39 if (vmad.src_a_neg != 0 && vmad.src_c_neg != 0) {
40 throw NotImplementedException("VMAD PO");
41 }
42 if (vmad.src_a_neg != 0 || vmad.src_c_neg != 0) {
43 throw NotImplementedException("VMAD NEG");
44 }
45 const bool is_b_imm{vmad.is_src_b_reg == 0};
46 const IR::U32 src_a{GetReg8(insn)};
47 const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmad.src_b_imm)) : GetReg20(insn)};
48 const IR::U32 src_c{GetReg39(insn)};
49
50 const u32 a_selector{static_cast<u32>(vmad.src_a_selector)};
51 // Immediate values can't have a selector
52 const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmad.src_b_selector)};
53 const VideoWidth a_width{vmad.src_a_width};
54 const VideoWidth b_width{GetVideoSourceWidth(vmad.src_b_width, is_b_imm)};
55
56 const bool src_a_signed{vmad.src_a_sign != 0};
57 const bool src_b_signed{vmad.src_b_sign != 0};
58 const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
59 const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
60
61 X(vmad.dest_reg, ir.IAdd(ir.IMul(op_a, op_b), src_c));
62}
63
64} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
new file mode 100644
index 000000000..1b66abc33
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
@@ -0,0 +1,92 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
10
11namespace Shader::Maxwell {
12namespace {
13enum class VsetpCompareOp : u64 {
14 False = 0,
15 LessThan,
16 Equal,
17 LessThanEqual,
18 GreaterThan = 16,
19 NotEqual,
20 GreaterThanEqual,
21 True,
22};
23
24CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) {
25 switch (op) {
26 case VsetpCompareOp::False:
27 return CompareOp::False;
28 case VsetpCompareOp::LessThan:
29 return CompareOp::LessThan;
30 case VsetpCompareOp::Equal:
31 return CompareOp::Equal;
32 case VsetpCompareOp::LessThanEqual:
33 return CompareOp::LessThanEqual;
34 case VsetpCompareOp::GreaterThan:
35 return CompareOp::GreaterThan;
36 case VsetpCompareOp::NotEqual:
37 return CompareOp::NotEqual;
38 case VsetpCompareOp::GreaterThanEqual:
39 return CompareOp::GreaterThanEqual;
40 case VsetpCompareOp::True:
41 return CompareOp::True;
42 default:
43 throw NotImplementedException("Invalid compare op {}", op);
44 }
45}
46} // Anonymous namespace
47
48void TranslatorVisitor::VSETP(u64 insn) {
49 union {
50 u64 raw;
51 BitField<0, 3, IR::Pred> dest_pred_b;
52 BitField<3, 3, IR::Pred> dest_pred_a;
53 BitField<20, 16, u64> src_b_imm;
54 BitField<28, 2, u64> src_b_selector;
55 BitField<29, 2, VideoWidth> src_b_width;
56 BitField<36, 2, u64> src_a_selector;
57 BitField<37, 2, VideoWidth> src_a_width;
58 BitField<39, 3, IR::Pred> bop_pred;
59 BitField<42, 1, u64> neg_bop_pred;
60 BitField<43, 5, VsetpCompareOp> compare_op;
61 BitField<45, 2, BooleanOp> bop;
62 BitField<48, 1, u64> src_a_sign;
63 BitField<49, 1, u64> src_b_sign;
64 BitField<50, 1, u64> is_src_b_reg;
65 } const vsetp{insn};
66
67 const bool is_b_imm{vsetp.is_src_b_reg == 0};
68 const IR::U32 src_a{GetReg8(insn)};
69 const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)};
70
71 const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)};
72 const u32 b_selector{static_cast<u32>(vsetp.src_b_selector)};
73 const VideoWidth a_width{vsetp.src_a_width};
74 const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)};
75
76 const bool src_a_signed{vsetp.src_a_sign != 0};
77 const bool src_b_signed{vsetp.src_b_sign != 0};
78 const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
79 const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
80
81 // Compare operation's sign is only dependent on operand b's sign
82 const bool compare_signed{src_b_signed};
83 const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)};
84 const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)};
85 const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)};
86 const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)};
87 const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)};
88 ir.SetPred(vsetp.dest_pred_a, result_a);
89 ir.SetPred(vsetp.dest_pred_b, result_b);
90}
91
92} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
new file mode 100644
index 000000000..7ce370f09
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
@@ -0,0 +1,54 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class VoteOp : u64 {
12 ALL,
13 ANY,
14 EQ,
15};
16
17[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) {
18 switch (vote_op) {
19 case VoteOp::ALL:
20 return ir.VoteAll(pred);
21 case VoteOp::ANY:
22 return ir.VoteAny(pred);
23 case VoteOp::EQ:
24 return ir.VoteEqual(pred);
25 default:
26 throw NotImplementedException("Invalid VOTE op {}", vote_op);
27 }
28}
29
30void Vote(TranslatorVisitor& v, u64 insn) {
31 union {
32 u64 insn;
33 BitField<0, 8, IR::Reg> dest_reg;
34 BitField<39, 3, IR::Pred> pred_a;
35 BitField<42, 1, u64> neg_pred_a;
36 BitField<45, 3, IR::Pred> pred_b;
37 BitField<48, 2, VoteOp> vote_op;
38 } const vote{insn};
39
40 const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)};
41 v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op));
42 v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred));
43}
44} // Anonymous namespace
45
46void TranslatorVisitor::VOTE(u64 insn) {
47 Vote(*this, insn);
48}
49
50void TranslatorVisitor::VOTE_vtg(u64) {
51 LOG_WARNING(Shader, "(STUBBED) called");
52}
53
54} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
new file mode 100644
index 000000000..550fed55c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
@@ -0,0 +1,69 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13enum class ShuffleMode : u64 {
14 IDX,
15 UP,
16 DOWN,
17 BFLY,
18};
19
20[[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value,
21 const IR::U32& index, const IR::U32& mask,
22 ShuffleMode shfl_op) {
23 const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))};
24 const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))};
25 switch (shfl_op) {
26 case ShuffleMode::IDX:
27 return ir.ShuffleIndex(value, index, clamp, seg_mask);
28 case ShuffleMode::UP:
29 return ir.ShuffleUp(value, index, clamp, seg_mask);
30 case ShuffleMode::DOWN:
31 return ir.ShuffleDown(value, index, clamp, seg_mask);
32 case ShuffleMode::BFLY:
33 return ir.ShuffleButterfly(value, index, clamp, seg_mask);
34 default:
35 throw NotImplementedException("Invalid SHFL op {}", shfl_op);
36 }
37}
38
39void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) {
40 union {
41 u64 insn;
42 BitField<0, 8, IR::Reg> dest_reg;
43 BitField<8, 8, IR::Reg> src_reg;
44 BitField<30, 2, ShuffleMode> mode;
45 BitField<48, 3, IR::Pred> pred;
46 } const shfl{insn};
47
48 const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)};
49 v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result));
50 v.X(shfl.dest_reg, result);
51}
52} // Anonymous namespace
53
54void TranslatorVisitor::SHFL(u64 insn) {
55 union {
56 u64 insn;
57 BitField<20, 5, u64> src_a_imm;
58 BitField<28, 1, u64> src_a_flag;
59 BitField<29, 1, u64> src_b_flag;
60 BitField<34, 13, u64> src_b_imm;
61 } const flags{insn};
62 const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm))
63 : GetReg20(insn)};
64 const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_b_imm))
65 : GetReg39(insn)};
66 Shuffle(*this, insn, src_a, src_b);
67}
68
69} // namespace Shader::Maxwell