Diffstat (limited to 'src/shader_recompiler/frontend')
 src/shader_recompiler/frontend/ir/basic_block.cpp | 4
 src/shader_recompiler/frontend/ir/basic_block.h | 2
 src/shader_recompiler/frontend/ir/ir_emitter.cpp | 200
 src/shader_recompiler/frontend/ir/ir_emitter.h | 67
 src/shader_recompiler/frontend/ir/microinstruction.h | 12
 src/shader_recompiler/frontend/ir/modifiers.h | 28
 src/shader_recompiler/frontend/ir/opcode.inc | 139
 src/shader_recompiler/frontend/ir/pred.h | 11
 src/shader_recompiler/frontend/maxwell/program.cpp | 1
 src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h | 56
 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp | 71
 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp | 73
 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp | 108
 src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp | 26
 src/shader_recompiler/frontend/maxwell/translate/impl/impl.h | 9
 src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp | 106
 src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp | 73
 src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp | 99
 src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp | 71
 src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp | 110
 src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp | 149
 src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp (renamed from src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp) | 2
 src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp | 114
 src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | 149
 24 files changed, 1437 insertions(+), 243 deletions(-)
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
index e795618fc..249251dd0 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.cpp
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -23,8 +23,8 @@ void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) {
23} 23}
24 24
25Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, 25Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
26 std::initializer_list<Value> args) { 26 std::initializer_list<Value> args, u64 flags) {
27 Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op)}; 27 Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op, flags)};
28 const auto result_it{instructions.insert(insertion_point, *inst)}; 28 const auto result_it{instructions.insert(insertion_point, *inst)};
29 29
30 if (inst->NumArgs() != args.size()) { 30 if (inst->NumArgs() != args.size()) {
diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h
index 4b6b80c4b..ec4a41cb1 100644
--- a/src/shader_recompiler/frontend/ir/basic_block.h
+++ b/src/shader_recompiler/frontend/ir/basic_block.h
@@ -39,7 +39,7 @@ public:
39 39
40 /// Prepends a new instruction to this basic block before the insertion point. 40 /// Prepends a new instruction to this basic block before the insertion point.
41 iterator PrependNewInst(iterator insertion_point, Opcode op, 41 iterator PrependNewInst(iterator insertion_point, Opcode op,
42 std::initializer_list<Value> args = {}); 42 std::initializer_list<Value> args = {}, u64 flags = 0);
43 43
44 /// Adds a new immediate predecessor to the basic block. 44 /// Adds a new immediate predecessor to the basic block.
45 void AddImmediatePredecessor(IR::Block* immediate_predecessor); 45 void AddImmediatePredecessor(IR::Block* immediate_predecessor);
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 6450e4b2c..87b253c9a 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -129,6 +129,58 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const U32& value) {
129 Inst(Opcode::SetAttribute, attribute, value); 129 Inst(Opcode::SetAttribute, attribute, value);
130} 130}
131 131
132U32 IREmitter::WorkgroupIdX() {
133 return Inst<U32>(Opcode::WorkgroupIdX);
134}
135
136U32 IREmitter::WorkgroupIdY() {
137 return Inst<U32>(Opcode::WorkgroupIdY);
138}
139
140U32 IREmitter::WorkgroupIdZ() {
141 return Inst<U32>(Opcode::WorkgroupIdZ);
142}
143
144U32 IREmitter::LocalInvocationIdX() {
145 return Inst<U32>(Opcode::LocalInvocationIdX);
146}
147
148U32 IREmitter::LocalInvocationIdY() {
149 return Inst<U32>(Opcode::LocalInvocationIdY);
150}
151
152U32 IREmitter::LocalInvocationIdZ() {
153 return Inst<U32>(Opcode::LocalInvocationIdZ);
154}
155
156U32 IREmitter::LoadGlobalU8(const U64& address) {
157 return Inst<U32>(Opcode::LoadGlobalU8, address);
158}
159
160U32 IREmitter::LoadGlobalS8(const U64& address) {
161 return Inst<U32>(Opcode::LoadGlobalS8, address);
162}
163
164U32 IREmitter::LoadGlobalU16(const U64& address) {
165 return Inst<U32>(Opcode::LoadGlobalU16, address);
166}
167
168U32 IREmitter::LoadGlobalS16(const U64& address) {
169 return Inst<U32>(Opcode::LoadGlobalS16, address);
170}
171
172U32 IREmitter::LoadGlobal32(const U64& address) {
173 return Inst<U32>(Opcode::LoadGlobal32, address);
174}
175
176Value IREmitter::LoadGlobal64(const U64& address) {
177 return Inst<Value>(Opcode::LoadGlobal64, address);
178}
179
180Value IREmitter::LoadGlobal128(const U64& address) {
181 return Inst<Value>(Opcode::LoadGlobal128, address);
182}
183
132void IREmitter::WriteGlobalU8(const U64& address, const U32& value) { 184void IREmitter::WriteGlobalU8(const U64& address, const U32& value) {
133 Inst(Opcode::WriteGlobalU8, address, value); 185 Inst(Opcode::WriteGlobalU8, address, value);
134} 186}
@@ -173,17 +225,17 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) {
173 return Inst<U1>(Opcode::GetOverflowFromOp, op); 225 return Inst<U1>(Opcode::GetOverflowFromOp, op);
174} 226}
175 227
176U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b) { 228U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control) {
 177 if (a.Type() != b.Type()) { 229 if (a.Type() != b.Type()) {
178 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); 230 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
179 } 231 }
180 switch (a.Type()) { 232 switch (a.Type()) {
181 case Type::U16: 233 case Type::U16:
182 return Inst<U16>(Opcode::FPAdd16, a, b); 234 return Inst<U16>(Opcode::FPAdd16, Flags{control}, a, b);
183 case Type::U32: 235 case Type::U32:
184 return Inst<U32>(Opcode::FPAdd32, a, b); 236 return Inst<U32>(Opcode::FPAdd32, Flags{control}, a, b);
185 case Type::U64: 237 case Type::U64:
186 return Inst<U64>(Opcode::FPAdd64, a, b); 238 return Inst<U64>(Opcode::FPAdd64, Flags{control}, a, b);
187 default: 239 default:
188 ThrowInvalidType(a.Type()); 240 ThrowInvalidType(a.Type());
189 } 241 }
@@ -191,14 +243,14 @@ U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b) {
191 243
192Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2) { 244Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2) {
193 if (e1.Type() != e2.Type()) { 245 if (e1.Type() != e2.Type()) {
194 throw InvalidArgument("Incompatible types {} {}", e1.Type(), e2.Type()); 246 throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type());
195 } 247 }
196 return Inst(Opcode::CompositeConstruct2, e1, e2); 248 return Inst(Opcode::CompositeConstruct2, e1, e2);
197} 249}
198 250
199Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3) { 251Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3) {
200 if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) { 252 if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) {
201 throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type()); 253 throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type());
202 } 254 }
203 return Inst(Opcode::CompositeConstruct3, e1, e2, e3); 255 return Inst(Opcode::CompositeConstruct3, e1, e2, e3);
204} 256}
@@ -206,8 +258,8 @@ Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny&
206Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3, 258Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3,
207 const UAny& e4) { 259 const UAny& e4) {
208 if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) { 260 if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) {
209 throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type(), 261 throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(),
210 e4.Type()); 262 e3.Type(), e4.Type());
211 } 263 }
212 return Inst(Opcode::CompositeConstruct4, e1, e2, e3, e4); 264 return Inst(Opcode::CompositeConstruct4, e1, e2, e3, e4);
213} 265}
@@ -219,6 +271,24 @@ UAny IREmitter::CompositeExtract(const Value& vector, size_t element) {
219 return Inst<UAny>(Opcode::CompositeExtract, vector, Imm32(static_cast<u32>(element))); 271 return Inst<UAny>(Opcode::CompositeExtract, vector, Imm32(static_cast<u32>(element)));
220} 272}
221 273
274UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& false_value) {
275 if (true_value.Type() != false_value.Type()) {
276 throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type());
277 }
278 switch (true_value.Type()) {
279 case Type::U8:
280 return Inst<UAny>(Opcode::Select8, condition, true_value, false_value);
281 case Type::U16:
282 return Inst<UAny>(Opcode::Select16, condition, true_value, false_value);
283 case Type::U32:
284 return Inst<UAny>(Opcode::Select32, condition, true_value, false_value);
285 case Type::U64:
286 return Inst<UAny>(Opcode::Select64, condition, true_value, false_value);
287 default:
288 throw InvalidArgument("Invalid type {}", true_value.Type());
289 }
290}
291
222U64 IREmitter::PackUint2x32(const Value& vector) { 292U64 IREmitter::PackUint2x32(const Value& vector) {
223 return Inst<U64>(Opcode::PackUint2x32, vector); 293 return Inst<U64>(Opcode::PackUint2x32, vector);
224} 294}
@@ -243,17 +313,34 @@ Value IREmitter::UnpackDouble2x32(const U64& value) {
243 return Inst<Value>(Opcode::UnpackDouble2x32, value); 313 return Inst<Value>(Opcode::UnpackDouble2x32, value);
244} 314}
245 315
246U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b) { 316U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control) {
247 if (a.Type() != b.Type()) { 317 if (a.Type() != b.Type()) {
248 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); 318 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
249 } 319 }
250 switch (a.Type()) { 320 switch (a.Type()) {
251 case Type::U16: 321 case Type::U16:
252 return Inst<U16>(Opcode::FPMul16, a, b); 322 return Inst<U16>(Opcode::FPMul16, Flags{control}, a, b);
253 case Type::U32: 323 case Type::U32:
254 return Inst<U32>(Opcode::FPMul32, a, b); 324 return Inst<U32>(Opcode::FPMul32, Flags{control}, a, b);
255 case Type::U64: 325 case Type::U64:
256 return Inst<U64>(Opcode::FPMul64, a, b); 326 return Inst<U64>(Opcode::FPMul64, Flags{control}, a, b);
327 default:
328 ThrowInvalidType(a.Type());
329 }
330}
331
332U16U32U64 IREmitter::FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c,
333 FpControl control) {
334 if (a.Type() != b.Type() || a.Type() != c.Type()) {
335 throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type());
336 }
337 switch (a.Type()) {
338 case Type::U16:
339 return Inst<U16>(Opcode::FPFma16, Flags{control}, a, b, c);
340 case Type::U32:
341 return Inst<U32>(Opcode::FPFma32, Flags{control}, a, b, c);
342 case Type::U64:
343 return Inst<U64>(Opcode::FPFma64, Flags{control}, a, b, c);
257 default: 344 default:
258 ThrowInvalidType(a.Type()); 345 ThrowInvalidType(a.Type());
259 } 346 }
@@ -403,6 +490,91 @@ U16U32U64 IREmitter::FPTrunc(const U16U32U64& value) {
403 } 490 }
404} 491}
405 492
493U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
494 if (a.Type() != b.Type()) {
495 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
496 }
497 switch (a.Type()) {
498 case Type::U32:
499 return Inst<U32>(Opcode::IAdd32, a, b);
500 case Type::U64:
501 return Inst<U64>(Opcode::IAdd64, a, b);
502 default:
503 ThrowInvalidType(a.Type());
504 }
505}
506
507U32 IREmitter::IMul(const U32& a, const U32& b) {
508 return Inst<U32>(Opcode::IMul32, a, b);
509}
510
511U32 IREmitter::INeg(const U32& value) {
512 return Inst<U32>(Opcode::INeg32, value);
513}
514
515U32 IREmitter::IAbs(const U32& value) {
516 return Inst<U32>(Opcode::IAbs32, value);
517}
518
519U32 IREmitter::ShiftLeftLogical(const U32& base, const U32& shift) {
520 return Inst<U32>(Opcode::ShiftLeftLogical32, base, shift);
521}
522
523U32 IREmitter::ShiftRightLogical(const U32& base, const U32& shift) {
524 return Inst<U32>(Opcode::ShiftRightLogical32, base, shift);
525}
526
527U32 IREmitter::ShiftRightArithmetic(const U32& base, const U32& shift) {
528 return Inst<U32>(Opcode::ShiftRightArithmetic32, base, shift);
529}
530
531U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) {
532 return Inst<U32>(Opcode::BitwiseAnd32, a, b);
533}
534
535U32 IREmitter::BitwiseOr(const U32& a, const U32& b) {
536 return Inst<U32>(Opcode::BitwiseOr32, a, b);
537}
538
539U32 IREmitter::BitwiseXor(const U32& a, const U32& b) {
540 return Inst<U32>(Opcode::BitwiseXor32, a, b);
541}
542
543U32 IREmitter::BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
544 const U32& count) {
545 return Inst<U32>(Opcode::BitFieldInsert, base, insert, offset, count);
546}
547
548U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& count,
549 bool is_signed) {
550 return Inst<U32>(is_signed ? Opcode::BitFieldSExtract : Opcode::BitFieldUExtract, base, offset,
551 count);
552}
553
554U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) {
555 return Inst<U1>(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs);
556}
557
558U1 IREmitter::IEqual(const U32& lhs, const U32& rhs) {
559 return Inst<U1>(Opcode::IEqual, lhs, rhs);
560}
561
562U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
563 return Inst<U1>(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs);
564}
565
566U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) {
567 return Inst<U1>(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs);
568}
569
570U1 IREmitter::INotEqual(const U32& lhs, const U32& rhs) {
571 return Inst<U1>(Opcode::INotEqual, lhs, rhs);
572}
573
574U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
575 return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs);
576}
577
406U1 IREmitter::LogicalOr(const U1& a, const U1& b) { 578U1 IREmitter::LogicalOr(const U1& a, const U1& b) {
407 return Inst<U1>(Opcode::LogicalOr, a, b); 579 return Inst<U1>(Opcode::LogicalOr, a, b);
408} 580}
@@ -411,6 +583,10 @@ U1 IREmitter::LogicalAnd(const U1& a, const U1& b) {
411 return Inst<U1>(Opcode::LogicalAnd, a, b); 583 return Inst<U1>(Opcode::LogicalAnd, a, b);
412} 584}
413 585
586U1 IREmitter::LogicalXor(const U1& a, const U1& b) {
587 return Inst<U1>(Opcode::LogicalXor, a, b);
588}
589
414U1 IREmitter::LogicalNot(const U1& value) { 590U1 IREmitter::LogicalNot(const U1& value) {
415 return Inst<U1>(Opcode::LogicalNot, value); 591 return Inst<U1>(Opcode::LogicalNot, value);
416} 592}
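
Note: Select dispatches on the common type of its two value operands (Select8/16/32/64); the integer translators later in this diff use it, for example, to materialise the carry flag as a 0/1 value. A minimal usage sketch, assuming an IREmitter named ir is in scope:

    // Both arms are U32 immediates, so this lowers to Opcode::Select32.
    const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), ir.Imm32(0))};
    // Mismatched arm types would throw InvalidArgument instead of emitting an instruction.
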
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 1af79f41c..7ff763ecf 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -4,8 +4,12 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <cstring>
8#include <type_traits>
9
7#include "shader_recompiler/frontend/ir/attribute.h" 10#include "shader_recompiler/frontend/ir/attribute.h"
8#include "shader_recompiler/frontend/ir/basic_block.h" 11#include "shader_recompiler/frontend/ir/basic_block.h"
12#include "shader_recompiler/frontend/ir/modifiers.h"
9#include "shader_recompiler/frontend/ir/value.h" 13#include "shader_recompiler/frontend/ir/value.h"
10 14
11namespace Shader::IR { 15namespace Shader::IR {
@@ -52,6 +56,22 @@ public:
52 [[nodiscard]] U32 GetAttribute(IR::Attribute attribute); 56 [[nodiscard]] U32 GetAttribute(IR::Attribute attribute);
53 void SetAttribute(IR::Attribute attribute, const U32& value); 57 void SetAttribute(IR::Attribute attribute, const U32& value);
54 58
59 [[nodiscard]] U32 WorkgroupIdX();
60 [[nodiscard]] U32 WorkgroupIdY();
61 [[nodiscard]] U32 WorkgroupIdZ();
62
63 [[nodiscard]] U32 LocalInvocationIdX();
64 [[nodiscard]] U32 LocalInvocationIdY();
65 [[nodiscard]] U32 LocalInvocationIdZ();
66
67 [[nodiscard]] U32 LoadGlobalU8(const U64& address);
68 [[nodiscard]] U32 LoadGlobalS8(const U64& address);
69 [[nodiscard]] U32 LoadGlobalU16(const U64& address);
70 [[nodiscard]] U32 LoadGlobalS16(const U64& address);
71 [[nodiscard]] U32 LoadGlobal32(const U64& address);
72 [[nodiscard]] Value LoadGlobal64(const U64& address);
73 [[nodiscard]] Value LoadGlobal128(const U64& address);
74
55 void WriteGlobalU8(const U64& address, const U32& value); 75 void WriteGlobalU8(const U64& address, const U32& value);
56 void WriteGlobalS8(const U64& address, const U32& value); 76 void WriteGlobalS8(const U64& address, const U32& value);
57 void WriteGlobalU16(const U64& address, const U32& value); 77 void WriteGlobalU16(const U64& address, const U32& value);
@@ -71,6 +91,8 @@ public:
71 const UAny& e4); 91 const UAny& e4);
72 [[nodiscard]] UAny CompositeExtract(const Value& vector, size_t element); 92 [[nodiscard]] UAny CompositeExtract(const Value& vector, size_t element);
73 93
94 [[nodiscard]] UAny Select(const U1& condition, const UAny& true_value, const UAny& false_value);
95
74 [[nodiscard]] U64 PackUint2x32(const Value& vector); 96 [[nodiscard]] U64 PackUint2x32(const Value& vector);
75 [[nodiscard]] Value UnpackUint2x32(const U64& value); 97 [[nodiscard]] Value UnpackUint2x32(const U64& value);
76 98
@@ -80,8 +102,10 @@ public:
80 [[nodiscard]] U64 PackDouble2x32(const Value& vector); 102 [[nodiscard]] U64 PackDouble2x32(const Value& vector);
81 [[nodiscard]] Value UnpackDouble2x32(const U64& value); 103 [[nodiscard]] Value UnpackDouble2x32(const U64& value);
82 104
83 [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b); 105 [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control = {});
84 [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b); 106 [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control = {});
107 [[nodiscard]] U16U32U64 FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c,
108 FpControl control = {});
85 109
86 [[nodiscard]] U16U32U64 FPAbs(const U16U32U64& value); 110 [[nodiscard]] U16U32U64 FPAbs(const U16U32U64& value);
87 [[nodiscard]] U16U32U64 FPNeg(const U16U32U64& value); 111 [[nodiscard]] U16U32U64 FPNeg(const U16U32U64& value);
@@ -100,8 +124,31 @@ public:
100 [[nodiscard]] U16U32U64 FPCeil(const U16U32U64& value); 124 [[nodiscard]] U16U32U64 FPCeil(const U16U32U64& value);
101 [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value); 125 [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value);
102 126
127 [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
128 [[nodiscard]] U32 IMul(const U32& a, const U32& b);
129 [[nodiscard]] U32 INeg(const U32& value);
130 [[nodiscard]] U32 IAbs(const U32& value);
131 [[nodiscard]] U32 ShiftLeftLogical(const U32& base, const U32& shift);
132 [[nodiscard]] U32 ShiftRightLogical(const U32& base, const U32& shift);
133 [[nodiscard]] U32 ShiftRightArithmetic(const U32& base, const U32& shift);
134 [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b);
135 [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b);
136 [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b);
137 [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
138 const U32& count);
139 [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count,
140 bool is_signed);
141
142 [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed);
143 [[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs);
144 [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
145 [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed);
146 [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs);
147 [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
148
103 [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); 149 [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
104 [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); 150 [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);
151 [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b);
105 [[nodiscard]] U1 LogicalNot(const U1& value); 152 [[nodiscard]] U1 LogicalNot(const U1& value);
106 153
107 [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const U16U32U64& value); 154 [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const U16U32U64& value);
@@ -118,6 +165,22 @@ private:
118 auto it{block.PrependNewInst(insertion_point, op, {Value{args}...})}; 165 auto it{block.PrependNewInst(insertion_point, op, {Value{args}...})};
119 return T{Value{&*it}}; 166 return T{Value{&*it}};
120 } 167 }
168
169 template <typename T>
170 requires(sizeof(T) <= sizeof(u64) && std::is_trivially_copyable_v<T>) struct Flags {
171 Flags() = default;
172 Flags(T proxy_) : proxy{proxy_} {}
173
174 T proxy;
175 };
176
177 template <typename T = Value, typename FlagType, typename... Args>
178 T Inst(Opcode op, Flags<FlagType> flags, Args... args) {
179 u64 raw_flags{};
180 std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy));
181 auto it{block.PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
182 return T{Value{&*it}};
183 }
121}; 184};
122 185
123} // namespace Shader::IR 186} // namespace Shader::IR
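
Note: because the new FpControl parameter defaults to {}, existing call sites keep compiling unchanged; translators that care about rounding or flush-to-zero pass an explicit control block, which the Flags{} path above packs into the instruction's flags word. A minimal sketch, assuming U32 operands a, b, c and an IREmitter named ir:

    const IR::U32 simple{ir.FPMul(a, b)};  // default control: RN rounding, FTZ, contraction allowed
    const IR::FpControl control{
        .no_contraction{true},
        .rounding{IR::FpRounding::RZ},
        .fmz_mode{IR::FmzMode::None},
    };
    const IR::U32 fused{ir.FPFma(a, b, c, control)};  // flags travel with the FPFma32 instruction
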
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
index 7f1ed6710..61849695a 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.h
+++ b/src/shader_recompiler/frontend/ir/microinstruction.h
@@ -5,7 +5,9 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <cstring>
8#include <span> 9#include <span>
10#include <type_traits>
9#include <vector> 11#include <vector>
10 12
11#include <boost/intrusive/list.hpp> 13#include <boost/intrusive/list.hpp>
@@ -23,7 +25,7 @@ constexpr size_t MAX_ARG_COUNT = 4;
23 25
24class Inst : public boost::intrusive::list_base_hook<> { 26class Inst : public boost::intrusive::list_base_hook<> {
25public: 27public:
26 explicit Inst(Opcode op_) noexcept : op(op_) {} 28 explicit Inst(Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} {}
27 29
28 /// Get the number of uses this instruction has. 30 /// Get the number of uses this instruction has.
29 [[nodiscard]] int UseCount() const noexcept { 31 [[nodiscard]] int UseCount() const noexcept {
@@ -73,6 +75,14 @@ public:
73 75
74 void ReplaceUsesWith(Value replacement); 76 void ReplaceUsesWith(Value replacement);
75 77
78 template <typename FlagsType>
79 requires(sizeof(FlagsType) <= sizeof(u64) && std::is_trivially_copyable_v<FlagsType>)
80 [[nodiscard]] FlagsType Flags() const noexcept {
81 FlagsType ret;
82 std::memcpy(&ret, &flags, sizeof(ret));
83 return ret;
84 }
85
76private: 86private:
77 void Use(const Value& value); 87 void Use(const Value& value);
78 void UndoUse(const Value& value); 88 void UndoUse(const Value& value);
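
Note: on the consuming side, the templated Flags() accessor memcpys the stored u64 back into the requested struct, so a backend or optimization pass can recover the FpControl attached by the emitter. A minimal sketch; the block iteration and the Opcode() accessor on Inst are assumed from the surrounding IR classes:

    for (IR::Inst& inst : block) {
        if (inst.Opcode() != IR::Opcode::FPAdd32) {
            continue;
        }
        const auto control{inst.Flags<IR::FpControl>()};
        if (control.rounding != IR::FpRounding::RN) {
            // e.g. record that this instruction needs an explicit rounding mode in the backend
        }
    }
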
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
new file mode 100644
index 000000000..28bb9e798
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -0,0 +1,28 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7namespace Shader::IR {
8
9enum class FmzMode {
10 None, // Denorms are not flushed, NAN is propagated (nouveau)
11 FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK)
12 FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9)
13};
14
15enum class FpRounding {
 16 RN, // Round to nearest even
17 RM, // Round towards negative infinity
18 RP, // Round towards positive infinity
19 RZ, // Round towards zero
20};
21
22struct FpControl {
23 bool no_contraction{false};
24 FpRounding rounding : 8 = FpRounding::RN;
25 FmzMode fmz_mode : 8 = FmzMode::FTZ;
26};
27static_assert(sizeof(FpControl) <= sizeof(u64));
28} // namespace Shader::IR
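
Note: FpControl is meant to be stored verbatim in Inst's 64-bit flags word, which is why it is kept trivially copyable and no larger than u64. A small compile-time sanity sketch (the <type_traits> include is assumed):

    static_assert(std::is_trivially_copyable_v<Shader::IR::FpControl>);
    static_assert(sizeof(Shader::IR::FpControl) <= sizeof(u64));
    // FpControl{} therefore encodes: contraction allowed, round to nearest even, flush denorms (FTZ).
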
diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc
index 40759e96a..4ecb5e936 100644
--- a/src/shader_recompiler/frontend/ir/opcode.inc
+++ b/src/shader_recompiler/frontend/ir/opcode.inc
@@ -35,6 +35,12 @@ OPCODE(SetZFlag, Void, U1,
35OPCODE(SetSFlag, Void, U1, ) 35OPCODE(SetSFlag, Void, U1, )
36OPCODE(SetCFlag, Void, U1, ) 36OPCODE(SetCFlag, Void, U1, )
37OPCODE(SetOFlag, Void, U1, ) 37OPCODE(SetOFlag, Void, U1, )
38OPCODE(WorkgroupIdX, U32, )
39OPCODE(WorkgroupIdY, U32, )
40OPCODE(WorkgroupIdZ, U32, )
41OPCODE(LocalInvocationIdX, U32, )
42OPCODE(LocalInvocationIdY, U32, )
43OPCODE(LocalInvocationIdZ, U32, )
38 44
39// Undefined 45// Undefined
40OPCODE(Undef1, U1, ) 46OPCODE(Undef1, U1, )
@@ -44,6 +50,13 @@ OPCODE(Undef32, U32,
44OPCODE(Undef64, U64, ) 50OPCODE(Undef64, U64, )
45 51
46// Memory operations 52// Memory operations
53OPCODE(LoadGlobalU8, U32, U64, )
54OPCODE(LoadGlobalS8, U32, U64, )
55OPCODE(LoadGlobalU16, U32, U64, )
56OPCODE(LoadGlobalS16, U32, U64, )
57OPCODE(LoadGlobal32, U32, U64, )
58OPCODE(LoadGlobal64, Opaque, U64, )
59OPCODE(LoadGlobal128, Opaque, U64, )
47OPCODE(WriteGlobalU8, Void, U64, U32, ) 60OPCODE(WriteGlobalU8, Void, U64, U32, )
48OPCODE(WriteGlobalS8, Void, U64, U32, ) 61OPCODE(WriteGlobalS8, Void, U64, U32, )
49OPCODE(WriteGlobalU16, Void, U64, U32, ) 62OPCODE(WriteGlobalU16, Void, U64, U32, )
@@ -58,6 +71,12 @@ OPCODE(CompositeConstruct3, Opaque, Opaq
58OPCODE(CompositeConstruct4, Opaque, Opaque, Opaque, Opaque, Opaque, ) 71OPCODE(CompositeConstruct4, Opaque, Opaque, Opaque, Opaque, Opaque, )
59OPCODE(CompositeExtract, Opaque, Opaque, U32, ) 72OPCODE(CompositeExtract, Opaque, Opaque, U32, )
60 73
74// Select operations
75OPCODE(Select8, U8, U1, U8, U8, )
76OPCODE(Select16, U16, U1, U16, U16, )
77OPCODE(Select32, U32, U1, U32, U32, )
78OPCODE(Select64, U64, U1, U64, U64, )
79
61// Bitwise conversions 80// Bitwise conversions
62OPCODE(PackUint2x32, U64, Opaque, ) 81OPCODE(PackUint2x32, U64, Opaque, )
63OPCODE(UnpackUint2x32, Opaque, U64, ) 82OPCODE(UnpackUint2x32, Opaque, U64, )
@@ -74,56 +93,84 @@ OPCODE(GetOverflowFromOp, U1, Opaq
74OPCODE(GetZSCOFromOp, ZSCO, Opaque, ) 93OPCODE(GetZSCOFromOp, ZSCO, Opaque, )
75 94
76// Floating-point operations 95// Floating-point operations
77OPCODE(FPAbs16, U16, U16 ) 96OPCODE(FPAbs16, U16, U16, )
78OPCODE(FPAbs32, U32, U32 ) 97OPCODE(FPAbs32, U32, U32, )
79OPCODE(FPAbs64, U64, U64 ) 98OPCODE(FPAbs64, U64, U64, )
80OPCODE(FPAdd16, U16, U16, U16 ) 99OPCODE(FPAdd16, U16, U16, U16, )
81OPCODE(FPAdd32, U32, U32, U32 ) 100OPCODE(FPAdd32, U32, U32, U32, )
82OPCODE(FPAdd64, U64, U64, U64 ) 101OPCODE(FPAdd64, U64, U64, U64, )
83OPCODE(FPFma16, U16, U16, U16 ) 102OPCODE(FPFma16, U16, U16, U16, U16, )
84OPCODE(FPFma32, U32, U32, U32 ) 103OPCODE(FPFma32, U32, U32, U32, U32, )
85OPCODE(FPFma64, U64, U64, U64 ) 104OPCODE(FPFma64, U64, U64, U64, U64, )
86OPCODE(FPMax32, U32, U32, U32 ) 105OPCODE(FPMax32, U32, U32, U32, )
87OPCODE(FPMax64, U64, U64, U64 ) 106OPCODE(FPMax64, U64, U64, U64, )
88OPCODE(FPMin32, U32, U32, U32 ) 107OPCODE(FPMin32, U32, U32, U32, )
89OPCODE(FPMin64, U64, U64, U64 ) 108OPCODE(FPMin64, U64, U64, U64, )
90OPCODE(FPMul16, U16, U16, U16 ) 109OPCODE(FPMul16, U16, U16, U16, )
91OPCODE(FPMul32, U32, U32, U32 ) 110OPCODE(FPMul32, U32, U32, U32, )
92OPCODE(FPMul64, U64, U64, U64 ) 111OPCODE(FPMul64, U64, U64, U64, )
93OPCODE(FPNeg16, U16, U16 ) 112OPCODE(FPNeg16, U16, U16, )
94OPCODE(FPNeg32, U32, U32 ) 113OPCODE(FPNeg32, U32, U32, )
95OPCODE(FPNeg64, U64, U64 ) 114OPCODE(FPNeg64, U64, U64, )
96OPCODE(FPRecip32, U32, U32 ) 115OPCODE(FPRecip32, U32, U32, )
97OPCODE(FPRecip64, U64, U64 ) 116OPCODE(FPRecip64, U64, U64, )
98OPCODE(FPRecipSqrt32, U32, U32 ) 117OPCODE(FPRecipSqrt32, U32, U32, )
99OPCODE(FPRecipSqrt64, U64, U64 ) 118OPCODE(FPRecipSqrt64, U64, U64, )
100OPCODE(FPSqrt, U32, U32 ) 119OPCODE(FPSqrt, U32, U32, )
101OPCODE(FPSin, U32, U32 ) 120OPCODE(FPSin, U32, U32, )
102OPCODE(FPSinNotReduced, U32, U32 ) 121OPCODE(FPSinNotReduced, U32, U32, )
103OPCODE(FPExp2, U32, U32 ) 122OPCODE(FPExp2, U32, U32, )
104OPCODE(FPExp2NotReduced, U32, U32 ) 123OPCODE(FPExp2NotReduced, U32, U32, )
105OPCODE(FPCos, U32, U32 ) 124OPCODE(FPCos, U32, U32, )
106OPCODE(FPCosNotReduced, U32, U32 ) 125OPCODE(FPCosNotReduced, U32, U32, )
107OPCODE(FPLog2, U32, U32 ) 126OPCODE(FPLog2, U32, U32, )
108OPCODE(FPSaturate16, U16, U16 ) 127OPCODE(FPSaturate16, U16, U16, )
109OPCODE(FPSaturate32, U32, U32 ) 128OPCODE(FPSaturate32, U32, U32, )
110OPCODE(FPSaturate64, U64, U64 ) 129OPCODE(FPSaturate64, U64, U64, )
111OPCODE(FPRoundEven16, U16, U16 ) 130OPCODE(FPRoundEven16, U16, U16, )
112OPCODE(FPRoundEven32, U32, U32 ) 131OPCODE(FPRoundEven32, U32, U32, )
113OPCODE(FPRoundEven64, U64, U64 ) 132OPCODE(FPRoundEven64, U64, U64, )
114OPCODE(FPFloor16, U16, U16 ) 133OPCODE(FPFloor16, U16, U16, )
115OPCODE(FPFloor32, U32, U32 ) 134OPCODE(FPFloor32, U32, U32, )
116OPCODE(FPFloor64, U64, U64 ) 135OPCODE(FPFloor64, U64, U64, )
117OPCODE(FPCeil16, U16, U16 ) 136OPCODE(FPCeil16, U16, U16, )
118OPCODE(FPCeil32, U32, U32 ) 137OPCODE(FPCeil32, U32, U32, )
119OPCODE(FPCeil64, U64, U64 ) 138OPCODE(FPCeil64, U64, U64, )
120OPCODE(FPTrunc16, U16, U16 ) 139OPCODE(FPTrunc16, U16, U16, )
121OPCODE(FPTrunc32, U32, U32 ) 140OPCODE(FPTrunc32, U32, U32, )
122OPCODE(FPTrunc64, U64, U64 ) 141OPCODE(FPTrunc64, U64, U64, )
142
143// Integer operations
144OPCODE(IAdd32, U32, U32, U32, )
145OPCODE(IAdd64, U64, U64, U64, )
146OPCODE(IMul32, U32, U32, U32, )
147OPCODE(INeg32, U32, U32, )
148OPCODE(IAbs32, U32, U32, )
149OPCODE(ShiftLeftLogical32, U32, U32, U32, )
150OPCODE(ShiftRightLogical32, U32, U32, U32, )
151OPCODE(ShiftRightArithmetic32, U32, U32, U32, )
152OPCODE(BitwiseAnd32, U32, U32, U32, )
153OPCODE(BitwiseOr32, U32, U32, U32, )
154OPCODE(BitwiseXor32, U32, U32, U32, )
155OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, )
156OPCODE(BitFieldSExtract, U32, U32, U32, U32, )
157OPCODE(BitFieldUExtract, U32, U32, U32, U32, )
158
159OPCODE(SLessThan, U1, U32, U32, )
160OPCODE(ULessThan, U1, U32, U32, )
161OPCODE(IEqual, U1, U32, U32, )
162OPCODE(SLessThanEqual, U1, U32, U32, )
163OPCODE(ULessThanEqual, U1, U32, U32, )
164OPCODE(SGreaterThan, U1, U32, U32, )
165OPCODE(UGreaterThan, U1, U32, U32, )
166OPCODE(INotEqual, U1, U32, U32, )
167OPCODE(SGreaterThanEqual, U1, U32, U32, )
168OPCODE(UGreaterThanEqual, U1, U32, U32, )
123 169
124// Logical operations 170// Logical operations
125OPCODE(LogicalOr, U1, U1, U1, ) 171OPCODE(LogicalOr, U1, U1, U1, )
126OPCODE(LogicalAnd, U1, U1, U1, ) 172OPCODE(LogicalAnd, U1, U1, U1, )
173OPCODE(LogicalXor, U1, U1, U1, )
127OPCODE(LogicalNot, U1, U1, ) 174OPCODE(LogicalNot, U1, U1, )
128 175
129// Conversion operations 176// Conversion operations
diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h
index daf23193f..c6f2f82bf 100644
--- a/src/shader_recompiler/frontend/ir/pred.h
+++ b/src/shader_recompiler/frontend/ir/pred.h
@@ -8,7 +8,16 @@
8 8
9namespace Shader::IR { 9namespace Shader::IR {
10 10
11enum class Pred { P0, P1, P2, P3, P4, P5, P6, PT }; 11enum class Pred : u64 {
12 P0,
13 P1,
14 P2,
15 P3,
16 P4,
17 P5,
18 P6,
19 PT,
20};
12 21
13constexpr size_t NUM_USER_PREDS = 6; 22constexpr size_t NUM_USER_PREDS = 6;
14constexpr size_t NUM_PREDS = 7; 23constexpr size_t NUM_PREDS = 7;
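
Note: giving Pred an explicit u64 underlying type lets decoder unions use it directly as a BitField value type, as the new ISETP translator below does. A minimal sketch, assuming a raw instruction word insn:

    union {
        u64 raw;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<39, 3, IR::Pred> bop_pred;
    } const pred_fields{insn};
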
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index 49d1f4bfb..bd1f96c07 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -56,6 +56,7 @@ Program::Program(Environment& env, const Flow::CFG& cfg) {
56 Optimization::Invoke(Optimization::IdentityRemovalPass, function); 56 Optimization::Invoke(Optimization::IdentityRemovalPass, function);
57 // Optimization::Invoke(Optimization::VerificationPass, function); 57 // Optimization::Invoke(Optimization::VerificationPass, function);
58 } 58 }
59 //*/
59} 60}
60 61
61std::string DumpProgram(const Program& program) { 62std::string DumpProgram(const Program& program) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
new file mode 100644
index 000000000..3da37a2bb
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
@@ -0,0 +1,56 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
11
12namespace Shader::Maxwell {
13
14enum class FpRounding : u64 {
15 RN,
16 RM,
17 RP,
18 RZ,
19};
20
21enum class FmzMode : u64 {
22 None,
23 FTZ,
24 FMZ,
25 INVALIDFMZ3,
26};
27
28inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) {
29 switch (fp_rounding) {
30 case FpRounding::RN:
31 return IR::FpRounding::RN;
32 case FpRounding::RM:
33 return IR::FpRounding::RM;
34 case FpRounding::RP:
35 return IR::FpRounding::RP;
36 case FpRounding::RZ:
37 return IR::FpRounding::RZ;
38 }
39 throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding);
40}
41
42inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) {
43 switch (fmz_mode) {
44 case FmzMode::None:
45 return IR::FmzMode::None;
46 case FmzMode::FTZ:
47 return IR::FmzMode::FTZ;
48 case FmzMode::FMZ:
49 return IR::FmzMode::FMZ;
50 case FmzMode::INVALIDFMZ3:
51 break;
52 }
53 throw NotImplementedException("Invalid FMZ mode {}", fmz_mode);
54}
55
56} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
new file mode 100644
index 000000000..d2c44b9cc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
@@ -0,0 +1,71 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12
13void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,
14 const IR::U32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
15 union {
16 u64 raw;
17 BitField<0, 8, IR::Reg> dest_reg;
18 BitField<8, 8, IR::Reg> src_a;
19 } const fadd{insn};
20
21 if (sat) {
22 throw NotImplementedException("FADD SAT");
23 }
24 if (cc) {
25 throw NotImplementedException("FADD CC");
26 }
27 const IR::U32 op_a{v.ir.FPAbsNeg(v.X(fadd.src_a), abs_a, neg_a)};
28 const IR::U32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
29 IR::FpControl control{
30 .no_contraction{true},
31 .rounding{CastFpRounding(fp_rounding)},
32 .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
33 };
34 v.X(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
35}
36
37void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
38 union {
39 u64 raw;
40 BitField<39, 2, FpRounding> fp_rounding;
41 BitField<44, 1, u64> ftz;
42 BitField<45, 1, u64> neg_b;
43 BitField<46, 1, u64> abs_a;
44 BitField<47, 1, u64> cc;
45 BitField<48, 1, u64> neg_a;
46 BitField<49, 1, u64> abs_b;
47 BitField<50, 1, u64> sat;
48 } const fadd{insn};
49
50 FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b,
51 fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0);
52}
53} // Anonymous namespace
54
55void TranslatorVisitor::FADD_reg(u64 insn) {
56 FADD(*this, insn, GetReg20(insn));
57}
58
59void TranslatorVisitor::FADD_cbuf(u64) {
60 throw NotImplementedException("FADD (cbuf)");
61}
62
63void TranslatorVisitor::FADD_imm(u64) {
64 throw NotImplementedException("FADD (imm)");
65}
66
67void TranslatorVisitor::FADD32I(u64) {
68 throw NotImplementedException("FADD32I");
69}
70
71} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..30ca052ec
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
@@ -0,0 +1,73 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, bool neg_a,
13 bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a;
18 } const ffma{insn};
19
20 if (sat) {
21 throw NotImplementedException("FFMA SAT");
22 }
23 if (cc) {
24 throw NotImplementedException("FFMA CC");
25 }
26 const IR::U32 op_a{v.ir.FPAbsNeg(v.X(ffma.src_a), false, neg_a)};
27 const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
28 const IR::U32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
29 const IR::FpControl fp_control{
30 .no_contraction{true},
31 .rounding{CastFpRounding(fp_rounding)},
32 .fmz_mode{CastFmzMode(fmz_mode)},
33 };
34 v.X(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control));
35}
36
37void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c) {
38 union {
39 u64 raw;
40 BitField<47, 1, u64> cc;
41 BitField<48, 1, u64> neg_b;
42 BitField<49, 1, u64> neg_c;
43 BitField<50, 1, u64> sat;
44 BitField<51, 2, FpRounding> fp_rounding;
45 BitField<53, 2, FmzMode> fmz_mode;
46 } const ffma{insn};
47
48 FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0,
49 ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding);
50}
51} // Anonymous namespace
52
53void TranslatorVisitor::FFMA_reg(u64 insn) {
54 FFMA(*this, insn, GetReg20(insn), GetReg39(insn));
55}
56
57void TranslatorVisitor::FFMA_rc(u64) {
58 throw NotImplementedException("FFMA (rc)");
59}
60
61void TranslatorVisitor::FFMA_cr(u64 insn) {
62 FFMA(*this, insn, GetCbuf(insn), GetReg39(insn));
63}
64
65void TranslatorVisitor::FFMA_imm(u64) {
66 throw NotImplementedException("FFMA (imm)");
67}
68
69void TranslatorVisitor::FFMA32I(u64) {
70 throw NotImplementedException("FFMA32I");
71}
72
73} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
new file mode 100644
index 000000000..743a1e2f0
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
@@ -0,0 +1,108 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/ir/modifiers.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13enum class Scale : u64 {
14 None,
15 D2,
16 D4,
17 D8,
18 M8,
19 M4,
20 M2,
21 INVALIDSCALE37,
22};
23
24float ScaleFactor(Scale scale) {
25 switch (scale) {
26 case Scale::None:
27 return 1.0f;
28 case Scale::D2:
29 return 1.0f / 2.0f;
30 case Scale::D4:
31 return 1.0f / 4.0f;
32 case Scale::D8:
33 return 1.0f / 8.0f;
34 case Scale::M8:
35 return 8.0f;
36 case Scale::M4:
37 return 4.0f;
38 case Scale::M2:
39 return 2.0f;
40 case Scale::INVALIDSCALE37:
41 break;
42 }
43 throw NotImplementedException("Invalid FMUL scale {}", scale);
44}
45
46void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode,
47 FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) {
48 union {
49 u64 raw;
50 BitField<0, 8, IR::Reg> dest_reg;
51 BitField<8, 8, IR::Reg> src_a;
52 } const fmul{insn};
53
54 if (cc) {
55 throw NotImplementedException("FMUL CC");
56 }
57 if (sat) {
58 throw NotImplementedException("FMUL SAT");
59 }
60 IR::U32 op_a{v.X(fmul.src_a)};
61 if (scale != Scale::None) {
62 if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) {
63 throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers");
64 }
65 op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale)));
66 }
67 const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
68 const IR::FpControl fp_control{
69 .no_contraction{true},
70 .rounding{CastFpRounding(fp_rounding)},
71 .fmz_mode{CastFmzMode(fmz_mode)},
72 };
73 v.X(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control));
74}
75
76void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
77 union {
78 u64 raw;
79 BitField<39, 2, FpRounding> fp_rounding;
80 BitField<41, 3, Scale> scale;
81 BitField<44, 2, FmzMode> fmz;
82 BitField<47, 1, u64> cc;
83 BitField<48, 1, u64> neg_b;
84 BitField<50, 1, u64> sat;
85 } fmul{insn};
86
87 FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0,
88 fmul.neg_b != 0);
89}
90} // Anonymous namespace
91
92void TranslatorVisitor::FMUL_reg(u64 insn) {
93 return FMUL(*this, insn, GetReg20(insn));
94}
95
96void TranslatorVisitor::FMUL_cbuf(u64) {
97 throw NotImplementedException("FMUL (cbuf)");
98}
99
100void TranslatorVisitor::FMUL_imm(u64) {
101 throw NotImplementedException("FMUL (imm)");
102}
103
104void TranslatorVisitor::FMUL32I(u64) {
105 throw NotImplementedException("FMUL32I");
106}
107
 108} // namespace Shader::Maxwell
\ No newline at end of file
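
Note: the scale modifier only folds a power-of-two factor into operand A before the multiply, and is only accepted together with FTZ/RN. A scalar model of the emitted sequence, using the Scale/ScaleFactor helpers above (the model function itself is illustrative only):

    float FmulModel(float a, float b, Scale scale, bool neg_b) {
        a *= ScaleFactor(scale);   // e.g. Scale::D2 -> 0.5f, Scale::M8 -> 8.0f
        if (neg_b) {
            b = -b;                // FPAbsNeg(src_b, false, neg_b)
        }
        return a * b;              // FPMul32 with the encoded rounding/FMZ flags
    }
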
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
index 7bc7ce9f2..548c7f611 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -16,6 +16,22 @@ void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
16 ir.SetReg(dest_reg, value); 16 ir.SetReg(dest_reg, value);
17} 17}
18 18
19IR::U32 TranslatorVisitor::GetReg20(u64 insn) {
20 union {
21 u64 raw;
22 BitField<20, 8, IR::Reg> index;
23 } const reg{insn};
24 return X(reg.index);
25}
26
27IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
28 union {
29 u64 raw;
30 BitField<39, 8, IR::Reg> index;
31 } const reg{insn};
32 return X(reg.index);
33}
34
19IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { 35IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
20 union { 36 union {
21 u64 raw; 37 u64 raw;
@@ -33,7 +49,7 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
33 return ir.GetCbuf(binding, byte_offset); 49 return ir.GetCbuf(binding, byte_offset);
34} 50}
35 51
36IR::U32 TranslatorVisitor::GetImm(u64 insn) { 52IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
37 union { 53 union {
38 u64 raw; 54 u64 raw;
39 BitField<20, 19, u64> value; 55 BitField<20, 19, u64> value;
@@ -44,6 +60,14 @@ IR::U32 TranslatorVisitor::GetImm(u64 insn) {
44 return ir.Imm32(value); 60 return ir.Imm32(value);
45} 61}
46 62
63IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
64 union {
65 u64 raw;
66 BitField<20, 32, u64> value;
67 } const imm{insn};
68 return ir.Imm32(static_cast<u32>(imm.value));
69}
70
47void TranslatorVisitor::SetZFlag(const IR::U1& value) { 71void TranslatorVisitor::SetZFlag(const IR::U1& value) {
48 ir.SetZFlag(value); 72 ir.SetZFlag(value);
49} 73}
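
Note: GetReg20, GetReg39, GetCbuf, GetImm20 and GetImm32 decode the common operand-B forms, so each instruction's _reg/_cbuf/_imm entry points can forward to one shared helper, as the translators in this diff do. A schematic sketch of the pattern (FOO is a placeholder, not a real instruction):

    void TranslatorVisitor::FOO_reg(u64 insn) {
        FOO(*this, insn, GetReg20(insn));   // operand B from the register encoded in bits 20..27
    }

    void TranslatorVisitor::FOO_cbuf(u64 insn) {
        FOO(*this, insn, GetCbuf(insn));    // operand B read from a constant buffer
    }

    void TranslatorVisitor::FOO_imm(u64 insn) {
        FOO(*this, insn, GetImm20(insn));   // operand B from the short immediate field
    }
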
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index 8be7d6ff1..ef6d977fe 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -46,7 +46,7 @@ public:
46 void DADD_reg(u64 insn); 46 void DADD_reg(u64 insn);
47 void DADD_cbuf(u64 insn); 47 void DADD_cbuf(u64 insn);
48 void DADD_imm(u64 insn); 48 void DADD_imm(u64 insn);
49 void DEPBAR(u64 insn); 49 void DEPBAR();
50 void DFMA_reg(u64 insn); 50 void DFMA_reg(u64 insn);
51 void DFMA_rc(u64 insn); 51 void DFMA_rc(u64 insn);
52 void DFMA_cr(u64 insn); 52 void DFMA_cr(u64 insn);
@@ -298,9 +298,14 @@ public:
298 [[nodiscard]] IR::U32 X(IR::Reg reg); 298 [[nodiscard]] IR::U32 X(IR::Reg reg);
299 void X(IR::Reg dest_reg, const IR::U32& value); 299 void X(IR::Reg dest_reg, const IR::U32& value);
300 300
301 [[nodiscard]] IR::U32 GetReg20(u64 insn);
302 [[nodiscard]] IR::U32 GetReg39(u64 insn);
303
301 [[nodiscard]] IR::U32 GetCbuf(u64 insn); 304 [[nodiscard]] IR::U32 GetCbuf(u64 insn);
302 305
303 [[nodiscard]] IR::U32 GetImm(u64 insn); 306 [[nodiscard]] IR::U32 GetImm20(u64 insn);
307
308 [[nodiscard]] IR::U32 GetImm32(u64 insn);
304 309
305 void SetZFlag(const IR::U1& value); 310 void SetZFlag(const IR::U1& value);
306 void SetSFlag(const IR::U1& value); 311 void SetSFlag(const IR::U1& value);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
new file mode 100644
index 000000000..60f79b160
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
@@ -0,0 +1,106 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x,
12 bool cc) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_a;
17 } const iadd{insn};
18
19 if (sat) {
20 throw NotImplementedException("IADD SAT");
21 }
22 if (x && po) {
23 throw NotImplementedException("IADD X+PO");
24 }
25 // Operand A is always read from here, negated if needed
26 IR::U32 op_a{v.X(iadd.src_a)};
27 if (neg_a) {
28 op_a = v.ir.INeg(op_a);
29 }
30 // Add both operands
31 IR::U32 result{v.ir.IAdd(op_a, op_b)};
32 if (x) {
33 const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
34 result = v.ir.IAdd(result, carry);
35 }
36 if (po) {
37 // .PO adds one to the result
38 result = v.ir.IAdd(result, v.ir.Imm32(1));
39 }
40 if (cc) {
41 // Store flags
42 // TODO: Does this grab the result pre-PO or after?
43 if (po) {
44 throw NotImplementedException("IADD CC+PO");
45 }
46 // TODO: How does CC behave when X is set?
47 if (x) {
48 throw NotImplementedException("IADD X+CC");
49 }
50 v.SetZFlag(v.ir.GetZeroFromOp(result));
51 v.SetSFlag(v.ir.GetSignFromOp(result));
52 v.SetCFlag(v.ir.GetCarryFromOp(result));
53 v.SetOFlag(v.ir.GetOverflowFromOp(result));
54 }
55 // Store result
56 v.X(iadd.dest_reg, result);
57}
58
59void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
60 union {
61 u64 insn;
62 BitField<43, 1, u64> x;
63 BitField<47, 1, u64> cc;
64 BitField<48, 2, u64> three_for_po;
65 BitField<48, 1, u64> neg_b;
66 BitField<49, 1, u64> neg_a;
67 BitField<50, 1, u64> sat;
68 } const iadd{insn};
69
70 const bool po{iadd.three_for_po == 3};
71 const bool neg_a{!po && iadd.neg_a != 0};
72 if (!po && iadd.neg_b != 0) {
73 op_b = v.ir.INeg(op_b);
74 }
 75 IADD(v, insn, op_b, neg_a, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0);
76}
77} // Anonymous namespace
78
79void TranslatorVisitor::IADD_reg(u64) {
80 throw NotImplementedException("IADD (reg)");
81}
82
83void TranslatorVisitor::IADD_cbuf(u64 insn) {
84 IADD(*this, insn, GetCbuf(insn));
85}
86
87void TranslatorVisitor::IADD_imm(u64) {
88 throw NotImplementedException("IADD (imm)");
89}
90
91void TranslatorVisitor::IADD32I(u64 insn) {
92 union {
93 u64 raw;
94 BitField<52, 1, u64> cc;
95 BitField<53, 1, u64> x;
96 BitField<54, 1, u64> sat;
97 BitField<55, 2, u64> three_for_po;
98 BitField<56, 1, u64> neg_a;
99 } const iadd32i{insn};
100
101 const bool po{iadd32i.three_for_po == 3};
102 const bool neg_a{!po && iadd32i.neg_a != 0};
103 IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0);
104}
105
106} // namespace Shader::Maxwell
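
Note: as a scalar model (wrapping 32-bit arithmetic, helper purely illustrative), the IR emitted above computes:

    u32 IaddModel(u32 a, u32 b, bool neg_a, bool x, bool carry_in, bool po) {
        if (neg_a) {
            a = ~a + 1;                    // ir.INeg(op_a)
        }
        u32 result = a + b;                // ir.IAdd(op_a, op_b)
        if (x) {
            result += carry_in ? 1u : 0u;  // Select(GetCFlag(), Imm32(1), Imm32(0))
        }
        if (po) {
            result += 1;                   // .PO adds one to the result
        }
        return result;
    }
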
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
new file mode 100644
index 000000000..f92c0bbd6
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
@@ -0,0 +1,73 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> op_a;
16 BitField<47, 1, u64> cc;
17 BitField<48, 2, u64> three_for_po;
18 BitField<48, 1, u64> neg_b;
19 BitField<49, 1, u64> neg_a;
20 BitField<39, 5, u64> scale;
21 } const iscadd{insn};
22
23 const bool po{iscadd.three_for_po == 3};
24 IR::U32 op_a{v.X(iscadd.op_a)};
25 if (!po) {
26 // When PO is not present, the bits are interpreted as negation
27 if (iscadd.neg_a != 0) {
28 op_a = v.ir.INeg(op_a);
29 }
30 if (iscadd.neg_b != 0) {
31 op_b = v.ir.INeg(op_b);
32 }
33 }
34 // With the operands already processed, scale A
35 const IR::U32 scale{v.ir.Imm32(static_cast<u32>(iscadd.scale))};
36 const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)};
37
38 IR::U32 result{v.ir.IAdd(scaled_a, op_b)};
39 if (po) {
40 // .PO adds one to the final result
41 result = v.ir.IAdd(result, v.ir.Imm32(1));
42 }
43 v.X(iscadd.dest_reg, result);
44
45 if (iscadd.cc != 0) {
46 throw NotImplementedException("ISCADD CC");
47 }
48}
49
50} // Anonymous namespace
51
52void TranslatorVisitor::ISCADD_reg(u64 insn) {
53 union {
54 u64 raw;
55 BitField<20, 8, IR::Reg> op_b;
56 } const iscadd{insn};
57
58 ISCADD(*this, insn, X(iscadd.op_b));
59}
60
61void TranslatorVisitor::ISCADD_cbuf(u64) {
62 throw NotImplementedException("ISCADD (cbuf)");
63}
64
65void TranslatorVisitor::ISCADD_imm(u64) {
66 throw NotImplementedException("ISCADD (imm)");
67}
68
69void TranslatorVisitor::ISCADD32I(u64) {
70 throw NotImplementedException("ISCADD32I");
71}
72
73} // namespace Shader::Maxwell
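
Note: equivalently, ISCADD computes (A << scale) + B, with the two negation bits reinterpreted as .PO when both are set. A scalar model (wrapping arithmetic, helper purely illustrative):

    u32 IscaddModel(u32 a, u32 b, u32 scale, bool neg_a, bool neg_b, bool po) {
        if (!po) {
            if (neg_a) {
                a = ~a + 1;                // INeg(op_a)
            }
            if (neg_b) {
                b = ~b + 1;                // INeg(op_b)
            }
        }
        u32 result = (a << scale) + b;     // ShiftLeftLogical + IAdd
        if (po) {
            result += 1;                   // .PO adds one to the final result
        }
        return result;
    }
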
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
new file mode 100644
index 000000000..76c6b5291
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
@@ -0,0 +1,99 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class CompareOp : u64 {
12 F, // Always false
13 LT, // Less than
14 EQ, // Equal
15 LE, // Less than or equal
16 GT, // Greater than
17 NE, // Not equal
18 GE, // Greater than or equal
19 T, // Always true
20};
21
22enum class Bop : u64 {
23 AND,
24 OR,
25 XOR,
26};
27
28IR::U1 Compare(IR::IREmitter& ir, CompareOp op, const IR::U32& lhs, const IR::U32& rhs,
29 bool is_signed) {
30 switch (op) {
31 case CompareOp::F:
32 return ir.Imm1(false);
33 case CompareOp::LT:
34 return ir.ILessThan(lhs, rhs, is_signed);
35 case CompareOp::EQ:
36 return ir.IEqual(lhs, rhs);
37 case CompareOp::LE:
38 return ir.ILessThanEqual(lhs, rhs, is_signed);
39 case CompareOp::GT:
40 return ir.IGreaterThan(lhs, rhs, is_signed);
41 case CompareOp::NE:
42 return ir.INotEqual(lhs, rhs);
43 case CompareOp::GE:
44 return ir.IGreaterThanEqual(lhs, rhs, is_signed);
45 case CompareOp::T:
46 return ir.Imm1(true);
47 }
48 throw NotImplementedException("Invalid ISETP compare op {}", op);
49}
50
51IR::U1 Combine(IR::IREmitter& ir, Bop bop, const IR::U1& comparison, const IR::U1& bop_pred) {
52 switch (bop) {
53 case Bop::AND:
54 return ir.LogicalAnd(comparison, bop_pred);
55 case Bop::OR:
56 return ir.LogicalOr(comparison, bop_pred);
57 case Bop::XOR:
58 return ir.LogicalXor(comparison, bop_pred);
59 }
60 throw NotImplementedException("Invalid ISETP bop {}", bop);
61}
62
63void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
64 union {
65 u64 raw;
66 BitField<0, 3, IR::Pred> dest_pred_b;
67 BitField<3, 3, IR::Pred> dest_pred_a;
68 BitField<8, 8, IR::Reg> src_reg_a;
69 BitField<39, 3, IR::Pred> bop_pred;
70 BitField<42, 1, u64> neg_bop_pred;
71 BitField<45, 2, Bop> bop;
72 BitField<48, 1, u64> is_signed;
73 BitField<49, 3, CompareOp> compare_op;
74 } const isetp{insn};
75
76 const Bop bop{isetp.bop};
77 const IR::U32 op_a{v.X(isetp.src_reg_a)};
78 const IR::U1 comparison{Compare(v.ir, isetp.compare_op, op_a, op_b, isetp.is_signed != 0)};
79 const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)};
80 const IR::U1 result_a{Combine(v.ir, bop, comparison, bop_pred)};
81 const IR::U1 result_b{Combine(v.ir, bop, v.ir.LogicalNot(comparison), bop_pred)};
82 v.ir.SetPred(isetp.dest_pred_a, result_a);
83 v.ir.SetPred(isetp.dest_pred_b, result_b);
84}
85} // Anonymous namespace
86
87void TranslatorVisitor::ISETP_reg(u64 insn) {
88 ISETP(*this, insn, GetReg20(insn));
89}
90
91void TranslatorVisitor::ISETP_cbuf(u64 insn) {
92 ISETP(*this, insn, GetCbuf(insn));
93}
94
95void TranslatorVisitor::ISETP_imm(u64) {
96 throw NotImplementedException("ISETP_imm");
97}
98
99} // namespace Shader::Maxwell
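
Note: ISETP produces two predicates from a single comparison: destination A receives (A cmp B) combined with the bop predicate, and destination B receives the negated comparison combined the same way. A boolean model using the Bop enum above (helper purely illustrative):

    void IsetpModel(bool& dest_a, bool& dest_b, bool cmp, bool bop_pred, Bop bop) {
        const auto combine{[&](bool c) {
            switch (bop) {
            case Bop::AND:
                return c && bop_pred;
            case Bop::OR:
                return c || bop_pred;
            case Bop::XOR:
                return c != bop_pred;
            }
            return false;
        }};
        dest_a = combine(cmp);             // Combine(bop, comparison, bop_pred)
        dest_b = combine(!cmp);            // Combine(bop, LogicalNot(comparison), bop_pred)
    }
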
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
new file mode 100644
index 000000000..d4b417d14
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
@@ -0,0 +1,71 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_reg_a;
16 BitField<39, 1, u64> w;
17 BitField<43, 1, u64> x;
18 BitField<47, 1, u64> cc;
19 } const shl{insn};
20
21 if (shl.x != 0) {
22 throw NotImplementedException("SHL.X");
23 }
24 if (shl.cc != 0) {
25 throw NotImplementedException("SHL.CC");
26 }
27 const IR::U32 base{v.X(shl.src_reg_a)};
28 IR::U32 result;
29 if (shl.w != 0) {
30        // When .W is set, the shift value is wrapped (taken modulo 32).
31        // To emulate this we just have to mask it ourselves.
32 const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))};
33 result = v.ir.ShiftLeftLogical(base, shift);
34 } else {
35 // When .W is not set, the shift value is clamped between 0 and 32.
36        // To emulate this we have to keep in mind the special case of a shift by 32, which evaluates to 0.
37 // We can safely evaluate an out of bounds shift according to the SPIR-V specification:
38 //
39 // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical
40 // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than
41 // or equal to the bit width of the components of Base."
42 //
43 // And on the GLASM specification it is also safe to evaluate out of bounds:
44 //
45 // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt
46 // "The results of a shift operation ("<<") are undefined if the value of the second operand
47 // is negative, or greater than or equal to the number of bits in the first operand."
48 //
49        // Note the emphasis on undefined results, as opposed to undefined behavior.
50 //
51 const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)};
52 const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)};
53 result = v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0));
54 }
55 v.X(shl.dest_reg, result);
56}
57} // Anonymous namespace
58
59void TranslatorVisitor::SHL_reg(u64) {
60 throw NotImplementedException("SHL_reg");
61}
62
63void TranslatorVisitor::SHL_cbuf(u64) {
64 throw NotImplementedException("SHL_cbuf");
65}
66
67void TranslatorVisitor::SHL_imm(u64 insn) {
68 SHL(*this, insn, GetImm20(insn));
69}
70
71} // namespace Shader::Maxwell
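
The two shift behaviours emulated above can be summarised with a scalar sketch (illustrative only, independent of the IR emitter): with .W the shift amount wraps modulo 32, without it any shift of 32 or more produces zero.

    // Scalar model of the SHL semantics described in the comments above.
    #include <cstdint>

    uint32_t ShlWrap(uint32_t base, uint32_t shift) {
        return base << (shift & 31);            // .W: shift amount wraps modulo 32
    }

    uint32_t ShlClamp(uint32_t base, uint32_t shift) {
        return shift < 32 ? base << shift : 0;  // no .W: shifts >= 32 evaluate to 0
    }
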
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
new file mode 100644
index 000000000..70a7c76c5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
@@ -0,0 +1,110 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class SelectMode : u64 {
12 Default,
13 CLO,
14 CHI,
15 CSFU,
16 CBCC,
17};
18
19enum class Half : u64 {
20 H0, // Least-significant bits (15:0)
21 H1, // Most-significant bits (31:16)
22};
23
24IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) {
25 const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)};
26 return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed);
27}
28
29void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c,
30 SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) {
31 union {
32 u64 raw;
33 BitField<0, 8, IR::Reg> dest_reg;
34 BitField<8, 8, IR::Reg> src_reg_a;
35 BitField<47, 1, u64> cc;
36 BitField<48, 1, u64> is_a_signed;
37 BitField<49, 1, u64> is_b_signed;
38 BitField<53, 1, Half> half_a;
39 } const xmad{insn};
40
41 if (x) {
42 throw NotImplementedException("XMAD X");
43 }
44 const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)};
45 const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)};
46
47 IR::U32 product{v.ir.IMul(op_a, op_b)};
48 if (psl) {
49        // .PSL shifts the product left by 16 bits
50 product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16));
51 }
52 const IR::U32 op_c{[&]() -> IR::U32 {
53 switch (select_mode) {
54 case SelectMode::Default:
55 return src_c;
56 case SelectMode::CLO:
57 return ExtractHalf(v, src_c, Half::H0, false);
58 case SelectMode::CHI:
59 return ExtractHalf(v, src_c, Half::H1, false);
60 case SelectMode::CBCC:
61            return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c);
62 case SelectMode::CSFU:
63 throw NotImplementedException("XMAD CSFU");
64 }
65 throw NotImplementedException("Invalid XMAD select mode {}", select_mode);
66 }()};
67 IR::U32 result{v.ir.IAdd(product, op_c)};
68 if (mrg) {
69 // .MRG inserts src_b [15:0] into result's [31:16].
70 const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)};
71 result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16));
72 }
73 if (xmad.cc) {
74 throw NotImplementedException("XMAD CC");
75 }
76 // Store result
77 v.X(xmad.dest_reg, result);
78}
79} // Anonymous namespace
80
81void TranslatorVisitor::XMAD_reg(u64) {
82 throw NotImplementedException("XMAD (reg)");
83}
84
85void TranslatorVisitor::XMAD_rc(u64) {
86 throw NotImplementedException("XMAD (rc)");
87}
88
89void TranslatorVisitor::XMAD_cr(u64) {
90 throw NotImplementedException("XMAD (cr)");
91}
92
93void TranslatorVisitor::XMAD_imm(u64 insn) {
94 union {
95 u64 raw;
96 BitField<20, 16, u64> src_b;
97 BitField<36, 1, u64> psl;
98 BitField<37, 1, u64> mrg;
99 BitField<38, 1, u64> x;
100 BitField<39, 8, IR::Reg> src_c;
101 BitField<50, 3, SelectMode> select_mode;
102 } const xmad{insn};
103
104 const IR::U32 src_b{ir.Imm32(static_cast<u32>(xmad.src_b))};
105 const IR::U32 src_c{X(xmad.src_c)};
106 XMAD(*this, insn, src_b, src_c, xmad.select_mode, Half::H0, xmad.psl != 0, xmad.mrg != 0,
107 xmad.x != 0);
108}
109
110} // namespace Shader::Maxwell
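
For reference, XMAD multiplies two 16-bit halves of its first two sources and adds a third operand selected by the mode bits. A scalar sketch of the unsigned case handled above (assumed semantics; the CLO/CHI/MRG variants are omitted for brevity):

    // Scalar model of XMAD for the unsigned case (assumed semantics, illustrative only).
    #include <cstdint>

    uint32_t Half16(uint32_t value, bool high) {
        return high ? value >> 16 : value & 0xffff;
    }

    uint32_t XmadScalar(uint32_t a, uint32_t b, uint32_t c,
                        bool half_a, bool half_b, bool psl, bool cbcc) {
        uint32_t product = Half16(a, half_a) * Half16(b, half_b);
        if (psl) {
            product <<= 16;                                // .PSL shifts the product left by 16 bits
        }
        const uint32_t addend = cbcc ? (b << 16) + c : c;  // .CBCC builds its own addend
        return product + addend;
    }
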
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
index d8fd387cf..c9669c617 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
@@ -10,16 +10,35 @@
10 10
11namespace Shader::Maxwell { 11namespace Shader::Maxwell {
12namespace { 12namespace {
13enum class LoadSize : u64 {
14 U8, // Zero-extend
15 S8, // Sign-extend
16 U16, // Zero-extend
17 S16, // Sign-extend
18 B32,
19 B64,
20 B128,
21 U128, // ???
22};
23
13enum class StoreSize : u64 { 24enum class StoreSize : u64 {
14 U8, 25 U8, // Zero-extend
15 S8, 26 S8, // Sign-extend
16 U16, 27 U16, // Zero-extend
17 S16, 28 S16, // Sign-extend
18 B32, 29 B32,
19 B64, 30 B64,
20 B128, 31 B128,
21}; 32};
22 33
34// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
35enum class LoadCache : u64 {
36 CA, // Cache at all levels, likely to be accessed again
37 CG, // Cache at global level (cache in L2 and below, not L1)
38 CI, // ???
39 CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again)
40};
41
23// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html 42// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
24enum class StoreCache : u64 { 43enum class StoreCache : u64 {
25 WB, // Cache write-back all coherent levels 44 WB, // Cache write-back all coherent levels
@@ -27,61 +46,137 @@ enum class StoreCache : u64 {
27 CS, // Cache streaming, likely to be accessed once 46 CS, // Cache streaming, likely to be accessed once
28 WT, // Cache write-through (to system memory) 47 WT, // Cache write-through (to system memory)
29}; 48};
30} // Anonymous namespace
31 49
32void TranslatorVisitor::STG(u64 insn) { 50IR::U64 Address(TranslatorVisitor& v, u64 insn) {
33 // STG stores registers into global memory.
34 union { 51 union {
35 u64 raw; 52 u64 raw;
36 BitField<0, 8, IR::Reg> data_reg;
37 BitField<8, 8, IR::Reg> addr_reg; 53 BitField<8, 8, IR::Reg> addr_reg;
54 BitField<20, 24, s64> addr_offset;
55 BitField<20, 24, u64> rz_addr_offset;
38 BitField<45, 1, u64> e; 56 BitField<45, 1, u64> e;
39 BitField<46, 2, StoreCache> cache; 57 } const mem{insn};
40 BitField<48, 3, StoreSize> size;
41 } const stg{insn};
42 58
43 const IR::U64 address{[&]() -> IR::U64 { 59 const IR::U64 address{[&]() -> IR::U64 {
44 if (stg.e == 0) { 60 if (mem.e == 0) {
45 // STG without .E uses a 32-bit pointer, zero-extend it 61 // LDG/STG without .E uses a 32-bit pointer, zero-extend it
46 return ir.ConvertU(64, X(stg.addr_reg)); 62 return v.ir.ConvertU(64, v.X(mem.addr_reg));
47 } 63 }
48 if (!IR::IsAligned(stg.addr_reg, 2)) { 64 if (!IR::IsAligned(mem.addr_reg, 2)) {
49 throw NotImplementedException("Unaligned address register"); 65 throw NotImplementedException("Unaligned address register");
50 } 66 }
51 // Pack two registers to build the 32-bit address 67 // Pack two registers to build the 64-bit address
52 return ir.PackUint2x32(ir.CompositeConstruct(X(stg.addr_reg), X(stg.addr_reg + 1))); 68 return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1)));
69 }()};
70 const u64 addr_offset{[&]() -> u64 {
71 if (mem.addr_reg == IR::Reg::RZ) {
72 // When RZ is used, the address is an absolute address
73 return static_cast<u64>(mem.rz_addr_offset.Value());
74 } else {
75 return static_cast<u64>(mem.addr_offset.Value());
76 }
53 }()}; 77 }()};
78 // Apply the offset
79 return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
80}
81} // Anonymous namespace
82
83void TranslatorVisitor::LDG(u64 insn) {
84 // LDG loads global memory into registers
85 union {
86 u64 raw;
87 BitField<0, 8, IR::Reg> dest_reg;
88 BitField<46, 2, LoadCache> cache;
89 BitField<48, 3, LoadSize> size;
90 } const ldg{insn};
91
92 // Pointer to load data from
93 const IR::U64 address{Address(*this, insn)};
94 const IR::Reg dest_reg{ldg.dest_reg};
95 switch (ldg.size) {
96 case LoadSize::U8:
97 X(dest_reg, ir.LoadGlobalU8(address));
98 break;
99 case LoadSize::S8:
100 X(dest_reg, ir.LoadGlobalS8(address));
101 break;
102 case LoadSize::U16:
103 X(dest_reg, ir.LoadGlobalU16(address));
104 break;
105 case LoadSize::S16:
106 X(dest_reg, ir.LoadGlobalS16(address));
107 break;
108 case LoadSize::B32:
109 X(dest_reg, ir.LoadGlobal32(address));
110 break;
111 case LoadSize::B64: {
112 if (!IR::IsAligned(dest_reg, 2)) {
113 throw NotImplementedException("Unaligned data registers");
114 }
115 const IR::Value vector{ir.LoadGlobal64(address)};
116 for (int i = 0; i < 2; ++i) {
117 X(dest_reg + i, ir.CompositeExtract(vector, i));
118 }
119 break;
120 }
121 case LoadSize::B128: {
122 if (!IR::IsAligned(dest_reg, 4)) {
123 throw NotImplementedException("Unaligned data registers");
124 }
125 const IR::Value vector{ir.LoadGlobal128(address)};
126 for (int i = 0; i < 4; ++i) {
127 X(dest_reg + i, ir.CompositeExtract(vector, i));
128 }
129 break;
130 }
131 case LoadSize::U128:
132 throw NotImplementedException("LDG U.128");
133 default:
134 throw NotImplementedException("Invalid LDG size {}", ldg.size.Value());
135 }
136}
137
138void TranslatorVisitor::STG(u64 insn) {
139 // STG stores registers into global memory.
140 union {
141 u64 raw;
142 BitField<0, 8, IR::Reg> data_reg;
143 BitField<46, 2, StoreCache> cache;
144 BitField<48, 3, StoreSize> size;
145 } const stg{insn};
54 146
147 // Pointer to store data into
148 const IR::U64 address{Address(*this, insn)};
149 const IR::Reg data_reg{stg.data_reg};
55 switch (stg.size) { 150 switch (stg.size) {
56 case StoreSize::U8: 151 case StoreSize::U8:
57 ir.WriteGlobalU8(address, X(stg.data_reg)); 152 ir.WriteGlobalU8(address, X(data_reg));
58 break; 153 break;
59 case StoreSize::S8: 154 case StoreSize::S8:
60 ir.WriteGlobalS8(address, X(stg.data_reg)); 155 ir.WriteGlobalS8(address, X(data_reg));
61 break; 156 break;
62 case StoreSize::U16: 157 case StoreSize::U16:
63 ir.WriteGlobalU16(address, X(stg.data_reg)); 158 ir.WriteGlobalU16(address, X(data_reg));
64 break; 159 break;
65 case StoreSize::S16: 160 case StoreSize::S16:
66 ir.WriteGlobalS16(address, X(stg.data_reg)); 161 ir.WriteGlobalS16(address, X(data_reg));
67 break; 162 break;
68 case StoreSize::B32: 163 case StoreSize::B32:
69 ir.WriteGlobal32(address, X(stg.data_reg)); 164 ir.WriteGlobal32(address, X(data_reg));
70 break; 165 break;
71 case StoreSize::B64: { 166 case StoreSize::B64: {
72 if (!IR::IsAligned(stg.data_reg, 2)) { 167 if (!IR::IsAligned(data_reg, 2)) {
73 throw NotImplementedException("Unaligned data registers"); 168 throw NotImplementedException("Unaligned data registers");
74 } 169 }
75 const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1))}; 170 const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))};
76 ir.WriteGlobal64(address, vector); 171 ir.WriteGlobal64(address, vector);
77 break; 172 break;
78 } 173 }
79 case StoreSize::B128: 174 case StoreSize::B128:
80 if (!IR::IsAligned(stg.data_reg, 4)) { 175 if (!IR::IsAligned(data_reg, 4)) {
81 throw NotImplementedException("Unaligned data registers"); 176 throw NotImplementedException("Unaligned data registers");
82 } 177 }
83 const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1), 178 const IR::Value vector{
84 X(stg.data_reg + 2), X(stg.data_reg + 3))}; 179 ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))};
85 ir.WriteGlobal128(address, vector); 180 ir.WriteGlobal128(address, vector);
86 break; 181 break;
87 } 182 }
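
The Address() helper introduced above forms the effective address as follows: without .E the 32-bit address register is zero-extended, with .E two consecutive registers are packed into a 64-bit pointer, and a 24-bit immediate offset is added, sign-extended unless the address register is RZ, in which case the raw field is used as an absolute, unsigned offset. A scalar sketch of that computation, assuming plain register values instead of IR values:

    // Scalar model of the LDG/STG effective address (illustrative only).
    #include <cstdint>

    uint64_t EffectiveAddress(bool e, uint32_t reg_lo, uint32_t reg_hi,
                              bool addr_reg_is_rz, uint32_t raw_offset24) {
        // Without .E the pointer is the zero-extended 32-bit address register;
        // with .E two consecutive registers are packed into a 64-bit pointer.
        const uint64_t base = e ? (uint64_t{reg_hi} << 32) | reg_lo : uint64_t{reg_lo};
        // Sign-extend the 24-bit immediate, except when RZ is the address register,
        // where the raw field is taken as an unsigned absolute offset.
        const int64_t signed_offset =
            static_cast<int64_t>(raw_offset24) - ((raw_offset24 & 0x800000) != 0 ? (int64_t{1} << 24) : 0);
        const uint64_t offset = addr_reg_is_rz ? uint64_t{raw_offset24} : static_cast<uint64_t>(signed_offset);
        return base + offset;
    }
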
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
index 7fa35ba3a..1711d3f48 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
@@ -39,7 +39,7 @@ void TranslatorVisitor::MOV_cbuf(u64 insn) {
39void TranslatorVisitor::MOV_imm(u64 insn) { 39void TranslatorVisitor::MOV_imm(u64 insn) {
40 const MOV mov{insn}; 40 const MOV mov{insn};
41 CheckMask(mov); 41 CheckMask(mov);
42 X(mov.dest_reg, GetImm(insn)); 42 X(mov.dest_reg, GetImm20(insn));
43} 43}
44 44
45} // namespace Shader::Maxwell 45} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
new file mode 100644
index 000000000..93cea302a
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
@@ -0,0 +1,114 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class SpecialRegister : u64 {
12 SR_LANEID = 0,
13 SR_VIRTCFG = 2,
14 SR_VIRTID = 3,
15 SR_PM0 = 4,
16 SR_PM1 = 5,
17 SR_PM2 = 6,
18 SR_PM3 = 7,
19 SR_PM4 = 8,
20 SR_PM5 = 9,
21 SR_PM6 = 10,
22 SR_PM7 = 11,
23 SR_ORDERING_TICKET = 15,
24 SR_PRIM_TYPE = 16,
25 SR_INVOCATION_ID = 17,
26 SR_Y_DIRECTION = 18,
27 SR_THREAD_KILL = 19,
28 SM_SHADER_TYPE = 20,
29 SR_DIRECTCBEWRITEADDRESSLOW = 21,
30 SR_DIRECTCBEWRITEADDRESSHIGH = 22,
31 SR_DIRECTCBEWRITEENABLE = 23,
32 SR_MACHINE_ID_0 = 24,
33 SR_MACHINE_ID_1 = 25,
34 SR_MACHINE_ID_2 = 26,
35 SR_MACHINE_ID_3 = 27,
36 SR_AFFINITY = 28,
37 SR_INVOCATION_INFO = 29,
38 SR_WSCALEFACTOR_XY = 30,
39 SR_WSCALEFACTOR_Z = 31,
40 SR_TID = 32,
41 SR_TID_X = 33,
42 SR_TID_Y = 34,
43 SR_TID_Z = 35,
44 SR_CTAID_X = 37,
45 SR_CTAID_Y = 38,
46 SR_CTAID_Z = 39,
47 SR_NTID = 49,
48 SR_CirQueueIncrMinusOne = 50,
49 SR_NLATC = 51,
50 SR_SWINLO = 57,
51 SR_SWINSZ = 58,
52 SR_SMEMSZ = 59,
53 SR_SMEMBANKS = 60,
54 SR_LWINLO = 61,
55 SR_LWINSZ = 62,
56 SR_LMEMLOSZ = 63,
57 SR_LMEMHIOFF = 64,
58 SR_EQMASK = 65,
59 SR_LTMASK = 66,
60 SR_LEMASK = 67,
61 SR_GTMASK = 68,
62 SR_GEMASK = 69,
63 SR_REGALLOC = 70,
64 SR_GLOBALERRORSTATUS = 73,
65 SR_WARPERRORSTATUS = 75,
66 SR_PM_HI0 = 81,
67 SR_PM_HI1 = 82,
68 SR_PM_HI2 = 83,
69 SR_PM_HI3 = 84,
70 SR_PM_HI4 = 85,
71 SR_PM_HI5 = 86,
72 SR_PM_HI6 = 87,
73 SR_PM_HI7 = 88,
74 SR_CLOCKLO = 89,
75 SR_CLOCKHI = 90,
76 SR_GLOBALTIMERLO = 91,
77 SR_GLOBALTIMERHI = 92,
78 SR_HWTASKID = 105,
79 SR_CIRCULARQUEUEENTRYINDEX = 106,
80 SR_CIRCULARQUEUEENTRYADDRESSLOW = 107,
81 SR_CIRCULARQUEUEENTRYADDRESSHIGH = 108,
82};
83
84[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) {
85 switch (special_register) {
86 case SpecialRegister::SR_TID_X:
87 return ir.LocalInvocationIdX();
88 case SpecialRegister::SR_TID_Y:
89 return ir.LocalInvocationIdY();
90 case SpecialRegister::SR_TID_Z:
91 return ir.LocalInvocationIdZ();
92 case SpecialRegister::SR_CTAID_X:
93 return ir.WorkgroupIdX();
94 case SpecialRegister::SR_CTAID_Y:
95 return ir.WorkgroupIdY();
96 case SpecialRegister::SR_CTAID_Z:
97 return ir.WorkgroupIdZ();
98 default:
99 throw NotImplementedException("S2R special register {}", special_register);
100 }
101}
102} // Anonymous namespace
103
104void TranslatorVisitor::S2R(u64 insn) {
105 union {
106 u64 raw;
107 BitField<0, 8, IR::Reg> dest_reg;
108 BitField<20, 8, SpecialRegister> src_reg;
109 } const s2r{insn};
110
111 X(s2r.dest_reg, Read(ir, s2r.src_reg));
112}
113
114} // namespace Shader::Maxwell
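
S2R reads a hardware special register into a general-purpose register; the subset handled above maps directly onto compute-stage built-ins (SR_TID_* to the local invocation ID, SR_CTAID_* to the workgroup ID). A minimal sketch of that mapping, using a hypothetical BuiltIns structure to stand in for the built-in values:

    // Mapping of the S2R registers handled above to compute built-ins (hypothetical names).
    #include <cstdint>
    #include <stdexcept>

    struct BuiltIns {
        uint32_t local_invocation_id[3];  // SR_TID_X/Y/Z   (indices 33..35)
        uint32_t workgroup_id[3];         // SR_CTAID_X/Y/Z (indices 37..39)
    };

    uint32_t ReadSpecial(const BuiltIns& b, uint32_t sr_index) {
        if (sr_index >= 33 && sr_index <= 35) {
            return b.local_invocation_id[sr_index - 33];
        }
        if (sr_index >= 37 && sr_index <= 39) {
            return b.workgroup_id[sr_index - 37];
        }
        throw std::runtime_error("unhandled special register");
    }
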
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 0f52696d1..d70399f6b 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -7,21 +7,8 @@
7#include "shader_recompiler/frontend/maxwell/opcode.h" 7#include "shader_recompiler/frontend/maxwell/opcode.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" 8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9 9
10#include "shader_recompiler/ir_opt/passes.h"
11
12namespace Shader::Maxwell { 10namespace Shader::Maxwell {
13 11
14[[maybe_unused]] static inline void DumpOptimized(IR::Block& block) {
15 auto raw{IR::DumpBlock(block)};
16
17 Optimization::GetSetElimination(block);
18 Optimization::DeadCodeEliminationPass(block);
19 Optimization::IdentityRemovalPass(block);
20 auto dumped{IR::DumpBlock(block)};
21
22 fmt::print(stderr, "{}", dumped);
23}
24
25[[noreturn]] static void ThrowNotImplemented(Opcode opcode) { 12[[noreturn]] static void ThrowNotImplemented(Opcode opcode) {
26 throw NotImplementedException("Instruction {} is not implemented", opcode); 13 throw NotImplementedException("Instruction {} is not implemented", opcode);
27} 14}
@@ -146,8 +133,8 @@ void TranslatorVisitor::DADD_imm(u64) {
146 ThrowNotImplemented(Opcode::DADD_imm); 133 ThrowNotImplemented(Opcode::DADD_imm);
147} 134}
148 135
149void TranslatorVisitor::DEPBAR(u64) { 136void TranslatorVisitor::DEPBAR() {
150 ThrowNotImplemented(Opcode::DEPBAR); 137 // DEPBAR is a no-op
151} 138}
152 139
153void TranslatorVisitor::DFMA_reg(u64) { 140void TranslatorVisitor::DFMA_reg(u64) {
@@ -230,22 +217,6 @@ void TranslatorVisitor::F2F_imm(u64) {
230 ThrowNotImplemented(Opcode::F2F_imm); 217 ThrowNotImplemented(Opcode::F2F_imm);
231} 218}
232 219
233void TranslatorVisitor::FADD_reg(u64) {
234 ThrowNotImplemented(Opcode::FADD_reg);
235}
236
237void TranslatorVisitor::FADD_cbuf(u64) {
238 ThrowNotImplemented(Opcode::FADD_cbuf);
239}
240
241void TranslatorVisitor::FADD_imm(u64) {
242 ThrowNotImplemented(Opcode::FADD_imm);
243}
244
245void TranslatorVisitor::FADD32I(u64) {
246 ThrowNotImplemented(Opcode::FADD32I);
247}
248
249void TranslatorVisitor::FCHK_reg(u64) { 220void TranslatorVisitor::FCHK_reg(u64) {
250 ThrowNotImplemented(Opcode::FCHK_reg); 221 ThrowNotImplemented(Opcode::FCHK_reg);
251} 222}
@@ -274,26 +245,6 @@ void TranslatorVisitor::FCMP_imm(u64) {
274 ThrowNotImplemented(Opcode::FCMP_imm); 245 ThrowNotImplemented(Opcode::FCMP_imm);
275} 246}
276 247
277void TranslatorVisitor::FFMA_reg(u64) {
278 ThrowNotImplemented(Opcode::FFMA_reg);
279}
280
281void TranslatorVisitor::FFMA_rc(u64) {
282 ThrowNotImplemented(Opcode::FFMA_rc);
283}
284
285void TranslatorVisitor::FFMA_cr(u64) {
286 ThrowNotImplemented(Opcode::FFMA_cr);
287}
288
289void TranslatorVisitor::FFMA_imm(u64) {
290 ThrowNotImplemented(Opcode::FFMA_imm);
291}
292
293void TranslatorVisitor::FFMA32I(u64) {
294 ThrowNotImplemented(Opcode::FFMA32I);
295}
296
297void TranslatorVisitor::FLO_reg(u64) { 248void TranslatorVisitor::FLO_reg(u64) {
298 ThrowNotImplemented(Opcode::FLO_reg); 249 ThrowNotImplemented(Opcode::FLO_reg);
299} 250}
@@ -318,22 +269,6 @@ void TranslatorVisitor::FMNMX_imm(u64) {
318 ThrowNotImplemented(Opcode::FMNMX_imm); 269 ThrowNotImplemented(Opcode::FMNMX_imm);
319} 270}
320 271
321void TranslatorVisitor::FMUL_reg(u64) {
322 ThrowNotImplemented(Opcode::FMUL_reg);
323}
324
325void TranslatorVisitor::FMUL_cbuf(u64) {
326 ThrowNotImplemented(Opcode::FMUL_cbuf);
327}
328
329void TranslatorVisitor::FMUL_imm(u64) {
330 ThrowNotImplemented(Opcode::FMUL_imm);
331}
332
333void TranslatorVisitor::FMUL32I(u64) {
334 ThrowNotImplemented(Opcode::FMUL32I);
335}
336
337void TranslatorVisitor::FSET_reg(u64) { 272void TranslatorVisitor::FSET_reg(u64) {
338 ThrowNotImplemented(Opcode::FSET_reg); 273 ThrowNotImplemented(Opcode::FSET_reg);
339} 274}
@@ -470,18 +405,6 @@ void TranslatorVisitor::I2I_imm(u64) {
470 ThrowNotImplemented(Opcode::I2I_imm); 405 ThrowNotImplemented(Opcode::I2I_imm);
471} 406}
472 407
473void TranslatorVisitor::IADD_reg(u64) {
474 ThrowNotImplemented(Opcode::IADD_reg);
475}
476
477void TranslatorVisitor::IADD_cbuf(u64) {
478 ThrowNotImplemented(Opcode::IADD_cbuf);
479}
480
481void TranslatorVisitor::IADD_imm(u64) {
482 ThrowNotImplemented(Opcode::IADD_imm);
483}
484
485void TranslatorVisitor::IADD3_reg(u64) { 408void TranslatorVisitor::IADD3_reg(u64) {
486 ThrowNotImplemented(Opcode::IADD3_reg); 409 ThrowNotImplemented(Opcode::IADD3_reg);
487} 410}
@@ -494,10 +417,6 @@ void TranslatorVisitor::IADD3_imm(u64) {
494 ThrowNotImplemented(Opcode::IADD3_imm); 417 ThrowNotImplemented(Opcode::IADD3_imm);
495} 418}
496 419
497void TranslatorVisitor::IADD32I(u64) {
498 ThrowNotImplemented(Opcode::IADD32I);
499}
500
501void TranslatorVisitor::ICMP_reg(u64) { 420void TranslatorVisitor::ICMP_reg(u64) {
502 ThrowNotImplemented(Opcode::ICMP_reg); 421 ThrowNotImplemented(Opcode::ICMP_reg);
503} 422}
@@ -594,22 +513,6 @@ void TranslatorVisitor::ISBERD(u64) {
594 ThrowNotImplemented(Opcode::ISBERD); 513 ThrowNotImplemented(Opcode::ISBERD);
595} 514}
596 515
597void TranslatorVisitor::ISCADD_reg(u64) {
598 ThrowNotImplemented(Opcode::ISCADD_reg);
599}
600
601void TranslatorVisitor::ISCADD_cbuf(u64) {
602 ThrowNotImplemented(Opcode::ISCADD_cbuf);
603}
604
605void TranslatorVisitor::ISCADD_imm(u64) {
606 ThrowNotImplemented(Opcode::ISCADD_imm);
607}
608
609void TranslatorVisitor::ISCADD32I(u64) {
610 ThrowNotImplemented(Opcode::ISCADD32I);
611}
612
613void TranslatorVisitor::ISET_reg(u64) { 516void TranslatorVisitor::ISET_reg(u64) {
614 ThrowNotImplemented(Opcode::ISET_reg); 517 ThrowNotImplemented(Opcode::ISET_reg);
615} 518}
@@ -622,18 +525,6 @@ void TranslatorVisitor::ISET_imm(u64) {
622 ThrowNotImplemented(Opcode::ISET_imm); 525 ThrowNotImplemented(Opcode::ISET_imm);
623} 526}
624 527
625void TranslatorVisitor::ISETP_reg(u64) {
626 ThrowNotImplemented(Opcode::ISETP_reg);
627}
628
629void TranslatorVisitor::ISETP_cbuf(u64) {
630 ThrowNotImplemented(Opcode::ISETP_cbuf);
631}
632
633void TranslatorVisitor::ISETP_imm(u64) {
634 ThrowNotImplemented(Opcode::ISETP_imm);
635}
636
637void TranslatorVisitor::JCAL(u64) { 528void TranslatorVisitor::JCAL(u64) {
638 ThrowNotImplemented(Opcode::JCAL); 529 ThrowNotImplemented(Opcode::JCAL);
639} 530}
@@ -658,10 +549,6 @@ void TranslatorVisitor::LDC(u64) {
658 ThrowNotImplemented(Opcode::LDC); 549 ThrowNotImplemented(Opcode::LDC);
659} 550}
660 551
661void TranslatorVisitor::LDG(u64) {
662 ThrowNotImplemented(Opcode::LDG);
663}
664
665void TranslatorVisitor::LDL(u64) { 552void TranslatorVisitor::LDL(u64) {
666 ThrowNotImplemented(Opcode::LDL); 553 ThrowNotImplemented(Opcode::LDL);
667} 554}
@@ -866,10 +753,6 @@ void TranslatorVisitor::RTT(u64) {
866 ThrowNotImplemented(Opcode::RTT); 753 ThrowNotImplemented(Opcode::RTT);
867} 754}
868 755
869void TranslatorVisitor::S2R(u64) {
870 ThrowNotImplemented(Opcode::S2R);
871}
872
873void TranslatorVisitor::SAM(u64) { 756void TranslatorVisitor::SAM(u64) {
874 ThrowNotImplemented(Opcode::SAM); 757 ThrowNotImplemented(Opcode::SAM);
875} 758}
@@ -914,18 +797,6 @@ void TranslatorVisitor::SHFL(u64) {
914 ThrowNotImplemented(Opcode::SHFL); 797 ThrowNotImplemented(Opcode::SHFL);
915} 798}
916 799
917void TranslatorVisitor::SHL_reg(u64) {
918 ThrowNotImplemented(Opcode::SHL_reg);
919}
920
921void TranslatorVisitor::SHL_cbuf(u64) {
922 ThrowNotImplemented(Opcode::SHL_cbuf);
923}
924
925void TranslatorVisitor::SHL_imm(u64) {
926 ThrowNotImplemented(Opcode::SHL_imm);
927}
928
929void TranslatorVisitor::SHR_reg(u64) { 800void TranslatorVisitor::SHR_reg(u64) {
930 ThrowNotImplemented(Opcode::SHR_reg); 801 ThrowNotImplemented(Opcode::SHR_reg);
931} 802}
@@ -1086,20 +957,4 @@ void TranslatorVisitor::VSHR(u64) {
1086 ThrowNotImplemented(Opcode::VSHR); 957 ThrowNotImplemented(Opcode::VSHR);
1087} 958}
1088 959
1089void TranslatorVisitor::XMAD_reg(u64) {
1090 ThrowNotImplemented(Opcode::XMAD_reg);
1091}
1092
1093void TranslatorVisitor::XMAD_rc(u64) {
1094 ThrowNotImplemented(Opcode::XMAD_rc);
1095}
1096
1097void TranslatorVisitor::XMAD_cr(u64) {
1098 ThrowNotImplemented(Opcode::XMAD_cr);
1099}
1100
1101void TranslatorVisitor::XMAD_imm(u64) {
1102 ThrowNotImplemented(Opcode::XMAD_imm);
1103}
1104
1105} // namespace Shader::Maxwell 960} // namespace Shader::Maxwell