diff options
Diffstat (limited to 'src/shader_recompiler/frontend')
24 files changed, 1437 insertions, 243 deletions
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index e795618fc..249251dd0 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp | |||
| @@ -23,8 +23,8 @@ void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) { | |||
| 23 | } | 23 | } |
| 24 | 24 | ||
| 25 | Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, | 25 | Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, |
| 26 | std::initializer_list<Value> args) { | 26 | std::initializer_list<Value> args, u64 flags) { |
| 27 | Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op)}; | 27 | Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op, flags)}; |
| 28 | const auto result_it{instructions.insert(insertion_point, *inst)}; | 28 | const auto result_it{instructions.insert(insertion_point, *inst)}; |
| 29 | 29 | ||
| 30 | if (inst->NumArgs() != args.size()) { | 30 | if (inst->NumArgs() != args.size()) { |
diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 4b6b80c4b..ec4a41cb1 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h | |||
| @@ -39,7 +39,7 @@ public: | |||
| 39 | 39 | ||
| 40 | /// Prepends a new instruction to this basic block before the insertion point. | 40 | /// Prepends a new instruction to this basic block before the insertion point. |
| 41 | iterator PrependNewInst(iterator insertion_point, Opcode op, | 41 | iterator PrependNewInst(iterator insertion_point, Opcode op, |
| 42 | std::initializer_list<Value> args = {}); | 42 | std::initializer_list<Value> args = {}, u64 flags = 0); |
| 43 | 43 | ||
| 44 | /// Adds a new immediate predecessor to the basic block. | 44 | /// Adds a new immediate predecessor to the basic block. |
| 45 | void AddImmediatePredecessor(IR::Block* immediate_predecessor); | 45 | void AddImmediatePredecessor(IR::Block* immediate_predecessor); |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 6450e4b2c..87b253c9a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -129,6 +129,58 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const U32& value) { | |||
| 129 | Inst(Opcode::SetAttribute, attribute, value); | 129 | Inst(Opcode::SetAttribute, attribute, value); |
| 130 | } | 130 | } |
| 131 | 131 | ||
| 132 | U32 IREmitter::WorkgroupIdX() { | ||
| 133 | return Inst<U32>(Opcode::WorkgroupIdX); | ||
| 134 | } | ||
| 135 | |||
| 136 | U32 IREmitter::WorkgroupIdY() { | ||
| 137 | return Inst<U32>(Opcode::WorkgroupIdY); | ||
| 138 | } | ||
| 139 | |||
| 140 | U32 IREmitter::WorkgroupIdZ() { | ||
| 141 | return Inst<U32>(Opcode::WorkgroupIdZ); | ||
| 142 | } | ||
| 143 | |||
| 144 | U32 IREmitter::LocalInvocationIdX() { | ||
| 145 | return Inst<U32>(Opcode::LocalInvocationIdX); | ||
| 146 | } | ||
| 147 | |||
| 148 | U32 IREmitter::LocalInvocationIdY() { | ||
| 149 | return Inst<U32>(Opcode::LocalInvocationIdY); | ||
| 150 | } | ||
| 151 | |||
| 152 | U32 IREmitter::LocalInvocationIdZ() { | ||
| 153 | return Inst<U32>(Opcode::LocalInvocationIdZ); | ||
| 154 | } | ||
| 155 | |||
| 156 | U32 IREmitter::LoadGlobalU8(const U64& address) { | ||
| 157 | return Inst<U32>(Opcode::LoadGlobalU8, address); | ||
| 158 | } | ||
| 159 | |||
| 160 | U32 IREmitter::LoadGlobalS8(const U64& address) { | ||
| 161 | return Inst<U32>(Opcode::LoadGlobalS8, address); | ||
| 162 | } | ||
| 163 | |||
| 164 | U32 IREmitter::LoadGlobalU16(const U64& address) { | ||
| 165 | return Inst<U32>(Opcode::LoadGlobalU16, address); | ||
| 166 | } | ||
| 167 | |||
| 168 | U32 IREmitter::LoadGlobalS16(const U64& address) { | ||
| 169 | return Inst<U32>(Opcode::LoadGlobalS16, address); | ||
| 170 | } | ||
| 171 | |||
| 172 | U32 IREmitter::LoadGlobal32(const U64& address) { | ||
| 173 | return Inst<U32>(Opcode::LoadGlobal32, address); | ||
| 174 | } | ||
| 175 | |||
| 176 | Value IREmitter::LoadGlobal64(const U64& address) { | ||
| 177 | return Inst<Value>(Opcode::LoadGlobal64, address); | ||
| 178 | } | ||
| 179 | |||
| 180 | Value IREmitter::LoadGlobal128(const U64& address) { | ||
| 181 | return Inst<Value>(Opcode::LoadGlobal128, address); | ||
| 182 | } | ||
| 183 | |||
| 132 | void IREmitter::WriteGlobalU8(const U64& address, const U32& value) { | 184 | void IREmitter::WriteGlobalU8(const U64& address, const U32& value) { |
| 133 | Inst(Opcode::WriteGlobalU8, address, value); | 185 | Inst(Opcode::WriteGlobalU8, address, value); |
| 134 | } | 186 | } |
| @@ -173,17 +225,17 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) { | |||
| 173 | return Inst<U1>(Opcode::GetOverflowFromOp, op); | 225 | return Inst<U1>(Opcode::GetOverflowFromOp, op); |
| 174 | } | 226 | } |
| 175 | 227 | ||
| 176 | U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b) { | 228 | U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control) { |
| 177 | if (a.Type() != a.Type()) { | 229 | if (a.Type() != a.Type()) { |
| 178 | throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | 230 | throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); |
| 179 | } | 231 | } |
| 180 | switch (a.Type()) { | 232 | switch (a.Type()) { |
| 181 | case Type::U16: | 233 | case Type::U16: |
| 182 | return Inst<U16>(Opcode::FPAdd16, a, b); | 234 | return Inst<U16>(Opcode::FPAdd16, Flags{control}, a, b); |
| 183 | case Type::U32: | 235 | case Type::U32: |
| 184 | return Inst<U32>(Opcode::FPAdd32, a, b); | 236 | return Inst<U32>(Opcode::FPAdd32, Flags{control}, a, b); |
| 185 | case Type::U64: | 237 | case Type::U64: |
| 186 | return Inst<U64>(Opcode::FPAdd64, a, b); | 238 | return Inst<U64>(Opcode::FPAdd64, Flags{control}, a, b); |
| 187 | default: | 239 | default: |
| 188 | ThrowInvalidType(a.Type()); | 240 | ThrowInvalidType(a.Type()); |
| 189 | } | 241 | } |
| @@ -191,14 +243,14 @@ U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b) { | |||
| 191 | 243 | ||
| 192 | Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2) { | 244 | Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2) { |
| 193 | if (e1.Type() != e2.Type()) { | 245 | if (e1.Type() != e2.Type()) { |
| 194 | throw InvalidArgument("Incompatible types {} {}", e1.Type(), e2.Type()); | 246 | throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); |
| 195 | } | 247 | } |
| 196 | return Inst(Opcode::CompositeConstruct2, e1, e2); | 248 | return Inst(Opcode::CompositeConstruct2, e1, e2); |
| 197 | } | 249 | } |
| 198 | 250 | ||
| 199 | Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3) { | 251 | Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3) { |
| 200 | if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) { | 252 | if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) { |
| 201 | throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type()); | 253 | throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type()); |
| 202 | } | 254 | } |
| 203 | return Inst(Opcode::CompositeConstruct3, e1, e2, e3); | 255 | return Inst(Opcode::CompositeConstruct3, e1, e2, e3); |
| 204 | } | 256 | } |
| @@ -206,8 +258,8 @@ Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& | |||
| 206 | Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3, | 258 | Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3, |
| 207 | const UAny& e4) { | 259 | const UAny& e4) { |
| 208 | if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) { | 260 | if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) { |
| 209 | throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type(), | 261 | throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), |
| 210 | e4.Type()); | 262 | e3.Type(), e4.Type()); |
| 211 | } | 263 | } |
| 212 | return Inst(Opcode::CompositeConstruct4, e1, e2, e3, e4); | 264 | return Inst(Opcode::CompositeConstruct4, e1, e2, e3, e4); |
| 213 | } | 265 | } |
| @@ -219,6 +271,24 @@ UAny IREmitter::CompositeExtract(const Value& vector, size_t element) { | |||
| 219 | return Inst<UAny>(Opcode::CompositeExtract, vector, Imm32(static_cast<u32>(element))); | 271 | return Inst<UAny>(Opcode::CompositeExtract, vector, Imm32(static_cast<u32>(element))); |
| 220 | } | 272 | } |
| 221 | 273 | ||
| 274 | UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& false_value) { | ||
| 275 | if (true_value.Type() != false_value.Type()) { | ||
| 276 | throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); | ||
| 277 | } | ||
| 278 | switch (true_value.Type()) { | ||
| 279 | case Type::U8: | ||
| 280 | return Inst<UAny>(Opcode::Select8, condition, true_value, false_value); | ||
| 281 | case Type::U16: | ||
| 282 | return Inst<UAny>(Opcode::Select16, condition, true_value, false_value); | ||
| 283 | case Type::U32: | ||
| 284 | return Inst<UAny>(Opcode::Select32, condition, true_value, false_value); | ||
| 285 | case Type::U64: | ||
| 286 | return Inst<UAny>(Opcode::Select64, condition, true_value, false_value); | ||
| 287 | default: | ||
| 288 | throw InvalidArgument("Invalid type {}", true_value.Type()); | ||
| 289 | } | ||
| 290 | } | ||
| 291 | |||
| 222 | U64 IREmitter::PackUint2x32(const Value& vector) { | 292 | U64 IREmitter::PackUint2x32(const Value& vector) { |
| 223 | return Inst<U64>(Opcode::PackUint2x32, vector); | 293 | return Inst<U64>(Opcode::PackUint2x32, vector); |
| 224 | } | 294 | } |
| @@ -243,17 +313,34 @@ Value IREmitter::UnpackDouble2x32(const U64& value) { | |||
| 243 | return Inst<Value>(Opcode::UnpackDouble2x32, value); | 313 | return Inst<Value>(Opcode::UnpackDouble2x32, value); |
| 244 | } | 314 | } |
| 245 | 315 | ||
| 246 | U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b) { | 316 | U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control) { |
| 247 | if (a.Type() != b.Type()) { | 317 | if (a.Type() != b.Type()) { |
| 248 | throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | 318 | throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); |
| 249 | } | 319 | } |
| 250 | switch (a.Type()) { | 320 | switch (a.Type()) { |
| 251 | case Type::U16: | 321 | case Type::U16: |
| 252 | return Inst<U16>(Opcode::FPMul16, a, b); | 322 | return Inst<U16>(Opcode::FPMul16, Flags{control}, a, b); |
| 253 | case Type::U32: | 323 | case Type::U32: |
| 254 | return Inst<U32>(Opcode::FPMul32, a, b); | 324 | return Inst<U32>(Opcode::FPMul32, Flags{control}, a, b); |
| 255 | case Type::U64: | 325 | case Type::U64: |
| 256 | return Inst<U64>(Opcode::FPMul64, a, b); | 326 | return Inst<U64>(Opcode::FPMul64, Flags{control}, a, b); |
| 327 | default: | ||
| 328 | ThrowInvalidType(a.Type()); | ||
| 329 | } | ||
| 330 | } | ||
| 331 | |||
| 332 | U16U32U64 IREmitter::FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c, | ||
| 333 | FpControl control) { | ||
| 334 | if (a.Type() != b.Type() || a.Type() != c.Type()) { | ||
| 335 | throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type()); | ||
| 336 | } | ||
| 337 | switch (a.Type()) { | ||
| 338 | case Type::U16: | ||
| 339 | return Inst<U16>(Opcode::FPFma16, Flags{control}, a, b, c); | ||
| 340 | case Type::U32: | ||
| 341 | return Inst<U32>(Opcode::FPFma32, Flags{control}, a, b, c); | ||
| 342 | case Type::U64: | ||
| 343 | return Inst<U64>(Opcode::FPFma64, Flags{control}, a, b, c); | ||
| 257 | default: | 344 | default: |
| 258 | ThrowInvalidType(a.Type()); | 345 | ThrowInvalidType(a.Type()); |
| 259 | } | 346 | } |
| @@ -403,6 +490,91 @@ U16U32U64 IREmitter::FPTrunc(const U16U32U64& value) { | |||
| 403 | } | 490 | } |
| 404 | } | 491 | } |
| 405 | 492 | ||
| 493 | U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { | ||
| 494 | if (a.Type() != b.Type()) { | ||
| 495 | throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | ||
| 496 | } | ||
| 497 | switch (a.Type()) { | ||
| 498 | case Type::U32: | ||
| 499 | return Inst<U32>(Opcode::IAdd32, a, b); | ||
| 500 | case Type::U64: | ||
| 501 | return Inst<U64>(Opcode::IAdd64, a, b); | ||
| 502 | default: | ||
| 503 | ThrowInvalidType(a.Type()); | ||
| 504 | } | ||
| 505 | } | ||
| 506 | |||
| 507 | U32 IREmitter::IMul(const U32& a, const U32& b) { | ||
| 508 | return Inst<U32>(Opcode::IMul32, a, b); | ||
| 509 | } | ||
| 510 | |||
| 511 | U32 IREmitter::INeg(const U32& value) { | ||
| 512 | return Inst<U32>(Opcode::INeg32, value); | ||
| 513 | } | ||
| 514 | |||
| 515 | U32 IREmitter::IAbs(const U32& value) { | ||
| 516 | return Inst<U32>(Opcode::IAbs32, value); | ||
| 517 | } | ||
| 518 | |||
| 519 | U32 IREmitter::ShiftLeftLogical(const U32& base, const U32& shift) { | ||
| 520 | return Inst<U32>(Opcode::ShiftLeftLogical32, base, shift); | ||
| 521 | } | ||
| 522 | |||
| 523 | U32 IREmitter::ShiftRightLogical(const U32& base, const U32& shift) { | ||
| 524 | return Inst<U32>(Opcode::ShiftRightLogical32, base, shift); | ||
| 525 | } | ||
| 526 | |||
| 527 | U32 IREmitter::ShiftRightArithmetic(const U32& base, const U32& shift) { | ||
| 528 | return Inst<U32>(Opcode::ShiftRightArithmetic32, base, shift); | ||
| 529 | } | ||
| 530 | |||
| 531 | U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) { | ||
| 532 | return Inst<U32>(Opcode::BitwiseAnd32, a, b); | ||
| 533 | } | ||
| 534 | |||
| 535 | U32 IREmitter::BitwiseOr(const U32& a, const U32& b) { | ||
| 536 | return Inst<U32>(Opcode::BitwiseOr32, a, b); | ||
| 537 | } | ||
| 538 | |||
| 539 | U32 IREmitter::BitwiseXor(const U32& a, const U32& b) { | ||
| 540 | return Inst<U32>(Opcode::BitwiseXor32, a, b); | ||
| 541 | } | ||
| 542 | |||
| 543 | U32 IREmitter::BitFieldInsert(const U32& base, const U32& insert, const U32& offset, | ||
| 544 | const U32& count) { | ||
| 545 | return Inst<U32>(Opcode::BitFieldInsert, base, insert, offset, count); | ||
| 546 | } | ||
| 547 | |||
| 548 | U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& count, | ||
| 549 | bool is_signed) { | ||
| 550 | return Inst<U32>(is_signed ? Opcode::BitFieldSExtract : Opcode::BitFieldUExtract, base, offset, | ||
| 551 | count); | ||
| 552 | } | ||
| 553 | |||
| 554 | U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { | ||
| 555 | return Inst<U1>(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); | ||
| 556 | } | ||
| 557 | |||
| 558 | U1 IREmitter::IEqual(const U32& lhs, const U32& rhs) { | ||
| 559 | return Inst<U1>(Opcode::IEqual, lhs, rhs); | ||
| 560 | } | ||
| 561 | |||
| 562 | U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { | ||
| 563 | return Inst<U1>(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs); | ||
| 564 | } | ||
| 565 | |||
| 566 | U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) { | ||
| 567 | return Inst<U1>(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs); | ||
| 568 | } | ||
| 569 | |||
| 570 | U1 IREmitter::INotEqual(const U32& lhs, const U32& rhs) { | ||
| 571 | return Inst<U1>(Opcode::INotEqual, lhs, rhs); | ||
| 572 | } | ||
| 573 | |||
| 574 | U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { | ||
| 575 | return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs); | ||
| 576 | } | ||
| 577 | |||
| 406 | U1 IREmitter::LogicalOr(const U1& a, const U1& b) { | 578 | U1 IREmitter::LogicalOr(const U1& a, const U1& b) { |
| 407 | return Inst<U1>(Opcode::LogicalOr, a, b); | 579 | return Inst<U1>(Opcode::LogicalOr, a, b); |
| 408 | } | 580 | } |
| @@ -411,6 +583,10 @@ U1 IREmitter::LogicalAnd(const U1& a, const U1& b) { | |||
| 411 | return Inst<U1>(Opcode::LogicalAnd, a, b); | 583 | return Inst<U1>(Opcode::LogicalAnd, a, b); |
| 412 | } | 584 | } |
| 413 | 585 | ||
| 586 | U1 IREmitter::LogicalXor(const U1& a, const U1& b) { | ||
| 587 | return Inst<U1>(Opcode::LogicalXor, a, b); | ||
| 588 | } | ||
| 589 | |||
| 414 | U1 IREmitter::LogicalNot(const U1& value) { | 590 | U1 IREmitter::LogicalNot(const U1& value) { |
| 415 | return Inst<U1>(Opcode::LogicalNot, value); | 591 | return Inst<U1>(Opcode::LogicalNot, value); |
| 416 | } | 592 | } |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 1af79f41c..7ff763ecf 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -4,8 +4,12 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <cstring> | ||
| 8 | #include <type_traits> | ||
| 9 | |||
| 7 | #include "shader_recompiler/frontend/ir/attribute.h" | 10 | #include "shader_recompiler/frontend/ir/attribute.h" |
| 8 | #include "shader_recompiler/frontend/ir/basic_block.h" | 11 | #include "shader_recompiler/frontend/ir/basic_block.h" |
| 12 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | 13 | #include "shader_recompiler/frontend/ir/value.h" |
| 10 | 14 | ||
| 11 | namespace Shader::IR { | 15 | namespace Shader::IR { |
| @@ -52,6 +56,22 @@ public: | |||
| 52 | [[nodiscard]] U32 GetAttribute(IR::Attribute attribute); | 56 | [[nodiscard]] U32 GetAttribute(IR::Attribute attribute); |
| 53 | void SetAttribute(IR::Attribute attribute, const U32& value); | 57 | void SetAttribute(IR::Attribute attribute, const U32& value); |
| 54 | 58 | ||
| 59 | [[nodiscard]] U32 WorkgroupIdX(); | ||
| 60 | [[nodiscard]] U32 WorkgroupIdY(); | ||
| 61 | [[nodiscard]] U32 WorkgroupIdZ(); | ||
| 62 | |||
| 63 | [[nodiscard]] U32 LocalInvocationIdX(); | ||
| 64 | [[nodiscard]] U32 LocalInvocationIdY(); | ||
| 65 | [[nodiscard]] U32 LocalInvocationIdZ(); | ||
| 66 | |||
| 67 | [[nodiscard]] U32 LoadGlobalU8(const U64& address); | ||
| 68 | [[nodiscard]] U32 LoadGlobalS8(const U64& address); | ||
| 69 | [[nodiscard]] U32 LoadGlobalU16(const U64& address); | ||
| 70 | [[nodiscard]] U32 LoadGlobalS16(const U64& address); | ||
| 71 | [[nodiscard]] U32 LoadGlobal32(const U64& address); | ||
| 72 | [[nodiscard]] Value LoadGlobal64(const U64& address); | ||
| 73 | [[nodiscard]] Value LoadGlobal128(const U64& address); | ||
| 74 | |||
| 55 | void WriteGlobalU8(const U64& address, const U32& value); | 75 | void WriteGlobalU8(const U64& address, const U32& value); |
| 56 | void WriteGlobalS8(const U64& address, const U32& value); | 76 | void WriteGlobalS8(const U64& address, const U32& value); |
| 57 | void WriteGlobalU16(const U64& address, const U32& value); | 77 | void WriteGlobalU16(const U64& address, const U32& value); |
| @@ -71,6 +91,8 @@ public: | |||
| 71 | const UAny& e4); | 91 | const UAny& e4); |
| 72 | [[nodiscard]] UAny CompositeExtract(const Value& vector, size_t element); | 92 | [[nodiscard]] UAny CompositeExtract(const Value& vector, size_t element); |
| 73 | 93 | ||
| 94 | [[nodiscard]] UAny Select(const U1& condition, const UAny& true_value, const UAny& false_value); | ||
| 95 | |||
| 74 | [[nodiscard]] U64 PackUint2x32(const Value& vector); | 96 | [[nodiscard]] U64 PackUint2x32(const Value& vector); |
| 75 | [[nodiscard]] Value UnpackUint2x32(const U64& value); | 97 | [[nodiscard]] Value UnpackUint2x32(const U64& value); |
| 76 | 98 | ||
| @@ -80,8 +102,10 @@ public: | |||
| 80 | [[nodiscard]] U64 PackDouble2x32(const Value& vector); | 102 | [[nodiscard]] U64 PackDouble2x32(const Value& vector); |
| 81 | [[nodiscard]] Value UnpackDouble2x32(const U64& value); | 103 | [[nodiscard]] Value UnpackDouble2x32(const U64& value); |
| 82 | 104 | ||
| 83 | [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b); | 105 | [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control = {}); |
| 84 | [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b); | 106 | [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control = {}); |
| 107 | [[nodiscard]] U16U32U64 FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c, | ||
| 108 | FpControl control = {}); | ||
| 85 | 109 | ||
| 86 | [[nodiscard]] U16U32U64 FPAbs(const U16U32U64& value); | 110 | [[nodiscard]] U16U32U64 FPAbs(const U16U32U64& value); |
| 87 | [[nodiscard]] U16U32U64 FPNeg(const U16U32U64& value); | 111 | [[nodiscard]] U16U32U64 FPNeg(const U16U32U64& value); |
| @@ -100,8 +124,31 @@ public: | |||
| 100 | [[nodiscard]] U16U32U64 FPCeil(const U16U32U64& value); | 124 | [[nodiscard]] U16U32U64 FPCeil(const U16U32U64& value); |
| 101 | [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value); | 125 | [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value); |
| 102 | 126 | ||
| 127 | [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); | ||
| 128 | [[nodiscard]] U32 IMul(const U32& a, const U32& b); | ||
| 129 | [[nodiscard]] U32 INeg(const U32& value); | ||
| 130 | [[nodiscard]] U32 IAbs(const U32& value); | ||
| 131 | [[nodiscard]] U32 ShiftLeftLogical(const U32& base, const U32& shift); | ||
| 132 | [[nodiscard]] U32 ShiftRightLogical(const U32& base, const U32& shift); | ||
| 133 | [[nodiscard]] U32 ShiftRightArithmetic(const U32& base, const U32& shift); | ||
| 134 | [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b); | ||
| 135 | [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b); | ||
| 136 | [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b); | ||
| 137 | [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, | ||
| 138 | const U32& count); | ||
| 139 | [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, | ||
| 140 | bool is_signed); | ||
| 141 | |||
| 142 | [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); | ||
| 143 | [[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs); | ||
| 144 | [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); | ||
| 145 | [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed); | ||
| 146 | [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); | ||
| 147 | [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed); | ||
| 148 | |||
| 103 | [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); | 149 | [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); |
| 104 | [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); | 150 | [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); |
| 151 | [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); | ||
| 105 | [[nodiscard]] U1 LogicalNot(const U1& value); | 152 | [[nodiscard]] U1 LogicalNot(const U1& value); |
| 106 | 153 | ||
| 107 | [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const U16U32U64& value); | 154 | [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const U16U32U64& value); |
| @@ -118,6 +165,22 @@ private: | |||
| 118 | auto it{block.PrependNewInst(insertion_point, op, {Value{args}...})}; | 165 | auto it{block.PrependNewInst(insertion_point, op, {Value{args}...})}; |
| 119 | return T{Value{&*it}}; | 166 | return T{Value{&*it}}; |
| 120 | } | 167 | } |
| 168 | |||
| 169 | template <typename T> | ||
| 170 | requires(sizeof(T) <= sizeof(u64) && std::is_trivially_copyable_v<T>) struct Flags { | ||
| 171 | Flags() = default; | ||
| 172 | Flags(T proxy_) : proxy{proxy_} {} | ||
| 173 | |||
| 174 | T proxy; | ||
| 175 | }; | ||
| 176 | |||
| 177 | template <typename T = Value, typename FlagType, typename... Args> | ||
| 178 | T Inst(Opcode op, Flags<FlagType> flags, Args... args) { | ||
| 179 | u64 raw_flags{}; | ||
| 180 | std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); | ||
| 181 | auto it{block.PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; | ||
| 182 | return T{Value{&*it}}; | ||
| 183 | } | ||
| 121 | }; | 184 | }; |
| 122 | 185 | ||
| 123 | } // namespace Shader::IR | 186 | } // namespace Shader::IR |
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 7f1ed6710..61849695a 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h | |||
| @@ -5,7 +5,9 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstring> | ||
| 8 | #include <span> | 9 | #include <span> |
| 10 | #include <type_traits> | ||
| 9 | #include <vector> | 11 | #include <vector> |
| 10 | 12 | ||
| 11 | #include <boost/intrusive/list.hpp> | 13 | #include <boost/intrusive/list.hpp> |
| @@ -23,7 +25,7 @@ constexpr size_t MAX_ARG_COUNT = 4; | |||
| 23 | 25 | ||
| 24 | class Inst : public boost::intrusive::list_base_hook<> { | 26 | class Inst : public boost::intrusive::list_base_hook<> { |
| 25 | public: | 27 | public: |
| 26 | explicit Inst(Opcode op_) noexcept : op(op_) {} | 28 | explicit Inst(Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} {} |
| 27 | 29 | ||
| 28 | /// Get the number of uses this instruction has. | 30 | /// Get the number of uses this instruction has. |
| 29 | [[nodiscard]] int UseCount() const noexcept { | 31 | [[nodiscard]] int UseCount() const noexcept { |
| @@ -73,6 +75,14 @@ public: | |||
| 73 | 75 | ||
| 74 | void ReplaceUsesWith(Value replacement); | 76 | void ReplaceUsesWith(Value replacement); |
| 75 | 77 | ||
| 78 | template <typename FlagsType> | ||
| 79 | requires(sizeof(FlagsType) <= sizeof(u64) && std::is_trivially_copyable_v<FlagsType>) | ||
| 80 | [[nodiscard]] FlagsType Flags() const noexcept { | ||
| 81 | FlagsType ret; | ||
| 82 | std::memcpy(&ret, &flags, sizeof(ret)); | ||
| 83 | return ret; | ||
| 84 | } | ||
| 85 | |||
| 76 | private: | 86 | private: |
| 77 | void Use(const Value& value); | 87 | void Use(const Value& value); |
| 78 | void UndoUse(const Value& value); | 88 | void UndoUse(const Value& value); |
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h new file mode 100644 index 000000000..28bb9e798 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/modifiers.h | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | namespace Shader::IR { | ||
| 8 | |||
| 9 | enum class FmzMode { | ||
| 10 | None, // Denorms are not flushed, NAN is propagated (nouveau) | ||
| 11 | FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) | ||
| 12 | FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) | ||
| 13 | }; | ||
| 14 | |||
| 15 | enum class FpRounding { | ||
| 16 | RN, // Round to nearest even, | ||
| 17 | RM, // Round towards negative infinity | ||
| 18 | RP, // Round towards positive infinity | ||
| 19 | RZ, // Round towards zero | ||
| 20 | }; | ||
| 21 | |||
| 22 | struct FpControl { | ||
| 23 | bool no_contraction{false}; | ||
| 24 | FpRounding rounding : 8 = FpRounding::RN; | ||
| 25 | FmzMode fmz_mode : 8 = FmzMode::FTZ; | ||
| 26 | }; | ||
| 27 | static_assert(sizeof(FpControl) <= sizeof(u64)); | ||
| 28 | } // namespace Shader::IR | ||
diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc index 40759e96a..4ecb5e936 100644 --- a/src/shader_recompiler/frontend/ir/opcode.inc +++ b/src/shader_recompiler/frontend/ir/opcode.inc | |||
| @@ -35,6 +35,12 @@ OPCODE(SetZFlag, Void, U1, | |||
| 35 | OPCODE(SetSFlag, Void, U1, ) | 35 | OPCODE(SetSFlag, Void, U1, ) |
| 36 | OPCODE(SetCFlag, Void, U1, ) | 36 | OPCODE(SetCFlag, Void, U1, ) |
| 37 | OPCODE(SetOFlag, Void, U1, ) | 37 | OPCODE(SetOFlag, Void, U1, ) |
| 38 | OPCODE(WorkgroupIdX, U32, ) | ||
| 39 | OPCODE(WorkgroupIdY, U32, ) | ||
| 40 | OPCODE(WorkgroupIdZ, U32, ) | ||
| 41 | OPCODE(LocalInvocationIdX, U32, ) | ||
| 42 | OPCODE(LocalInvocationIdY, U32, ) | ||
| 43 | OPCODE(LocalInvocationIdZ, U32, ) | ||
| 38 | 44 | ||
| 39 | // Undefined | 45 | // Undefined |
| 40 | OPCODE(Undef1, U1, ) | 46 | OPCODE(Undef1, U1, ) |
| @@ -44,6 +50,13 @@ OPCODE(Undef32, U32, | |||
| 44 | OPCODE(Undef64, U64, ) | 50 | OPCODE(Undef64, U64, ) |
| 45 | 51 | ||
| 46 | // Memory operations | 52 | // Memory operations |
| 53 | OPCODE(LoadGlobalU8, U32, U64, ) | ||
| 54 | OPCODE(LoadGlobalS8, U32, U64, ) | ||
| 55 | OPCODE(LoadGlobalU16, U32, U64, ) | ||
| 56 | OPCODE(LoadGlobalS16, U32, U64, ) | ||
| 57 | OPCODE(LoadGlobal32, U32, U64, ) | ||
| 58 | OPCODE(LoadGlobal64, Opaque, U64, ) | ||
| 59 | OPCODE(LoadGlobal128, Opaque, U64, ) | ||
| 47 | OPCODE(WriteGlobalU8, Void, U64, U32, ) | 60 | OPCODE(WriteGlobalU8, Void, U64, U32, ) |
| 48 | OPCODE(WriteGlobalS8, Void, U64, U32, ) | 61 | OPCODE(WriteGlobalS8, Void, U64, U32, ) |
| 49 | OPCODE(WriteGlobalU16, Void, U64, U32, ) | 62 | OPCODE(WriteGlobalU16, Void, U64, U32, ) |
| @@ -58,6 +71,12 @@ OPCODE(CompositeConstruct3, Opaque, Opaq | |||
| 58 | OPCODE(CompositeConstruct4, Opaque, Opaque, Opaque, Opaque, Opaque, ) | 71 | OPCODE(CompositeConstruct4, Opaque, Opaque, Opaque, Opaque, Opaque, ) |
| 59 | OPCODE(CompositeExtract, Opaque, Opaque, U32, ) | 72 | OPCODE(CompositeExtract, Opaque, Opaque, U32, ) |
| 60 | 73 | ||
| 74 | // Select operations | ||
| 75 | OPCODE(Select8, U8, U1, U8, U8, ) | ||
| 76 | OPCODE(Select16, U16, U1, U16, U16, ) | ||
| 77 | OPCODE(Select32, U32, U1, U32, U32, ) | ||
| 78 | OPCODE(Select64, U64, U1, U64, U64, ) | ||
| 79 | |||
| 61 | // Bitwise conversions | 80 | // Bitwise conversions |
| 62 | OPCODE(PackUint2x32, U64, Opaque, ) | 81 | OPCODE(PackUint2x32, U64, Opaque, ) |
| 63 | OPCODE(UnpackUint2x32, Opaque, U64, ) | 82 | OPCODE(UnpackUint2x32, Opaque, U64, ) |
| @@ -74,56 +93,84 @@ OPCODE(GetOverflowFromOp, U1, Opaq | |||
| 74 | OPCODE(GetZSCOFromOp, ZSCO, Opaque, ) | 93 | OPCODE(GetZSCOFromOp, ZSCO, Opaque, ) |
| 75 | 94 | ||
| 76 | // Floating-point operations | 95 | // Floating-point operations |
| 77 | OPCODE(FPAbs16, U16, U16 ) | 96 | OPCODE(FPAbs16, U16, U16, ) |
| 78 | OPCODE(FPAbs32, U32, U32 ) | 97 | OPCODE(FPAbs32, U32, U32, ) |
| 79 | OPCODE(FPAbs64, U64, U64 ) | 98 | OPCODE(FPAbs64, U64, U64, ) |
| 80 | OPCODE(FPAdd16, U16, U16, U16 ) | 99 | OPCODE(FPAdd16, U16, U16, U16, ) |
| 81 | OPCODE(FPAdd32, U32, U32, U32 ) | 100 | OPCODE(FPAdd32, U32, U32, U32, ) |
| 82 | OPCODE(FPAdd64, U64, U64, U64 ) | 101 | OPCODE(FPAdd64, U64, U64, U64, ) |
| 83 | OPCODE(FPFma16, U16, U16, U16 ) | 102 | OPCODE(FPFma16, U16, U16, U16, U16, ) |
| 84 | OPCODE(FPFma32, U32, U32, U32 ) | 103 | OPCODE(FPFma32, U32, U32, U32, U32, ) |
| 85 | OPCODE(FPFma64, U64, U64, U64 ) | 104 | OPCODE(FPFma64, U64, U64, U64, U64, ) |
| 86 | OPCODE(FPMax32, U32, U32, U32 ) | 105 | OPCODE(FPMax32, U32, U32, U32, ) |
| 87 | OPCODE(FPMax64, U64, U64, U64 ) | 106 | OPCODE(FPMax64, U64, U64, U64, ) |
| 88 | OPCODE(FPMin32, U32, U32, U32 ) | 107 | OPCODE(FPMin32, U32, U32, U32, ) |
| 89 | OPCODE(FPMin64, U64, U64, U64 ) | 108 | OPCODE(FPMin64, U64, U64, U64, ) |
| 90 | OPCODE(FPMul16, U16, U16, U16 ) | 109 | OPCODE(FPMul16, U16, U16, U16, ) |
| 91 | OPCODE(FPMul32, U32, U32, U32 ) | 110 | OPCODE(FPMul32, U32, U32, U32, ) |
| 92 | OPCODE(FPMul64, U64, U64, U64 ) | 111 | OPCODE(FPMul64, U64, U64, U64, ) |
| 93 | OPCODE(FPNeg16, U16, U16 ) | 112 | OPCODE(FPNeg16, U16, U16, ) |
| 94 | OPCODE(FPNeg32, U32, U32 ) | 113 | OPCODE(FPNeg32, U32, U32, ) |
| 95 | OPCODE(FPNeg64, U64, U64 ) | 114 | OPCODE(FPNeg64, U64, U64, ) |
| 96 | OPCODE(FPRecip32, U32, U32 ) | 115 | OPCODE(FPRecip32, U32, U32, ) |
| 97 | OPCODE(FPRecip64, U64, U64 ) | 116 | OPCODE(FPRecip64, U64, U64, ) |
| 98 | OPCODE(FPRecipSqrt32, U32, U32 ) | 117 | OPCODE(FPRecipSqrt32, U32, U32, ) |
| 99 | OPCODE(FPRecipSqrt64, U64, U64 ) | 118 | OPCODE(FPRecipSqrt64, U64, U64, ) |
| 100 | OPCODE(FPSqrt, U32, U32 ) | 119 | OPCODE(FPSqrt, U32, U32, ) |
| 101 | OPCODE(FPSin, U32, U32 ) | 120 | OPCODE(FPSin, U32, U32, ) |
| 102 | OPCODE(FPSinNotReduced, U32, U32 ) | 121 | OPCODE(FPSinNotReduced, U32, U32, ) |
| 103 | OPCODE(FPExp2, U32, U32 ) | 122 | OPCODE(FPExp2, U32, U32, ) |
| 104 | OPCODE(FPExp2NotReduced, U32, U32 ) | 123 | OPCODE(FPExp2NotReduced, U32, U32, ) |
| 105 | OPCODE(FPCos, U32, U32 ) | 124 | OPCODE(FPCos, U32, U32, ) |
| 106 | OPCODE(FPCosNotReduced, U32, U32 ) | 125 | OPCODE(FPCosNotReduced, U32, U32, ) |
| 107 | OPCODE(FPLog2, U32, U32 ) | 126 | OPCODE(FPLog2, U32, U32, ) |
| 108 | OPCODE(FPSaturate16, U16, U16 ) | 127 | OPCODE(FPSaturate16, U16, U16, ) |
| 109 | OPCODE(FPSaturate32, U32, U32 ) | 128 | OPCODE(FPSaturate32, U32, U32, ) |
| 110 | OPCODE(FPSaturate64, U64, U64 ) | 129 | OPCODE(FPSaturate64, U64, U64, ) |
| 111 | OPCODE(FPRoundEven16, U16, U16 ) | 130 | OPCODE(FPRoundEven16, U16, U16, ) |
| 112 | OPCODE(FPRoundEven32, U32, U32 ) | 131 | OPCODE(FPRoundEven32, U32, U32, ) |
| 113 | OPCODE(FPRoundEven64, U64, U64 ) | 132 | OPCODE(FPRoundEven64, U64, U64, ) |
| 114 | OPCODE(FPFloor16, U16, U16 ) | 133 | OPCODE(FPFloor16, U16, U16, ) |
| 115 | OPCODE(FPFloor32, U32, U32 ) | 134 | OPCODE(FPFloor32, U32, U32, ) |
| 116 | OPCODE(FPFloor64, U64, U64 ) | 135 | OPCODE(FPFloor64, U64, U64, ) |
| 117 | OPCODE(FPCeil16, U16, U16 ) | 136 | OPCODE(FPCeil16, U16, U16, ) |
| 118 | OPCODE(FPCeil32, U32, U32 ) | 137 | OPCODE(FPCeil32, U32, U32, ) |
| 119 | OPCODE(FPCeil64, U64, U64 ) | 138 | OPCODE(FPCeil64, U64, U64, ) |
| 120 | OPCODE(FPTrunc16, U16, U16 ) | 139 | OPCODE(FPTrunc16, U16, U16, ) |
| 121 | OPCODE(FPTrunc32, U32, U32 ) | 140 | OPCODE(FPTrunc32, U32, U32, ) |
| 122 | OPCODE(FPTrunc64, U64, U64 ) | 141 | OPCODE(FPTrunc64, U64, U64, ) |
| 142 | |||
| 143 | // Integer operations | ||
| 144 | OPCODE(IAdd32, U32, U32, U32, ) | ||
| 145 | OPCODE(IAdd64, U64, U64, U64, ) | ||
| 146 | OPCODE(IMul32, U32, U32, U32, ) | ||
| 147 | OPCODE(INeg32, U32, U32, ) | ||
| 148 | OPCODE(IAbs32, U32, U32, ) | ||
| 149 | OPCODE(ShiftLeftLogical32, U32, U32, U32, ) | ||
| 150 | OPCODE(ShiftRightLogical32, U32, U32, U32, ) | ||
| 151 | OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) | ||
| 152 | OPCODE(BitwiseAnd32, U32, U32, U32, ) | ||
| 153 | OPCODE(BitwiseOr32, U32, U32, U32, ) | ||
| 154 | OPCODE(BitwiseXor32, U32, U32, U32, ) | ||
| 155 | OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) | ||
| 156 | OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) | ||
| 157 | OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) | ||
| 158 | |||
| 159 | OPCODE(SLessThan, U1, U32, U32, ) | ||
| 160 | OPCODE(ULessThan, U1, U32, U32, ) | ||
| 161 | OPCODE(IEqual, U1, U32, U32, ) | ||
| 162 | OPCODE(SLessThanEqual, U1, U32, U32, ) | ||
| 163 | OPCODE(ULessThanEqual, U1, U32, U32, ) | ||
| 164 | OPCODE(SGreaterThan, U1, U32, U32, ) | ||
| 165 | OPCODE(UGreaterThan, U1, U32, U32, ) | ||
| 166 | OPCODE(INotEqual, U1, U32, U32, ) | ||
| 167 | OPCODE(SGreaterThanEqual, U1, U32, U32, ) | ||
| 168 | OPCODE(UGreaterThanEqual, U1, U32, U32, ) | ||
| 123 | 169 | ||
| 124 | // Logical operations | 170 | // Logical operations |
| 125 | OPCODE(LogicalOr, U1, U1, U1, ) | 171 | OPCODE(LogicalOr, U1, U1, U1, ) |
| 126 | OPCODE(LogicalAnd, U1, U1, U1, ) | 172 | OPCODE(LogicalAnd, U1, U1, U1, ) |
| 173 | OPCODE(LogicalXor, U1, U1, U1, ) | ||
| 127 | OPCODE(LogicalNot, U1, U1, ) | 174 | OPCODE(LogicalNot, U1, U1, ) |
| 128 | 175 | ||
| 129 | // Conversion operations | 176 | // Conversion operations |
diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h index daf23193f..c6f2f82bf 100644 --- a/src/shader_recompiler/frontend/ir/pred.h +++ b/src/shader_recompiler/frontend/ir/pred.h | |||
| @@ -8,7 +8,16 @@ | |||
| 8 | 8 | ||
| 9 | namespace Shader::IR { | 9 | namespace Shader::IR { |
| 10 | 10 | ||
/// Maxwell predicate register indices. P0-P6 are the user predicates
/// (NUM_USER_PREDS below); PT is the always-true predicate.
/// Backed by u64 so it can be used directly as a BitField storage type.
enum class Pred : u64 {
    P0,
    P1,
    P2,
    P3,
    P4,
    P5,
    P6,
    PT,
};
| 12 | 21 | ||
| 13 | constexpr size_t NUM_USER_PREDS = 6; | 22 | constexpr size_t NUM_USER_PREDS = 6; |
| 14 | constexpr size_t NUM_PREDS = 7; | 23 | constexpr size_t NUM_PREDS = 7; |
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 49d1f4bfb..bd1f96c07 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp | |||
| @@ -56,6 +56,7 @@ Program::Program(Environment& env, const Flow::CFG& cfg) { | |||
| 56 | Optimization::Invoke(Optimization::IdentityRemovalPass, function); | 56 | Optimization::Invoke(Optimization::IdentityRemovalPass, function); |
| 57 | // Optimization::Invoke(Optimization::VerificationPass, function); | 57 | // Optimization::Invoke(Optimization::VerificationPass, function); |
| 58 | } | 58 | } |
| 59 | //*/ | ||
| 59 | } | 60 | } |
| 60 | 61 | ||
| 61 | std::string DumpProgram(const Program& program) { | 62 | std::string DumpProgram(const Program& program) { |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h new file mode 100644 index 000000000..3da37a2bb --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h | |||
| @@ -0,0 +1,56 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/exception.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | |||
| 14 | enum class FpRounding : u64 { | ||
| 15 | RN, | ||
| 16 | RM, | ||
| 17 | RP, | ||
| 18 | RZ, | ||
| 19 | }; | ||
| 20 | |||
| 21 | enum class FmzMode : u64 { | ||
| 22 | None, | ||
| 23 | FTZ, | ||
| 24 | FMZ, | ||
| 25 | INVALIDFMZ3, | ||
| 26 | }; | ||
| 27 | |||
| 28 | inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) { | ||
| 29 | switch (fp_rounding) { | ||
| 30 | case FpRounding::RN: | ||
| 31 | return IR::FpRounding::RN; | ||
| 32 | case FpRounding::RM: | ||
| 33 | return IR::FpRounding::RM; | ||
| 34 | case FpRounding::RP: | ||
| 35 | return IR::FpRounding::RP; | ||
| 36 | case FpRounding::RZ: | ||
| 37 | return IR::FpRounding::RZ; | ||
| 38 | } | ||
| 39 | throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding); | ||
| 40 | } | ||
| 41 | |||
| 42 | inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) { | ||
| 43 | switch (fmz_mode) { | ||
| 44 | case FmzMode::None: | ||
| 45 | return IR::FmzMode::None; | ||
| 46 | case FmzMode::FTZ: | ||
| 47 | return IR::FmzMode::FTZ; | ||
| 48 | case FmzMode::FMZ: | ||
| 49 | return IR::FmzMode::FMZ; | ||
| 50 | case FmzMode::INVALIDFMZ3: | ||
| 51 | break; | ||
| 52 | } | ||
| 53 | throw NotImplementedException("Invalid FMZ mode {}", fmz_mode); | ||
| 54 | } | ||
| 55 | |||
| 56 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp new file mode 100644 index 000000000..d2c44b9cc --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | |||
// Common implementation for all FADD variants. Reads operand A from the
// register encoded at bits [8,16), applies abs/neg modifiers to both operands,
// and writes the IR floating-point add to the destination register (bits [0,8)).
// SAT and CC modifiers are not implemented yet and throw.
void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,
          const IR::U32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const fadd{insn};

    if (sat) {
        throw NotImplementedException("FADD SAT");
    }
    if (cc) {
        throw NotImplementedException("FADD CC");
    }
    const IR::U32 op_a{v.ir.FPAbsNeg(v.X(fadd.src_a), abs_a, neg_a)};
    const IR::U32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
    // no_contraction prevents the backend from fusing this add into an FMA,
    // which would change rounding behavior.
    IR::FpControl control{
        .no_contraction{true},
        .rounding{CastFpRounding(fp_rounding)},
        .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
    };
    v.X(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
}
| 36 | |||
// Decodes the modifier bits shared by the short FADD encodings and forwards
// to the common implementation above.
void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
    union {
        u64 raw;
        BitField<39, 2, FpRounding> fp_rounding;
        BitField<44, 1, u64> ftz;
        BitField<45, 1, u64> neg_b;
        BitField<46, 1, u64> abs_a;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg_a;
        BitField<49, 1, u64> abs_b;
        BitField<50, 1, u64> sat;
    } const fadd{insn};

    FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b,
         fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0);
}
| 53 | } // Anonymous namespace | ||
| 54 | |||
// Register-operand variant: operand B is the register encoded at bit 20.
void TranslatorVisitor::FADD_reg(u64 insn) {
    FADD(*this, insn, GetReg20(insn));
}

// Constant-buffer, immediate and 32-bit-immediate variants are not
// implemented yet.
void TranslatorVisitor::FADD_cbuf(u64) {
    throw NotImplementedException("FADD (cbuf)");
}

void TranslatorVisitor::FADD_imm(u64) {
    throw NotImplementedException("FADD (imm)");
}

void TranslatorVisitor::FADD32I(u64) {
    throw NotImplementedException("FADD32I");
}
| 70 | |||
| 71 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..30ca052ec --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp | |||
| @@ -0,0 +1,73 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
// Common implementation for all FFMA variants. Computes a fused
// multiply-add: dest = src_a * src_b + src_c, with per-operand negation.
// SAT and CC modifiers are not implemented yet and throw.
void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, bool neg_a,
          bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const ffma{insn};

    if (sat) {
        throw NotImplementedException("FFMA SAT");
    }
    if (cc) {
        throw NotImplementedException("FFMA CC");
    }
    // FFMA has no absolute-value modifiers, only negation (abs arguments are false).
    const IR::U32 op_a{v.ir.FPAbsNeg(v.X(ffma.src_a), false, neg_a)};
    const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
    const IR::U32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
    const IR::FpControl fp_control{
        .no_contraction{true},
        .rounding{CastFpRounding(fp_rounding)},
        .fmz_mode{CastFmzMode(fmz_mode)},
    };
    v.X(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control));
}
| 36 | |||
// Decodes the modifier bits shared by the FFMA encodings and forwards to the
// common implementation above. neg_a is hardwired to false in these encodings.
void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c) {
    union {
        u64 raw;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg_b;
        BitField<49, 1, u64> neg_c;
        BitField<50, 1, u64> sat;
        BitField<51, 2, FpRounding> fp_rounding;
        BitField<53, 2, FmzMode> fmz_mode;
    } const ffma{insn};

    FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0,
         ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding);
}
| 51 | } // Anonymous namespace | ||
| 52 | |||
// Register-register variant: B from the register at bit 20, C from bit 39.
void TranslatorVisitor::FFMA_reg(u64 insn) {
    FFMA(*this, insn, GetReg20(insn), GetReg39(insn));
}

void TranslatorVisitor::FFMA_rc(u64) {
    throw NotImplementedException("FFMA (rc)");
}

// Cbuf-register variant: B from a constant buffer, C from the register at bit 39.
void TranslatorVisitor::FFMA_cr(u64 insn) {
    FFMA(*this, insn, GetCbuf(insn), GetReg39(insn));
}

void TranslatorVisitor::FFMA_imm(u64) {
    throw NotImplementedException("FFMA (imm)");
}

void TranslatorVisitor::FFMA32I(u64) {
    throw NotImplementedException("FFMA32I");
}
| 72 | |||
| 73 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp new file mode 100644 index 000000000..743a1e2f0 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp | |||
| @@ -0,0 +1,108 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | enum class Scale : u64 { | ||
| 14 | None, | ||
| 15 | D2, | ||
| 16 | D4, | ||
| 17 | D8, | ||
| 18 | M8, | ||
| 19 | M4, | ||
| 20 | M2, | ||
| 21 | INVALIDSCALE37, | ||
| 22 | }; | ||
| 23 | |||
| 24 | float ScaleFactor(Scale scale) { | ||
| 25 | switch (scale) { | ||
| 26 | case Scale::None: | ||
| 27 | return 1.0f; | ||
| 28 | case Scale::D2: | ||
| 29 | return 1.0f / 2.0f; | ||
| 30 | case Scale::D4: | ||
| 31 | return 1.0f / 4.0f; | ||
| 32 | case Scale::D8: | ||
| 33 | return 1.0f / 8.0f; | ||
| 34 | case Scale::M8: | ||
| 35 | return 8.0f; | ||
| 36 | case Scale::M4: | ||
| 37 | return 4.0f; | ||
| 38 | case Scale::M2: | ||
| 39 | return 2.0f; | ||
| 40 | case Scale::INVALIDSCALE37: | ||
| 41 | break; | ||
| 42 | } | ||
| 43 | throw NotImplementedException("Invalid FMUL scale {}", scale); | ||
| 44 | } | ||
| 45 | |||
// Common implementation for all FMUL variants: dest = src_a * src_b with an
// optional power-of-two pre-scale applied to operand A.
// SAT and CC modifiers are not implemented yet and throw.
void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode,
          FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const fmul{insn};

    if (cc) {
        throw NotImplementedException("FMUL CC");
    }
    if (sat) {
        throw NotImplementedException("FMUL SAT");
    }
    IR::U32 op_a{v.X(fmul.src_a)};
    if (scale != Scale::None) {
        // Scaling is only supported together with FTZ and round-to-nearest;
        // other combinations are rejected for now.
        if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) {
            throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers");
        }
        // NOTE(review): this pre-multiply uses default FpControl, not the
        // instruction's modifiers — confirm this matches hardware.
        op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale)));
    }
    const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
    const IR::FpControl fp_control{
        .no_contraction{true},
        .rounding{CastFpRounding(fp_rounding)},
        .fmz_mode{CastFmzMode(fmz_mode)},
    };
    v.X(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control));
}
| 75 | |||
| 76 | void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { | ||
| 77 | union { | ||
| 78 | u64 raw; | ||
| 79 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 80 | BitField<41, 3, Scale> scale; | ||
| 81 | BitField<44, 2, FmzMode> fmz; | ||
| 82 | BitField<47, 1, u64> cc; | ||
| 83 | BitField<48, 1, u64> neg_b; | ||
| 84 | BitField<50, 1, u64> sat; | ||
| 85 | } fmul{insn}; | ||
| 86 | |||
| 87 | FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, | ||
| 88 | fmul.neg_b != 0); | ||
| 89 | } | ||
| 90 | } // Anonymous namespace | ||
| 91 | |||
// Register-operand variant: operand B is the register encoded at bit 20.
void TranslatorVisitor::FMUL_reg(u64 insn) {
    return FMUL(*this, insn, GetReg20(insn));
}

// Constant-buffer, immediate and 32-bit-immediate variants are not
// implemented yet.
void TranslatorVisitor::FMUL_cbuf(u64) {
    throw NotImplementedException("FMUL (cbuf)");
}

void TranslatorVisitor::FMUL_imm(u64) {
    throw NotImplementedException("FMUL (imm)");
}

void TranslatorVisitor::FMUL32I(u64) {
    throw NotImplementedException("FMUL32I");
}
| 107 | |||
| 108 | } // namespace Shader::Maxwell \ No newline at end of file | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 7bc7ce9f2..548c7f611 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp | |||
| @@ -16,6 +16,22 @@ void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { | |||
| 16 | ir.SetReg(dest_reg, value); | 16 | ir.SetReg(dest_reg, value); |
| 17 | } | 17 | } |
| 18 | 18 | ||
// Reads the register operand encoded at bits [20,28) of the instruction.
IR::U32 TranslatorVisitor::GetReg20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 8, IR::Reg> index;
    } const reg{insn};
    return X(reg.index);
}

// Reads the register operand encoded at bits [39,47) of the instruction.
IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
    union {
        u64 raw;
        BitField<39, 8, IR::Reg> index;
    } const reg{insn};
    return X(reg.index);
}
| 34 | |||
| 19 | IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { | 35 | IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { |
| 20 | union { | 36 | union { |
| 21 | u64 raw; | 37 | u64 raw; |
| @@ -33,7 +49,7 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { | |||
| 33 | return ir.GetCbuf(binding, byte_offset); | 49 | return ir.GetCbuf(binding, byte_offset); |
| 34 | } | 50 | } |
| 35 | 51 | ||
| 36 | IR::U32 TranslatorVisitor::GetImm(u64 insn) { | 52 | IR::U32 TranslatorVisitor::GetImm20(u64 insn) { |
| 37 | union { | 53 | union { |
| 38 | u64 raw; | 54 | u64 raw; |
| 39 | BitField<20, 19, u64> value; | 55 | BitField<20, 19, u64> value; |
| @@ -44,6 +60,14 @@ IR::U32 TranslatorVisitor::GetImm(u64 insn) { | |||
| 44 | return ir.Imm32(value); | 60 | return ir.Imm32(value); |
| 45 | } | 61 | } |
| 46 | 62 | ||
// Builds an IR 32-bit immediate from the raw value at bits [20,52),
// with no sign extension (contrast GetImm20 above).
IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
    union {
        u64 raw;
        BitField<20, 32, u64> value;
    } const imm{insn};
    return ir.Imm32(static_cast<u32>(imm.value));
}
| 70 | |||
| 47 | void TranslatorVisitor::SetZFlag(const IR::U1& value) { | 71 | void TranslatorVisitor::SetZFlag(const IR::U1& value) { |
| 48 | ir.SetZFlag(value); | 72 | ir.SetZFlag(value); |
| 49 | } | 73 | } |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 8be7d6ff1..ef6d977fe 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h | |||
| @@ -46,7 +46,7 @@ public: | |||
| 46 | void DADD_reg(u64 insn); | 46 | void DADD_reg(u64 insn); |
| 47 | void DADD_cbuf(u64 insn); | 47 | void DADD_cbuf(u64 insn); |
| 48 | void DADD_imm(u64 insn); | 48 | void DADD_imm(u64 insn); |
| 49 | void DEPBAR(u64 insn); | 49 | void DEPBAR(); |
| 50 | void DFMA_reg(u64 insn); | 50 | void DFMA_reg(u64 insn); |
| 51 | void DFMA_rc(u64 insn); | 51 | void DFMA_rc(u64 insn); |
| 52 | void DFMA_cr(u64 insn); | 52 | void DFMA_cr(u64 insn); |
| @@ -298,9 +298,14 @@ public: | |||
| 298 | [[nodiscard]] IR::U32 X(IR::Reg reg); | 298 | [[nodiscard]] IR::U32 X(IR::Reg reg); |
| 299 | void X(IR::Reg dest_reg, const IR::U32& value); | 299 | void X(IR::Reg dest_reg, const IR::U32& value); |
| 300 | 300 | ||
| 301 | [[nodiscard]] IR::U32 GetReg20(u64 insn); | ||
| 302 | [[nodiscard]] IR::U32 GetReg39(u64 insn); | ||
| 303 | |||
| 301 | [[nodiscard]] IR::U32 GetCbuf(u64 insn); | 304 | [[nodiscard]] IR::U32 GetCbuf(u64 insn); |
| 302 | 305 | ||
| 303 | [[nodiscard]] IR::U32 GetImm(u64 insn); | 306 | [[nodiscard]] IR::U32 GetImm20(u64 insn); |
| 307 | |||
| 308 | [[nodiscard]] IR::U32 GetImm32(u64 insn); | ||
| 304 | 309 | ||
| 305 | void SetZFlag(const IR::U1& value); | 310 | void SetZFlag(const IR::U1& value); |
| 306 | void SetSFlag(const IR::U1& value); | 311 | void SetSFlag(const IR::U1& value); |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp new file mode 100644 index 000000000..60f79b160 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp | |||
| @@ -0,0 +1,106 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
// Common implementation for all IADD variants. Computes
// dest = (neg_a ? -A : A) + op_b, plus carry-in when X is set, plus one when
// PO is set. SAT, X+PO, CC+PO and X+CC combinations are not implemented.
void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x,
          bool cc) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const iadd{insn};

    if (sat) {
        throw NotImplementedException("IADD SAT");
    }
    if (x && po) {
        throw NotImplementedException("IADD X+PO");
    }
    // Operand A is always read from here, negated if needed
    IR::U32 op_a{v.X(iadd.src_a)};
    if (neg_a) {
        op_a = v.ir.INeg(op_a);
    }
    // Add both operands
    IR::U32 result{v.ir.IAdd(op_a, op_b)};
    if (x) {
        // X: add the previous carry flag as a carry-in (0 or 1).
        const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
        result = v.ir.IAdd(result, carry);
    }
    if (po) {
        // .PO adds one to the result
        result = v.ir.IAdd(result, v.ir.Imm32(1));
    }
    if (cc) {
        // Store flags
        // TODO: Does this grab the result pre-PO or after?
        if (po) {
            throw NotImplementedException("IADD CC+PO");
        }
        // TODO: How does CC behave when X is set?
        if (x) {
            throw NotImplementedException("IADD X+CC");
        }
        // Flags are derived from the final add emitted above.
        v.SetZFlag(v.ir.GetZeroFromOp(result));
        v.SetSFlag(v.ir.GetSignFromOp(result));
        v.SetCFlag(v.ir.GetCarryFromOp(result));
        v.SetOFlag(v.ir.GetOverflowFromOp(result));
    }
    // Store result
    v.X(iadd.dest_reg, result);
}
| 58 | |||
| 59 | void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { | ||
| 60 | union { | ||
| 61 | u64 insn; | ||
| 62 | BitField<43, 1, u64> x; | ||
| 63 | BitField<47, 1, u64> cc; | ||
| 64 | BitField<48, 2, u64> three_for_po; | ||
| 65 | BitField<48, 1, u64> neg_b; | ||
| 66 | BitField<49, 1, u64> neg_a; | ||
| 67 | BitField<50, 1, u64> sat; | ||
| 68 | } const iadd{insn}; | ||
| 69 | |||
| 70 | const bool po{iadd.three_for_po == 3}; | ||
| 71 | const bool neg_a{!po && iadd.neg_a != 0}; | ||
| 72 | if (!po && iadd.neg_b != 0) { | ||
| 73 | op_b = v.ir.INeg(op_b); | ||
| 74 | } | ||
| 75 | IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0); | ||
| 76 | } | ||
| 77 | } // Anonymous namespace | ||
| 78 | |||
// Register and immediate variants are not implemented yet.
void TranslatorVisitor::IADD_reg(u64) {
    throw NotImplementedException("IADD (reg)");
}

// Constant-buffer variant: operand B is read from a constant buffer.
void TranslatorVisitor::IADD_cbuf(u64 insn) {
    IADD(*this, insn, GetCbuf(insn));
}

void TranslatorVisitor::IADD_imm(u64) {
    throw NotImplementedException("IADD (imm)");
}
| 90 | |||
// 32-bit-immediate variant: operand B is the full 32-bit immediate at bit 20.
// Modifier bits live at different positions than in the short encodings.
void TranslatorVisitor::IADD32I(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> cc;
        BitField<53, 1, u64> x;
        BitField<54, 1, u64> sat;
        BitField<55, 2, u64> three_for_po;
        BitField<56, 1, u64> neg_a;
    } const iadd32i{insn};

    // Bits [55,57) == 3 selects .PO; the overlapping negation bit is then ignored.
    const bool po{iadd32i.three_for_po == 3};
    const bool neg_a{!po && iadd32i.neg_a != 0};
    IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0);
}
| 105 | |||
| 106 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp new file mode 100644 index 000000000..f92c0bbd6 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp | |||
| @@ -0,0 +1,73 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
// Common implementation for all ISCADD variants:
// dest = (A << scale) + B, with optional per-operand negation or .PO (+1).
// CC is not implemented yet and throws.
void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> op_a;
        BitField<47, 1, u64> cc;
        BitField<48, 2, u64> three_for_po;
        BitField<48, 1, u64> neg_b;
        BitField<49, 1, u64> neg_a;
        BitField<39, 5, u64> scale;
    } const iscadd{insn};

    // Bits [48,50) == 3 selects .PO, overlapping the two negation bits.
    const bool po{iscadd.three_for_po == 3};
    IR::U32 op_a{v.X(iscadd.op_a)};
    if (!po) {
        // When PO is not present, the bits are interpreted as negation
        if (iscadd.neg_a != 0) {
            op_a = v.ir.INeg(op_a);
        }
        if (iscadd.neg_b != 0) {
            op_b = v.ir.INeg(op_b);
        }
    }
    // With the operands already processed, scale A
    const IR::U32 scale{v.ir.Imm32(static_cast<u32>(iscadd.scale))};
    const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)};

    IR::U32 result{v.ir.IAdd(scaled_a, op_b)};
    if (po) {
        // .PO adds one to the final result
        result = v.ir.IAdd(result, v.ir.Imm32(1));
    }
    v.X(iscadd.dest_reg, result);

    if (iscadd.cc != 0) {
        throw NotImplementedException("ISCADD CC");
    }
}
| 49 | |||
| 50 | } // Anonymous namespace | ||
| 51 | |||
// Register-operand variant: operand B is the register encoded at bit 20.
void TranslatorVisitor::ISCADD_reg(u64 insn) {
    union {
        u64 raw;
        BitField<20, 8, IR::Reg> op_b;
    } const iscadd{insn};

    ISCADD(*this, insn, X(iscadd.op_b));
}
| 60 | |||
// Constant-buffer, immediate and 32-bit-immediate variants are not
// implemented yet.
void TranslatorVisitor::ISCADD_cbuf(u64) {
    throw NotImplementedException("ISCADD (cbuf)");
}

void TranslatorVisitor::ISCADD_imm(u64) {
    throw NotImplementedException("ISCADD (imm)");
}

void TranslatorVisitor::ISCADD32I(u64) {
    throw NotImplementedException("ISCADD32I");
}
| 72 | |||
| 73 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp new file mode 100644 index 000000000..76c6b5291 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp | |||
| @@ -0,0 +1,99 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
// 3-bit comparison operation encoded in ISETP (bits [49,52)).
enum class CompareOp : u64 {
    F,  // Always false
    LT, // Less than
    EQ, // Equal
    LE, // Less than or equal
    GT, // Greater than
    NE, // Not equal
    GE, // Greater than or equal
    T,  // Always true
};

// 2-bit boolean op (bits [45,47)) combining the comparison with a predicate.
enum class Bop : u64 {
    AND,
    OR,
    XOR,
};
| 27 | |||
| 28 | IR::U1 Compare(IR::IREmitter& ir, CompareOp op, const IR::U32& lhs, const IR::U32& rhs, | ||
| 29 | bool is_signed) { | ||
| 30 | switch (op) { | ||
| 31 | case CompareOp::F: | ||
| 32 | return ir.Imm1(false); | ||
| 33 | case CompareOp::LT: | ||
| 34 | return ir.ILessThan(lhs, rhs, is_signed); | ||
| 35 | case CompareOp::EQ: | ||
| 36 | return ir.IEqual(lhs, rhs); | ||
| 37 | case CompareOp::LE: | ||
| 38 | return ir.ILessThanEqual(lhs, rhs, is_signed); | ||
| 39 | case CompareOp::GT: | ||
| 40 | return ir.IGreaterThan(lhs, rhs, is_signed); | ||
| 41 | case CompareOp::NE: | ||
| 42 | return ir.INotEqual(lhs, rhs); | ||
| 43 | case CompareOp::GE: | ||
| 44 | return ir.IGreaterThanEqual(lhs, rhs, is_signed); | ||
| 45 | case CompareOp::T: | ||
| 46 | return ir.Imm1(true); | ||
| 47 | } | ||
| 48 | throw NotImplementedException("Invalid ISETP compare op {}", op); | ||
| 49 | } | ||
| 50 | |||
| 51 | IR::U1 Combine(IR::IREmitter& ir, Bop bop, const IR::U1& comparison, const IR::U1& bop_pred) { | ||
| 52 | switch (bop) { | ||
| 53 | case Bop::AND: | ||
| 54 | return ir.LogicalAnd(comparison, bop_pred); | ||
| 55 | case Bop::OR: | ||
| 56 | return ir.LogicalOr(comparison, bop_pred); | ||
| 57 | case Bop::XOR: | ||
| 58 | return ir.LogicalXor(comparison, bop_pred); | ||
| 59 | } | ||
| 60 | throw NotImplementedException("Invalid ISETP bop {}", bop); | ||
| 61 | } | ||
| 62 | |||
| 63 | void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { | ||
| 64 | union { | ||
| 65 | u64 raw; | ||
| 66 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 67 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 68 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 69 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 70 | BitField<42, 1, u64> neg_bop_pred; | ||
| 71 | BitField<45, 2, Bop> bop; | ||
| 72 | BitField<48, 1, u64> is_signed; | ||
| 73 | BitField<49, 3, CompareOp> compare_op; | ||
| 74 | } const isetp{insn}; | ||
| 75 | |||
| 76 | const Bop bop{isetp.bop}; | ||
| 77 | const IR::U32 op_a{v.X(isetp.src_reg_a)}; | ||
| 78 | const IR::U1 comparison{Compare(v.ir, isetp.compare_op, op_a, op_b, isetp.is_signed != 0)}; | ||
| 79 | const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)}; | ||
| 80 | const IR::U1 result_a{Combine(v.ir, bop, comparison, bop_pred)}; | ||
| 81 | const IR::U1 result_b{Combine(v.ir, bop, v.ir.LogicalNot(comparison), bop_pred)}; | ||
| 82 | v.ir.SetPred(isetp.dest_pred_a, result_a); | ||
| 83 | v.ir.SetPred(isetp.dest_pred_b, result_b); | ||
| 84 | } | ||
| 85 | } // Anonymous namespace | ||
| 86 | |||
| 87 | void TranslatorVisitor::ISETP_reg(u64 insn) { | ||
| 88 | ISETP(*this, insn, GetReg20(insn)); | ||
| 89 | } | ||
| 90 | |||
| 91 | void TranslatorVisitor::ISETP_cbuf(u64 insn) { | ||
| 92 | ISETP(*this, insn, GetCbuf(insn)); | ||
| 93 | } | ||
| 94 | |||
| 95 | void TranslatorVisitor::ISETP_imm(u64) { | ||
| 96 | throw NotImplementedException("ISETP_imm"); | ||
| 97 | } | ||
| 98 | |||
| 99 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp new file mode 100644 index 000000000..d4b417d14 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 16 | BitField<39, 1, u64> w; | ||
| 17 | BitField<43, 1, u64> x; | ||
| 18 | BitField<47, 1, u64> cc; | ||
| 19 | } const shl{insn}; | ||
| 20 | |||
| 21 | if (shl.x != 0) { | ||
| 22 | throw NotImplementedException("SHL.X"); | ||
| 23 | } | ||
| 24 | if (shl.cc != 0) { | ||
| 25 | throw NotImplementedException("SHL.CC"); | ||
| 26 | } | ||
| 27 | const IR::U32 base{v.X(shl.src_reg_a)}; | ||
| 28 | IR::U32 result; | ||
| 29 | if (shl.w != 0) { | ||
| 30 | // When .W is set, the shift value is wrapped | ||
| 31 | // To emulate this we just have to clamp it ourselves. | ||
| 32 | const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))}; | ||
| 33 | result = v.ir.ShiftLeftLogical(base, shift); | ||
| 34 | } else { | ||
| 35 | // When .W is not set, the shift value is clamped between 0 and 32. | ||
| 36 | // To emulate this we have to have in mind the special shift of 32, that evaluates as 0. | ||
| 37 | // We can safely evaluate an out of bounds shift according to the SPIR-V specification: | ||
| 38 | // | ||
| 39 | // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical | ||
| 40 | // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than | ||
| 41 | // or equal to the bit width of the components of Base." | ||
| 42 | // | ||
| 43 | // And on the GLASM specification it is also safe to evaluate out of bounds: | ||
| 44 | // | ||
| 45 | // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt | ||
| 46 | // "The results of a shift operation ("<<") are undefined if the value of the second operand | ||
| 47 | // is negative, or greater than or equal to the number of bits in the first operand." | ||
| 48 | // | ||
| 49 | // Emphasis on undefined results in contrast to undefined behavior. | ||
| 50 | // | ||
| 51 | const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)}; | ||
| 52 | const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)}; | ||
| 53 | result = v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0)); | ||
| 54 | } | ||
| 55 | v.X(shl.dest_reg, result); | ||
| 56 | } | ||
| 57 | } // Anonymous namespace | ||
| 58 | |||
| 59 | void TranslatorVisitor::SHL_reg(u64) { | ||
| 60 | throw NotImplementedException("SHL_reg"); | ||
| 61 | } | ||
| 62 | |||
| 63 | void TranslatorVisitor::SHL_cbuf(u64) { | ||
| 64 | throw NotImplementedException("SHL_cbuf"); | ||
| 65 | } | ||
| 66 | |||
| 67 | void TranslatorVisitor::SHL_imm(u64 insn) { | ||
| 68 | SHL(*this, insn, GetImm20(insn)); | ||
| 69 | } | ||
| 70 | |||
| 71 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp new file mode 100644 index 000000000..70a7c76c5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class SelectMode : u64 { | ||
| 12 | Default, | ||
| 13 | CLO, | ||
| 14 | CHI, | ||
| 15 | CSFU, | ||
| 16 | CBCC, | ||
| 17 | }; | ||
| 18 | |||
| 19 | enum class Half : u64 { | ||
| 20 | H0, // Least-significant bits (15:0) | ||
| 21 | H1, // Most-significant bits (31:16) | ||
| 22 | }; | ||
| 23 | |||
| 24 | IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) { | ||
| 25 | const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)}; | ||
| 26 | return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed); | ||
| 27 | } | ||
| 28 | |||
| 29 | void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, | ||
| 30 | SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) { | ||
| 31 | union { | ||
| 32 | u64 raw; | ||
| 33 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 34 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 35 | BitField<47, 1, u64> cc; | ||
| 36 | BitField<48, 1, u64> is_a_signed; | ||
| 37 | BitField<49, 1, u64> is_b_signed; | ||
| 38 | BitField<53, 1, Half> half_a; | ||
| 39 | } const xmad{insn}; | ||
| 40 | |||
| 41 | if (x) { | ||
| 42 | throw NotImplementedException("XMAD X"); | ||
| 43 | } | ||
| 44 | const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)}; | ||
| 45 | const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)}; | ||
| 46 | |||
| 47 | IR::U32 product{v.ir.IMul(op_a, op_b)}; | ||
| 48 | if (psl) { | ||
| 49 | // .PSL shifts the product 16 bits | ||
| 50 | product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16)); | ||
| 51 | } | ||
| 52 | const IR::U32 op_c{[&]() -> IR::U32 { | ||
| 53 | switch (select_mode) { | ||
| 54 | case SelectMode::Default: | ||
| 55 | return src_c; | ||
| 56 | case SelectMode::CLO: | ||
| 57 | return ExtractHalf(v, src_c, Half::H0, false); | ||
| 58 | case SelectMode::CHI: | ||
| 59 | return ExtractHalf(v, src_c, Half::H1, false); | ||
| 60 | case SelectMode::CBCC: | ||
| 61 | return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_b); | ||
| 62 | case SelectMode::CSFU: | ||
| 63 | throw NotImplementedException("XMAD CSFU"); | ||
| 64 | } | ||
| 65 | throw NotImplementedException("Invalid XMAD select mode {}", select_mode); | ||
| 66 | }()}; | ||
| 67 | IR::U32 result{v.ir.IAdd(product, op_c)}; | ||
| 68 | if (mrg) { | ||
| 69 | // .MRG inserts src_b [15:0] into result's [31:16]. | ||
| 70 | const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)}; | ||
| 71 | result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16)); | ||
| 72 | } | ||
| 73 | if (xmad.cc) { | ||
| 74 | throw NotImplementedException("XMAD CC"); | ||
| 75 | } | ||
| 76 | // Store result | ||
| 77 | v.X(xmad.dest_reg, result); | ||
| 78 | } | ||
| 79 | } // Anonymous namespace | ||
| 80 | |||
| 81 | void TranslatorVisitor::XMAD_reg(u64) { | ||
| 82 | throw NotImplementedException("XMAD (reg)"); | ||
| 83 | } | ||
| 84 | |||
| 85 | void TranslatorVisitor::XMAD_rc(u64) { | ||
| 86 | throw NotImplementedException("XMAD (rc)"); | ||
| 87 | } | ||
| 88 | |||
| 89 | void TranslatorVisitor::XMAD_cr(u64) { | ||
| 90 | throw NotImplementedException("XMAD (cr)"); | ||
| 91 | } | ||
| 92 | |||
| 93 | void TranslatorVisitor::XMAD_imm(u64 insn) { | ||
| 94 | union { | ||
| 95 | u64 raw; | ||
| 96 | BitField<20, 16, u64> src_b; | ||
| 97 | BitField<36, 1, u64> psl; | ||
| 98 | BitField<37, 1, u64> mrg; | ||
| 99 | BitField<38, 1, u64> x; | ||
| 100 | BitField<39, 8, IR::Reg> src_c; | ||
| 101 | BitField<50, 3, SelectMode> select_mode; | ||
| 102 | } const xmad{insn}; | ||
| 103 | |||
| 104 | const IR::U32 src_b{ir.Imm32(static_cast<u32>(xmad.src_b))}; | ||
| 105 | const IR::U32 src_c{X(xmad.src_c)}; | ||
| 106 | XMAD(*this, insn, src_b, src_c, xmad.select_mode, Half::H0, xmad.psl != 0, xmad.mrg != 0, | ||
| 107 | xmad.x != 0); | ||
| 108 | } | ||
| 109 | |||
| 110 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index d8fd387cf..c9669c617 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp | |||
| @@ -10,16 +10,35 @@ | |||
| 10 | 10 | ||
| 11 | namespace Shader::Maxwell { | 11 | namespace Shader::Maxwell { |
| 12 | namespace { | 12 | namespace { |
| 13 | enum class LoadSize : u64 { | ||
| 14 | U8, // Zero-extend | ||
| 15 | S8, // Sign-extend | ||
| 16 | U16, // Zero-extend | ||
| 17 | S16, // Sign-extend | ||
| 18 | B32, | ||
| 19 | B64, | ||
| 20 | B128, | ||
| 21 | U128, // ??? | ||
| 22 | }; | ||
| 23 | |||
| 13 | enum class StoreSize : u64 { | 24 | enum class StoreSize : u64 { |
| 14 | U8, | 25 | U8, // Zero-extend |
| 15 | S8, | 26 | S8, // Sign-extend |
| 16 | U16, | 27 | U16, // Zero-extend |
| 17 | S16, | 28 | S16, // Sign-extend |
| 18 | B32, | 29 | B32, |
| 19 | B64, | 30 | B64, |
| 20 | B128, | 31 | B128, |
| 21 | }; | 32 | }; |
| 22 | 33 | ||
| 34 | // See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html | ||
| 35 | enum class LoadCache : u64 { | ||
| 36 | CA, // Cache at all levels, likely to be accessed again | ||
| 37 | CG, // Cache at global level (cache in L2 and below, not L1) | ||
| 38 | CI, // ??? | ||
| 39 | CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again) | ||
| 40 | }; | ||
| 41 | |||
| 23 | // See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html | 42 | // See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html |
| 24 | enum class StoreCache : u64 { | 43 | enum class StoreCache : u64 { |
| 25 | WB, // Cache write-back all coherent levels | 44 | WB, // Cache write-back all coherent levels |
| @@ -27,61 +46,137 @@ enum class StoreCache : u64 { | |||
| 27 | CS, // Cache streaming, likely to be accessed once | 46 | CS, // Cache streaming, likely to be accessed once |
| 28 | WT, // Cache write-through (to system memory) | 47 | WT, // Cache write-through (to system memory) |
| 29 | }; | 48 | }; |
| 30 | } // Anonymous namespace | ||
| 31 | 49 | ||
| 32 | void TranslatorVisitor::STG(u64 insn) { | 50 | IR::U64 Address(TranslatorVisitor& v, u64 insn) { |
| 33 | // STG stores registers into global memory. | ||
| 34 | union { | 51 | union { |
| 35 | u64 raw; | 52 | u64 raw; |
| 36 | BitField<0, 8, IR::Reg> data_reg; | ||
| 37 | BitField<8, 8, IR::Reg> addr_reg; | 53 | BitField<8, 8, IR::Reg> addr_reg; |
| 54 | BitField<20, 24, s64> addr_offset; | ||
| 55 | BitField<20, 24, u64> rz_addr_offset; | ||
| 38 | BitField<45, 1, u64> e; | 56 | BitField<45, 1, u64> e; |
| 39 | BitField<46, 2, StoreCache> cache; | 57 | } const mem{insn}; |
| 40 | BitField<48, 3, StoreSize> size; | ||
| 41 | } const stg{insn}; | ||
| 42 | 58 | ||
| 43 | const IR::U64 address{[&]() -> IR::U64 { | 59 | const IR::U64 address{[&]() -> IR::U64 { |
| 44 | if (stg.e == 0) { | 60 | if (mem.e == 0) { |
| 45 | // STG without .E uses a 32-bit pointer, zero-extend it | 61 | // LDG/STG without .E uses a 32-bit pointer, zero-extend it |
| 46 | return ir.ConvertU(64, X(stg.addr_reg)); | 62 | return v.ir.ConvertU(64, v.X(mem.addr_reg)); |
| 47 | } | 63 | } |
| 48 | if (!IR::IsAligned(stg.addr_reg, 2)) { | 64 | if (!IR::IsAligned(mem.addr_reg, 2)) { |
| 49 | throw NotImplementedException("Unaligned address register"); | 65 | throw NotImplementedException("Unaligned address register"); |
| 50 | } | 66 | } |
| 51 | // Pack two registers to build the 32-bit address | 67 | // Pack two registers to build the 64-bit address |
| 52 | return ir.PackUint2x32(ir.CompositeConstruct(X(stg.addr_reg), X(stg.addr_reg + 1))); | 68 | return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1))); |
| 69 | }()}; | ||
| 70 | const u64 addr_offset{[&]() -> u64 { | ||
| 71 | if (mem.addr_reg == IR::Reg::RZ) { | ||
| 72 | // When RZ is used, the address is an absolute address | ||
| 73 | return static_cast<u64>(mem.rz_addr_offset.Value()); | ||
| 74 | } else { | ||
| 75 | return static_cast<u64>(mem.addr_offset.Value()); | ||
| 76 | } | ||
| 53 | }()}; | 77 | }()}; |
| 78 | // Apply the offset | ||
| 79 | return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); | ||
| 80 | } | ||
| 81 | } // Anonymous namespace | ||
| 82 | |||
| 83 | void TranslatorVisitor::LDG(u64 insn) { | ||
| 84 | // LDG loads global memory into registers | ||
| 85 | union { | ||
| 86 | u64 raw; | ||
| 87 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 88 | BitField<46, 2, LoadCache> cache; | ||
| 89 | BitField<48, 3, LoadSize> size; | ||
| 90 | } const ldg{insn}; | ||
| 91 | |||
| 92 | // Pointer to load data from | ||
| 93 | const IR::U64 address{Address(*this, insn)}; | ||
| 94 | const IR::Reg dest_reg{ldg.dest_reg}; | ||
| 95 | switch (ldg.size) { | ||
| 96 | case LoadSize::U8: | ||
| 97 | X(dest_reg, ir.LoadGlobalU8(address)); | ||
| 98 | break; | ||
| 99 | case LoadSize::S8: | ||
| 100 | X(dest_reg, ir.LoadGlobalS8(address)); | ||
| 101 | break; | ||
| 102 | case LoadSize::U16: | ||
| 103 | X(dest_reg, ir.LoadGlobalU16(address)); | ||
| 104 | break; | ||
| 105 | case LoadSize::S16: | ||
| 106 | X(dest_reg, ir.LoadGlobalS16(address)); | ||
| 107 | break; | ||
| 108 | case LoadSize::B32: | ||
| 109 | X(dest_reg, ir.LoadGlobal32(address)); | ||
| 110 | break; | ||
| 111 | case LoadSize::B64: { | ||
| 112 | if (!IR::IsAligned(dest_reg, 2)) { | ||
| 113 | throw NotImplementedException("Unaligned data registers"); | ||
| 114 | } | ||
| 115 | const IR::Value vector{ir.LoadGlobal64(address)}; | ||
| 116 | for (int i = 0; i < 2; ++i) { | ||
| 117 | X(dest_reg + i, ir.CompositeExtract(vector, i)); | ||
| 118 | } | ||
| 119 | break; | ||
| 120 | } | ||
| 121 | case LoadSize::B128: { | ||
| 122 | if (!IR::IsAligned(dest_reg, 4)) { | ||
| 123 | throw NotImplementedException("Unaligned data registers"); | ||
| 124 | } | ||
| 125 | const IR::Value vector{ir.LoadGlobal128(address)}; | ||
| 126 | for (int i = 0; i < 4; ++i) { | ||
| 127 | X(dest_reg + i, ir.CompositeExtract(vector, i)); | ||
| 128 | } | ||
| 129 | break; | ||
| 130 | } | ||
| 131 | case LoadSize::U128: | ||
| 132 | throw NotImplementedException("LDG U.128"); | ||
| 133 | default: | ||
| 134 | throw NotImplementedException("Invalid LDG size {}", ldg.size.Value()); | ||
| 135 | } | ||
| 136 | } | ||
| 137 | |||
| 138 | void TranslatorVisitor::STG(u64 insn) { | ||
| 139 | // STG stores registers into global memory. | ||
| 140 | union { | ||
| 141 | u64 raw; | ||
| 142 | BitField<0, 8, IR::Reg> data_reg; | ||
| 143 | BitField<46, 2, StoreCache> cache; | ||
| 144 | BitField<48, 3, StoreSize> size; | ||
| 145 | } const stg{insn}; | ||
| 54 | 146 | ||
| 147 | // Pointer to store data into | ||
| 148 | const IR::U64 address{Address(*this, insn)}; | ||
| 149 | const IR::Reg data_reg{stg.data_reg}; | ||
| 55 | switch (stg.size) { | 150 | switch (stg.size) { |
| 56 | case StoreSize::U8: | 151 | case StoreSize::U8: |
| 57 | ir.WriteGlobalU8(address, X(stg.data_reg)); | 152 | ir.WriteGlobalU8(address, X(data_reg)); |
| 58 | break; | 153 | break; |
| 59 | case StoreSize::S8: | 154 | case StoreSize::S8: |
| 60 | ir.WriteGlobalS8(address, X(stg.data_reg)); | 155 | ir.WriteGlobalS8(address, X(data_reg)); |
| 61 | break; | 156 | break; |
| 62 | case StoreSize::U16: | 157 | case StoreSize::U16: |
| 63 | ir.WriteGlobalU16(address, X(stg.data_reg)); | 158 | ir.WriteGlobalU16(address, X(data_reg)); |
| 64 | break; | 159 | break; |
| 65 | case StoreSize::S16: | 160 | case StoreSize::S16: |
| 66 | ir.WriteGlobalS16(address, X(stg.data_reg)); | 161 | ir.WriteGlobalS16(address, X(data_reg)); |
| 67 | break; | 162 | break; |
| 68 | case StoreSize::B32: | 163 | case StoreSize::B32: |
| 69 | ir.WriteGlobal32(address, X(stg.data_reg)); | 164 | ir.WriteGlobal32(address, X(data_reg)); |
| 70 | break; | 165 | break; |
| 71 | case StoreSize::B64: { | 166 | case StoreSize::B64: { |
| 72 | if (!IR::IsAligned(stg.data_reg, 2)) { | 167 | if (!IR::IsAligned(data_reg, 2)) { |
| 73 | throw NotImplementedException("Unaligned data registers"); | 168 | throw NotImplementedException("Unaligned data registers"); |
| 74 | } | 169 | } |
| 75 | const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1))}; | 170 | const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))}; |
| 76 | ir.WriteGlobal64(address, vector); | 171 | ir.WriteGlobal64(address, vector); |
| 77 | break; | 172 | break; |
| 78 | } | 173 | } |
| 79 | case StoreSize::B128: | 174 | case StoreSize::B128: |
| 80 | if (!IR::IsAligned(stg.data_reg, 4)) { | 175 | if (!IR::IsAligned(data_reg, 4)) { |
| 81 | throw NotImplementedException("Unaligned data registers"); | 176 | throw NotImplementedException("Unaligned data registers"); |
| 82 | } | 177 | } |
| 83 | const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1), | 178 | const IR::Value vector{ |
| 84 | X(stg.data_reg + 2), X(stg.data_reg + 3))}; | 179 | ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))}; |
| 85 | ir.WriteGlobal128(address, vector); | 180 | ir.WriteGlobal128(address, vector); |
| 86 | break; | 181 | break; |
| 87 | } | 182 | } |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp index 7fa35ba3a..1711d3f48 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp | |||
| @@ -39,7 +39,7 @@ void TranslatorVisitor::MOV_cbuf(u64 insn) { | |||
| 39 | void TranslatorVisitor::MOV_imm(u64 insn) { | 39 | void TranslatorVisitor::MOV_imm(u64 insn) { |
| 40 | const MOV mov{insn}; | 40 | const MOV mov{insn}; |
| 41 | CheckMask(mov); | 41 | CheckMask(mov); |
| 42 | X(mov.dest_reg, GetImm(insn)); | 42 | X(mov.dest_reg, GetImm20(insn)); |
| 43 | } | 43 | } |
| 44 | 44 | ||
| 45 | } // namespace Shader::Maxwell | 45 | } // namespace Shader::Maxwell |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp new file mode 100644 index 000000000..93cea302a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp | |||
| @@ -0,0 +1,114 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class SpecialRegister : u64 { | ||
| 12 | SR_LANEID = 0, | ||
| 13 | SR_VIRTCFG = 2, | ||
| 14 | SR_VIRTID = 3, | ||
| 15 | SR_PM0 = 4, | ||
| 16 | SR_PM1 = 5, | ||
| 17 | SR_PM2 = 6, | ||
| 18 | SR_PM3 = 7, | ||
| 19 | SR_PM4 = 8, | ||
| 20 | SR_PM5 = 9, | ||
| 21 | SR_PM6 = 10, | ||
| 22 | SR_PM7 = 11, | ||
| 23 | SR_ORDERING_TICKET = 15, | ||
| 24 | SR_PRIM_TYPE = 16, | ||
| 25 | SR_INVOCATION_ID = 17, | ||
| 26 | SR_Y_DIRECTION = 18, | ||
| 27 | SR_THREAD_KILL = 19, | ||
| 28 | SM_SHADER_TYPE = 20, | ||
| 29 | SR_DIRECTCBEWRITEADDRESSLOW = 21, | ||
| 30 | SR_DIRECTCBEWRITEADDRESSHIGH = 22, | ||
| 31 | SR_DIRECTCBEWRITEENABLE = 23, | ||
| 32 | SR_MACHINE_ID_0 = 24, | ||
| 33 | SR_MACHINE_ID_1 = 25, | ||
| 34 | SR_MACHINE_ID_2 = 26, | ||
| 35 | SR_MACHINE_ID_3 = 27, | ||
| 36 | SR_AFFINITY = 28, | ||
| 37 | SR_INVOCATION_INFO = 29, | ||
| 38 | SR_WSCALEFACTOR_XY = 30, | ||
| 39 | SR_WSCALEFACTOR_Z = 31, | ||
| 40 | SR_TID = 32, | ||
| 41 | SR_TID_X = 33, | ||
| 42 | SR_TID_Y = 34, | ||
| 43 | SR_TID_Z = 35, | ||
| 44 | SR_CTAID_X = 37, | ||
| 45 | SR_CTAID_Y = 38, | ||
| 46 | SR_CTAID_Z = 39, | ||
| 47 | SR_NTID = 49, | ||
| 48 | SR_CirQueueIncrMinusOne = 50, | ||
| 49 | SR_NLATC = 51, | ||
| 50 | SR_SWINLO = 57, | ||
| 51 | SR_SWINSZ = 58, | ||
| 52 | SR_SMEMSZ = 59, | ||
| 53 | SR_SMEMBANKS = 60, | ||
| 54 | SR_LWINLO = 61, | ||
| 55 | SR_LWINSZ = 62, | ||
| 56 | SR_LMEMLOSZ = 63, | ||
| 57 | SR_LMEMHIOFF = 64, | ||
| 58 | SR_EQMASK = 65, | ||
| 59 | SR_LTMASK = 66, | ||
| 60 | SR_LEMASK = 67, | ||
| 61 | SR_GTMASK = 68, | ||
| 62 | SR_GEMASK = 69, | ||
| 63 | SR_REGALLOC = 70, | ||
| 64 | SR_GLOBALERRORSTATUS = 73, | ||
| 65 | SR_WARPERRORSTATUS = 75, | ||
| 66 | SR_PM_HI0 = 81, | ||
| 67 | SR_PM_HI1 = 82, | ||
| 68 | SR_PM_HI2 = 83, | ||
| 69 | SR_PM_HI3 = 84, | ||
| 70 | SR_PM_HI4 = 85, | ||
| 71 | SR_PM_HI5 = 86, | ||
| 72 | SR_PM_HI6 = 87, | ||
| 73 | SR_PM_HI7 = 88, | ||
| 74 | SR_CLOCKLO = 89, | ||
| 75 | SR_CLOCKHI = 90, | ||
| 76 | SR_GLOBALTIMERLO = 91, | ||
| 77 | SR_GLOBALTIMERHI = 92, | ||
| 78 | SR_HWTASKID = 105, | ||
| 79 | SR_CIRCULARQUEUEENTRYINDEX = 106, | ||
| 80 | SR_CIRCULARQUEUEENTRYADDRESSLOW = 107, | ||
| 81 | SR_CIRCULARQUEUEENTRYADDRESSHIGH = 108, | ||
| 82 | }; | ||
| 83 | |||
| 84 | [[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { | ||
| 85 | switch (special_register) { | ||
| 86 | case SpecialRegister::SR_TID_X: | ||
| 87 | return ir.LocalInvocationIdX(); | ||
| 88 | case SpecialRegister::SR_TID_Y: | ||
| 89 | return ir.LocalInvocationIdY(); | ||
| 90 | case SpecialRegister::SR_TID_Z: | ||
| 91 | return ir.LocalInvocationIdZ(); | ||
| 92 | case SpecialRegister::SR_CTAID_X: | ||
| 93 | return ir.WorkgroupIdX(); | ||
| 94 | case SpecialRegister::SR_CTAID_Y: | ||
| 95 | return ir.WorkgroupIdY(); | ||
| 96 | case SpecialRegister::SR_CTAID_Z: | ||
| 97 | return ir.WorkgroupIdZ(); | ||
| 98 | default: | ||
| 99 | throw NotImplementedException("S2R special register {}", special_register); | ||
| 100 | } | ||
| 101 | } | ||
| 102 | } // Anonymous namespace | ||
| 103 | |||
| 104 | void TranslatorVisitor::S2R(u64 insn) { | ||
| 105 | union { | ||
| 106 | u64 raw; | ||
| 107 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 108 | BitField<20, 8, SpecialRegister> src_reg; | ||
| 109 | } const s2r{insn}; | ||
| 110 | |||
| 111 | X(s2r.dest_reg, Read(ir, s2r.src_reg)); | ||
| 112 | } | ||
| 113 | |||
| 114 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 0f52696d1..d70399f6b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -7,21 +7,8 @@ | |||
| 7 | #include "shader_recompiler/frontend/maxwell/opcode.h" | 7 | #include "shader_recompiler/frontend/maxwell/opcode.h" |
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" |
| 9 | 9 | ||
| 10 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | 10 | namespace Shader::Maxwell { |
| 13 | 11 | ||
| 14 | [[maybe_unused]] static inline void DumpOptimized(IR::Block& block) { | ||
| 15 | auto raw{IR::DumpBlock(block)}; | ||
| 16 | |||
| 17 | Optimization::GetSetElimination(block); | ||
| 18 | Optimization::DeadCodeEliminationPass(block); | ||
| 19 | Optimization::IdentityRemovalPass(block); | ||
| 20 | auto dumped{IR::DumpBlock(block)}; | ||
| 21 | |||
| 22 | fmt::print(stderr, "{}", dumped); | ||
| 23 | } | ||
| 24 | |||
| 25 | [[noreturn]] static void ThrowNotImplemented(Opcode opcode) { | 12 | [[noreturn]] static void ThrowNotImplemented(Opcode opcode) { |
| 26 | throw NotImplementedException("Instruction {} is not implemented", opcode); | 13 | throw NotImplementedException("Instruction {} is not implemented", opcode); |
| 27 | } | 14 | } |
| @@ -146,8 +133,8 @@ void TranslatorVisitor::DADD_imm(u64) { | |||
| 146 | ThrowNotImplemented(Opcode::DADD_imm); | 133 | ThrowNotImplemented(Opcode::DADD_imm); |
| 147 | } | 134 | } |
| 148 | 135 | ||
| 149 | void TranslatorVisitor::DEPBAR(u64) { | 136 | void TranslatorVisitor::DEPBAR() { |
| 150 | ThrowNotImplemented(Opcode::DEPBAR); | 137 | // DEPBAR is a no-op |
| 151 | } | 138 | } |
| 152 | 139 | ||
| 153 | void TranslatorVisitor::DFMA_reg(u64) { | 140 | void TranslatorVisitor::DFMA_reg(u64) { |
| @@ -230,22 +217,6 @@ void TranslatorVisitor::F2F_imm(u64) { | |||
| 230 | ThrowNotImplemented(Opcode::F2F_imm); | 217 | ThrowNotImplemented(Opcode::F2F_imm); |
| 231 | } | 218 | } |
| 232 | 219 | ||
| 233 | void TranslatorVisitor::FADD_reg(u64) { | ||
| 234 | ThrowNotImplemented(Opcode::FADD_reg); | ||
| 235 | } | ||
| 236 | |||
| 237 | void TranslatorVisitor::FADD_cbuf(u64) { | ||
| 238 | ThrowNotImplemented(Opcode::FADD_cbuf); | ||
| 239 | } | ||
| 240 | |||
| 241 | void TranslatorVisitor::FADD_imm(u64) { | ||
| 242 | ThrowNotImplemented(Opcode::FADD_imm); | ||
| 243 | } | ||
| 244 | |||
| 245 | void TranslatorVisitor::FADD32I(u64) { | ||
| 246 | ThrowNotImplemented(Opcode::FADD32I); | ||
| 247 | } | ||
| 248 | |||
| 249 | void TranslatorVisitor::FCHK_reg(u64) { | 220 | void TranslatorVisitor::FCHK_reg(u64) { |
| 250 | ThrowNotImplemented(Opcode::FCHK_reg); | 221 | ThrowNotImplemented(Opcode::FCHK_reg); |
| 251 | } | 222 | } |
| @@ -274,26 +245,6 @@ void TranslatorVisitor::FCMP_imm(u64) { | |||
| 274 | ThrowNotImplemented(Opcode::FCMP_imm); | 245 | ThrowNotImplemented(Opcode::FCMP_imm); |
| 275 | } | 246 | } |
| 276 | 247 | ||
| 277 | void TranslatorVisitor::FFMA_reg(u64) { | ||
| 278 | ThrowNotImplemented(Opcode::FFMA_reg); | ||
| 279 | } | ||
| 280 | |||
| 281 | void TranslatorVisitor::FFMA_rc(u64) { | ||
| 282 | ThrowNotImplemented(Opcode::FFMA_rc); | ||
| 283 | } | ||
| 284 | |||
| 285 | void TranslatorVisitor::FFMA_cr(u64) { | ||
| 286 | ThrowNotImplemented(Opcode::FFMA_cr); | ||
| 287 | } | ||
| 288 | |||
| 289 | void TranslatorVisitor::FFMA_imm(u64) { | ||
| 290 | ThrowNotImplemented(Opcode::FFMA_imm); | ||
| 291 | } | ||
| 292 | |||
| 293 | void TranslatorVisitor::FFMA32I(u64) { | ||
| 294 | ThrowNotImplemented(Opcode::FFMA32I); | ||
| 295 | } | ||
| 296 | |||
| 297 | void TranslatorVisitor::FLO_reg(u64) { | 248 | void TranslatorVisitor::FLO_reg(u64) { |
| 298 | ThrowNotImplemented(Opcode::FLO_reg); | 249 | ThrowNotImplemented(Opcode::FLO_reg); |
| 299 | } | 250 | } |
| @@ -318,22 +269,6 @@ void TranslatorVisitor::FMNMX_imm(u64) { | |||
| 318 | ThrowNotImplemented(Opcode::FMNMX_imm); | 269 | ThrowNotImplemented(Opcode::FMNMX_imm); |
| 319 | } | 270 | } |
| 320 | 271 | ||
| 321 | void TranslatorVisitor::FMUL_reg(u64) { | ||
| 322 | ThrowNotImplemented(Opcode::FMUL_reg); | ||
| 323 | } | ||
| 324 | |||
| 325 | void TranslatorVisitor::FMUL_cbuf(u64) { | ||
| 326 | ThrowNotImplemented(Opcode::FMUL_cbuf); | ||
| 327 | } | ||
| 328 | |||
| 329 | void TranslatorVisitor::FMUL_imm(u64) { | ||
| 330 | ThrowNotImplemented(Opcode::FMUL_imm); | ||
| 331 | } | ||
| 332 | |||
| 333 | void TranslatorVisitor::FMUL32I(u64) { | ||
| 334 | ThrowNotImplemented(Opcode::FMUL32I); | ||
| 335 | } | ||
| 336 | |||
| 337 | void TranslatorVisitor::FSET_reg(u64) { | 272 | void TranslatorVisitor::FSET_reg(u64) { |
| 338 | ThrowNotImplemented(Opcode::FSET_reg); | 273 | ThrowNotImplemented(Opcode::FSET_reg); |
| 339 | } | 274 | } |
| @@ -470,18 +405,6 @@ void TranslatorVisitor::I2I_imm(u64) { | |||
| 470 | ThrowNotImplemented(Opcode::I2I_imm); | 405 | ThrowNotImplemented(Opcode::I2I_imm); |
| 471 | } | 406 | } |
| 472 | 407 | ||
| 473 | void TranslatorVisitor::IADD_reg(u64) { | ||
| 474 | ThrowNotImplemented(Opcode::IADD_reg); | ||
| 475 | } | ||
| 476 | |||
| 477 | void TranslatorVisitor::IADD_cbuf(u64) { | ||
| 478 | ThrowNotImplemented(Opcode::IADD_cbuf); | ||
| 479 | } | ||
| 480 | |||
| 481 | void TranslatorVisitor::IADD_imm(u64) { | ||
| 482 | ThrowNotImplemented(Opcode::IADD_imm); | ||
| 483 | } | ||
| 484 | |||
| 485 | void TranslatorVisitor::IADD3_reg(u64) { | 408 | void TranslatorVisitor::IADD3_reg(u64) { |
| 486 | ThrowNotImplemented(Opcode::IADD3_reg); | 409 | ThrowNotImplemented(Opcode::IADD3_reg); |
| 487 | } | 410 | } |
| @@ -494,10 +417,6 @@ void TranslatorVisitor::IADD3_imm(u64) { | |||
| 494 | ThrowNotImplemented(Opcode::IADD3_imm); | 417 | ThrowNotImplemented(Opcode::IADD3_imm); |
| 495 | } | 418 | } |
| 496 | 419 | ||
| 497 | void TranslatorVisitor::IADD32I(u64) { | ||
| 498 | ThrowNotImplemented(Opcode::IADD32I); | ||
| 499 | } | ||
| 500 | |||
| 501 | void TranslatorVisitor::ICMP_reg(u64) { | 420 | void TranslatorVisitor::ICMP_reg(u64) { |
| 502 | ThrowNotImplemented(Opcode::ICMP_reg); | 421 | ThrowNotImplemented(Opcode::ICMP_reg); |
| 503 | } | 422 | } |
| @@ -594,22 +513,6 @@ void TranslatorVisitor::ISBERD(u64) { | |||
| 594 | ThrowNotImplemented(Opcode::ISBERD); | 513 | ThrowNotImplemented(Opcode::ISBERD); |
| 595 | } | 514 | } |
| 596 | 515 | ||
| 597 | void TranslatorVisitor::ISCADD_reg(u64) { | ||
| 598 | ThrowNotImplemented(Opcode::ISCADD_reg); | ||
| 599 | } | ||
| 600 | |||
| 601 | void TranslatorVisitor::ISCADD_cbuf(u64) { | ||
| 602 | ThrowNotImplemented(Opcode::ISCADD_cbuf); | ||
| 603 | } | ||
| 604 | |||
| 605 | void TranslatorVisitor::ISCADD_imm(u64) { | ||
| 606 | ThrowNotImplemented(Opcode::ISCADD_imm); | ||
| 607 | } | ||
| 608 | |||
| 609 | void TranslatorVisitor::ISCADD32I(u64) { | ||
| 610 | ThrowNotImplemented(Opcode::ISCADD32I); | ||
| 611 | } | ||
| 612 | |||
| 613 | void TranslatorVisitor::ISET_reg(u64) { | 516 | void TranslatorVisitor::ISET_reg(u64) { |
| 614 | ThrowNotImplemented(Opcode::ISET_reg); | 517 | ThrowNotImplemented(Opcode::ISET_reg); |
| 615 | } | 518 | } |
| @@ -622,18 +525,6 @@ void TranslatorVisitor::ISET_imm(u64) { | |||
| 622 | ThrowNotImplemented(Opcode::ISET_imm); | 525 | ThrowNotImplemented(Opcode::ISET_imm); |
| 623 | } | 526 | } |
| 624 | 527 | ||
| 625 | void TranslatorVisitor::ISETP_reg(u64) { | ||
| 626 | ThrowNotImplemented(Opcode::ISETP_reg); | ||
| 627 | } | ||
| 628 | |||
| 629 | void TranslatorVisitor::ISETP_cbuf(u64) { | ||
| 630 | ThrowNotImplemented(Opcode::ISETP_cbuf); | ||
| 631 | } | ||
| 632 | |||
| 633 | void TranslatorVisitor::ISETP_imm(u64) { | ||
| 634 | ThrowNotImplemented(Opcode::ISETP_imm); | ||
| 635 | } | ||
| 636 | |||
| 637 | void TranslatorVisitor::JCAL(u64) { | 528 | void TranslatorVisitor::JCAL(u64) { |
| 638 | ThrowNotImplemented(Opcode::JCAL); | 529 | ThrowNotImplemented(Opcode::JCAL); |
| 639 | } | 530 | } |
| @@ -658,10 +549,6 @@ void TranslatorVisitor::LDC(u64) { | |||
| 658 | ThrowNotImplemented(Opcode::LDC); | 549 | ThrowNotImplemented(Opcode::LDC); |
| 659 | } | 550 | } |
| 660 | 551 | ||
| 661 | void TranslatorVisitor::LDG(u64) { | ||
| 662 | ThrowNotImplemented(Opcode::LDG); | ||
| 663 | } | ||
| 664 | |||
| 665 | void TranslatorVisitor::LDL(u64) { | 552 | void TranslatorVisitor::LDL(u64) { |
| 666 | ThrowNotImplemented(Opcode::LDL); | 553 | ThrowNotImplemented(Opcode::LDL); |
| 667 | } | 554 | } |
| @@ -866,10 +753,6 @@ void TranslatorVisitor::RTT(u64) { | |||
| 866 | ThrowNotImplemented(Opcode::RTT); | 753 | ThrowNotImplemented(Opcode::RTT); |
| 867 | } | 754 | } |
| 868 | 755 | ||
| 869 | void TranslatorVisitor::S2R(u64) { | ||
| 870 | ThrowNotImplemented(Opcode::S2R); | ||
| 871 | } | ||
| 872 | |||
| 873 | void TranslatorVisitor::SAM(u64) { | 756 | void TranslatorVisitor::SAM(u64) { |
| 874 | ThrowNotImplemented(Opcode::SAM); | 757 | ThrowNotImplemented(Opcode::SAM); |
| 875 | } | 758 | } |
| @@ -914,18 +797,6 @@ void TranslatorVisitor::SHFL(u64) { | |||
| 914 | ThrowNotImplemented(Opcode::SHFL); | 797 | ThrowNotImplemented(Opcode::SHFL); |
| 915 | } | 798 | } |
| 916 | 799 | ||
| 917 | void TranslatorVisitor::SHL_reg(u64) { | ||
| 918 | ThrowNotImplemented(Opcode::SHL_reg); | ||
| 919 | } | ||
| 920 | |||
| 921 | void TranslatorVisitor::SHL_cbuf(u64) { | ||
| 922 | ThrowNotImplemented(Opcode::SHL_cbuf); | ||
| 923 | } | ||
| 924 | |||
| 925 | void TranslatorVisitor::SHL_imm(u64) { | ||
| 926 | ThrowNotImplemented(Opcode::SHL_imm); | ||
| 927 | } | ||
| 928 | |||
| 929 | void TranslatorVisitor::SHR_reg(u64) { | 800 | void TranslatorVisitor::SHR_reg(u64) { |
| 930 | ThrowNotImplemented(Opcode::SHR_reg); | 801 | ThrowNotImplemented(Opcode::SHR_reg); |
| 931 | } | 802 | } |
| @@ -1086,20 +957,4 @@ void TranslatorVisitor::VSHR(u64) { | |||
| 1086 | ThrowNotImplemented(Opcode::VSHR); | 957 | ThrowNotImplemented(Opcode::VSHR); |
| 1087 | } | 958 | } |
| 1088 | 959 | ||
| 1089 | void TranslatorVisitor::XMAD_reg(u64) { | ||
| 1090 | ThrowNotImplemented(Opcode::XMAD_reg); | ||
| 1091 | } | ||
| 1092 | |||
| 1093 | void TranslatorVisitor::XMAD_rc(u64) { | ||
| 1094 | ThrowNotImplemented(Opcode::XMAD_rc); | ||
| 1095 | } | ||
| 1096 | |||
| 1097 | void TranslatorVisitor::XMAD_cr(u64) { | ||
| 1098 | ThrowNotImplemented(Opcode::XMAD_cr); | ||
| 1099 | } | ||
| 1100 | |||
| 1101 | void TranslatorVisitor::XMAD_imm(u64) { | ||
| 1102 | ThrowNotImplemented(Opcode::XMAD_imm); | ||
| 1103 | } | ||
| 1104 | |||
| 1105 | } // namespace Shader::Maxwell | 960 | } // namespace Shader::Maxwell |