diff options
Diffstat (limited to 'src/shader_recompiler/frontend/maxwell/translate/impl')
91 files changed, 9761 insertions, 0 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp new file mode 100644 index 000000000..d9f999e05 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp | |||
| @@ -0,0 +1,214 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class AtomOp : u64 { | ||
| 12 | ADD, | ||
| 13 | MIN, | ||
| 14 | MAX, | ||
| 15 | INC, | ||
| 16 | DEC, | ||
| 17 | AND, | ||
| 18 | OR, | ||
| 19 | XOR, | ||
| 20 | EXCH, | ||
| 21 | SAFEADD, | ||
| 22 | }; | ||
| 23 | |||
| 24 | enum class AtomSize : u64 { | ||
| 25 | U32, | ||
| 26 | S32, | ||
| 27 | U64, | ||
| 28 | F32, | ||
| 29 | F16x2, | ||
| 30 | S64, | ||
| 31 | }; | ||
| 32 | |||
| 33 | IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b, | ||
| 34 | AtomOp op, bool is_signed) { | ||
| 35 | switch (op) { | ||
| 36 | case AtomOp::ADD: | ||
| 37 | return ir.GlobalAtomicIAdd(offset, op_b); | ||
| 38 | case AtomOp::MIN: | ||
| 39 | return ir.GlobalAtomicIMin(offset, op_b, is_signed); | ||
| 40 | case AtomOp::MAX: | ||
| 41 | return ir.GlobalAtomicIMax(offset, op_b, is_signed); | ||
| 42 | case AtomOp::INC: | ||
| 43 | return ir.GlobalAtomicInc(offset, op_b); | ||
| 44 | case AtomOp::DEC: | ||
| 45 | return ir.GlobalAtomicDec(offset, op_b); | ||
| 46 | case AtomOp::AND: | ||
| 47 | return ir.GlobalAtomicAnd(offset, op_b); | ||
| 48 | case AtomOp::OR: | ||
| 49 | return ir.GlobalAtomicOr(offset, op_b); | ||
| 50 | case AtomOp::XOR: | ||
| 51 | return ir.GlobalAtomicXor(offset, op_b); | ||
| 52 | case AtomOp::EXCH: | ||
| 53 | return ir.GlobalAtomicExchange(offset, op_b); | ||
| 54 | default: | ||
| 55 | throw NotImplementedException("Integer Atom Operation {}", op); | ||
| 56 | } | ||
| 57 | } | ||
| 58 | |||
| 59 | IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op, | ||
| 60 | AtomSize size) { | ||
| 61 | static constexpr IR::FpControl f16_control{ | ||
| 62 | .no_contraction = false, | ||
| 63 | .rounding = IR::FpRounding::RN, | ||
| 64 | .fmz_mode = IR::FmzMode::DontCare, | ||
| 65 | }; | ||
| 66 | static constexpr IR::FpControl f32_control{ | ||
| 67 | .no_contraction = false, | ||
| 68 | .rounding = IR::FpRounding::RN, | ||
| 69 | .fmz_mode = IR::FmzMode::FTZ, | ||
| 70 | }; | ||
| 71 | switch (op) { | ||
| 72 | case AtomOp::ADD: | ||
| 73 | return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control) | ||
| 74 | : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control); | ||
| 75 | case AtomOp::MIN: | ||
| 76 | return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control); | ||
| 77 | case AtomOp::MAX: | ||
| 78 | return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control); | ||
| 79 | default: | ||
| 80 | throw NotImplementedException("FP Atom Operation {}", op); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 84 | IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) { | ||
| 85 | union { | ||
| 86 | u64 raw; | ||
| 87 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 88 | BitField<28, 20, s64> addr_offset; | ||
| 89 | BitField<28, 20, u64> rz_addr_offset; | ||
| 90 | BitField<48, 1, u64> e; | ||
| 91 | } const mem{insn}; | ||
| 92 | |||
| 93 | const IR::U64 address{[&]() -> IR::U64 { | ||
| 94 | if (mem.e == 0) { | ||
| 95 | return v.ir.UConvert(64, v.X(mem.addr_reg)); | ||
| 96 | } | ||
| 97 | return v.L(mem.addr_reg); | ||
| 98 | }()}; | ||
| 99 | const u64 addr_offset{[&]() -> u64 { | ||
| 100 | if (mem.addr_reg == IR::Reg::RZ) { | ||
| 101 | // When RZ is used, the address is an absolute address | ||
| 102 | return static_cast<u64>(mem.rz_addr_offset.Value()); | ||
| 103 | } else { | ||
| 104 | return static_cast<u64>(mem.addr_offset.Value()); | ||
| 105 | } | ||
| 106 | }()}; | ||
| 107 | return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); | ||
| 108 | } | ||
| 109 | |||
| 110 | bool AtomOpNotApplicable(AtomSize size, AtomOp op) { | ||
| 111 | // TODO: SAFEADD | ||
| 112 | switch (size) { | ||
| 113 | case AtomSize::S32: | ||
| 114 | case AtomSize::U64: | ||
| 115 | return (op == AtomOp::INC || op == AtomOp::DEC); | ||
| 116 | case AtomSize::S64: | ||
| 117 | return !(op == AtomOp::MIN || op == AtomOp::MAX); | ||
| 118 | case AtomSize::F32: | ||
| 119 | return op != AtomOp::ADD; | ||
| 120 | case AtomSize::F16x2: | ||
| 121 | return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX); | ||
| 122 | default: | ||
| 123 | return false; | ||
| 124 | } | ||
| 125 | } | ||
| 126 | |||
| 127 | IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) { | ||
| 128 | switch (size) { | ||
| 129 | case AtomSize::U32: | ||
| 130 | case AtomSize::S32: | ||
| 131 | case AtomSize::F32: | ||
| 132 | case AtomSize::F16x2: | ||
| 133 | return ir.LoadGlobal32(offset); | ||
| 134 | case AtomSize::U64: | ||
| 135 | case AtomSize::S64: | ||
| 136 | return ir.PackUint2x32(ir.LoadGlobal64(offset)); | ||
| 137 | default: | ||
| 138 | throw NotImplementedException("Atom Size {}", size); | ||
| 139 | } | ||
| 140 | } | ||
| 141 | |||
| 142 | void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) { | ||
| 143 | switch (size) { | ||
| 144 | case AtomSize::U32: | ||
| 145 | case AtomSize::S32: | ||
| 146 | case AtomSize::F16x2: | ||
| 147 | return v.X(dest_reg, IR::U32{result}); | ||
| 148 | case AtomSize::U64: | ||
| 149 | case AtomSize::S64: | ||
| 150 | return v.L(dest_reg, IR::U64{result}); | ||
| 151 | case AtomSize::F32: | ||
| 152 | return v.F(dest_reg, IR::F32{result}); | ||
| 153 | default: | ||
| 154 | break; | ||
| 155 | } | ||
| 156 | } | ||
| 157 | |||
| 158 | IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset, | ||
| 159 | AtomSize size, AtomOp op) { | ||
| 160 | switch (size) { | ||
| 161 | case AtomSize::U32: | ||
| 162 | case AtomSize::S32: | ||
| 163 | return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, size == AtomSize::S32); | ||
| 164 | case AtomSize::U64: | ||
| 165 | case AtomSize::S64: | ||
| 166 | return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, size == AtomSize::S64); | ||
| 167 | case AtomSize::F32: | ||
| 168 | return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size); | ||
| 169 | case AtomSize::F16x2: { | ||
| 170 | return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size); | ||
| 171 | } | ||
| 172 | default: | ||
| 173 | throw NotImplementedException("Atom Size {}", size); | ||
| 174 | } | ||
| 175 | } | ||
| 176 | |||
| 177 | void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, | ||
| 178 | const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) { | ||
| 179 | IR::Value result; | ||
| 180 | if (AtomOpNotApplicable(size, op)) { | ||
| 181 | result = LoadGlobal(v.ir, offset, size); | ||
| 182 | } else { | ||
| 183 | result = ApplyAtomOp(v, operand_reg, offset, size, op); | ||
| 184 | } | ||
| 185 | if (write_dest) { | ||
| 186 | StoreResult(v, dest_reg, result, size); | ||
| 187 | } | ||
| 188 | } | ||
| 189 | } // Anonymous namespace | ||
| 190 | |||
| 191 | void TranslatorVisitor::ATOM(u64 insn) { | ||
| 192 | union { | ||
| 193 | u64 raw; | ||
| 194 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 195 | BitField<20, 8, IR::Reg> operand_reg; | ||
| 196 | BitField<49, 3, AtomSize> size; | ||
| 197 | BitField<52, 4, AtomOp> op; | ||
| 198 | } const atom{insn}; | ||
| 199 | const IR::U64 offset{AtomOffset(*this, insn)}; | ||
| 200 | GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true); | ||
| 201 | } | ||
| 202 | |||
| 203 | void TranslatorVisitor::RED(u64 insn) { | ||
| 204 | union { | ||
| 205 | u64 raw; | ||
| 206 | BitField<0, 8, IR::Reg> operand_reg; | ||
| 207 | BitField<20, 3, AtomSize> size; | ||
| 208 | BitField<23, 3, AtomOp> op; | ||
| 209 | } const red{insn}; | ||
| 210 | const IR::U64 offset{AtomOffset(*this, insn)}; | ||
| 211 | GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, true); | ||
| 212 | } | ||
| 213 | |||
| 214 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp new file mode 100644 index 000000000..8b974621e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class AtomOp : u64 { | ||
| 12 | ADD, | ||
| 13 | MIN, | ||
| 14 | MAX, | ||
| 15 | INC, | ||
| 16 | DEC, | ||
| 17 | AND, | ||
| 18 | OR, | ||
| 19 | XOR, | ||
| 20 | EXCH, | ||
| 21 | }; | ||
| 22 | |||
| 23 | enum class AtomsSize : u64 { | ||
| 24 | U32, | ||
| 25 | S32, | ||
| 26 | U64, | ||
| 27 | }; | ||
| 28 | |||
| 29 | IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op, | ||
| 30 | bool is_signed) { | ||
| 31 | switch (op) { | ||
| 32 | case AtomOp::ADD: | ||
| 33 | return ir.SharedAtomicIAdd(offset, op_b); | ||
| 34 | case AtomOp::MIN: | ||
| 35 | return ir.SharedAtomicIMin(offset, op_b, is_signed); | ||
| 36 | case AtomOp::MAX: | ||
| 37 | return ir.SharedAtomicIMax(offset, op_b, is_signed); | ||
| 38 | case AtomOp::INC: | ||
| 39 | return ir.SharedAtomicInc(offset, op_b); | ||
| 40 | case AtomOp::DEC: | ||
| 41 | return ir.SharedAtomicDec(offset, op_b); | ||
| 42 | case AtomOp::AND: | ||
| 43 | return ir.SharedAtomicAnd(offset, op_b); | ||
| 44 | case AtomOp::OR: | ||
| 45 | return ir.SharedAtomicOr(offset, op_b); | ||
| 46 | case AtomOp::XOR: | ||
| 47 | return ir.SharedAtomicXor(offset, op_b); | ||
| 48 | case AtomOp::EXCH: | ||
| 49 | return ir.SharedAtomicExchange(offset, op_b); | ||
| 50 | default: | ||
| 51 | throw NotImplementedException("Integer Atoms Operation {}", op); | ||
| 52 | } | ||
| 53 | } | ||
| 54 | |||
| 55 | IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) { | ||
| 56 | union { | ||
| 57 | u64 raw; | ||
| 58 | BitField<8, 8, IR::Reg> offset_reg; | ||
| 59 | BitField<30, 22, u64> absolute_offset; | ||
| 60 | BitField<30, 22, s64> relative_offset; | ||
| 61 | } const encoding{insn}; | ||
| 62 | |||
| 63 | if (encoding.offset_reg == IR::Reg::RZ) { | ||
| 64 | return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2)); | ||
| 65 | } else { | ||
| 66 | const s32 relative{static_cast<s32>(encoding.relative_offset << 2)}; | ||
| 67 | return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); | ||
| 68 | } | ||
| 69 | } | ||
| 70 | |||
| 71 | void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) { | ||
| 72 | switch (size) { | ||
| 73 | case AtomsSize::U32: | ||
| 74 | case AtomsSize::S32: | ||
| 75 | return v.X(dest_reg, IR::U32{result}); | ||
| 76 | case AtomsSize::U64: | ||
| 77 | return v.L(dest_reg, IR::U64{result}); | ||
| 78 | default: | ||
| 79 | break; | ||
| 80 | } | ||
| 81 | } | ||
| 82 | } // Anonymous namespace | ||
| 83 | |||
| 84 | void TranslatorVisitor::ATOMS(u64 insn) { | ||
| 85 | union { | ||
| 86 | u64 raw; | ||
| 87 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 88 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 89 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 90 | BitField<28, 2, AtomsSize> size; | ||
| 91 | BitField<52, 4, AtomOp> op; | ||
| 92 | } const atoms{insn}; | ||
| 93 | |||
| 94 | const bool size_64{atoms.size == AtomsSize::U64}; | ||
| 95 | if (size_64 && atoms.op != AtomOp::EXCH) { | ||
| 96 | throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value()); | ||
| 97 | } | ||
| 98 | const bool is_signed{atoms.size == AtomsSize::S32}; | ||
| 99 | const IR::U32 offset{AtomsOffset(*this, insn)}; | ||
| 100 | |||
| 101 | IR::Value result; | ||
| 102 | if (size_64) { | ||
| 103 | result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed); | ||
| 104 | } else { | ||
| 105 | result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed); | ||
| 106 | } | ||
| 107 | StoreResult(*this, atoms.dest_reg, result, atoms.size); | ||
| 108 | } | ||
| 109 | |||
| 110 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp new file mode 100644 index 000000000..fb3f00d3f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp | |||
| @@ -0,0 +1,35 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | |||
| 12 | enum class BitSize : u64 { | ||
| 13 | B32, | ||
| 14 | B64, | ||
| 15 | B96, | ||
| 16 | B128, | ||
| 17 | }; | ||
| 18 | |||
| 19 | void TranslatorVisitor::AL2P(u64 inst) { | ||
| 20 | union { | ||
| 21 | u64 raw; | ||
| 22 | BitField<0, 8, IR::Reg> result_register; | ||
| 23 | BitField<8, 8, IR::Reg> indexing_register; | ||
| 24 | BitField<20, 11, s64> offset; | ||
| 25 | BitField<47, 2, BitSize> bitsize; | ||
| 26 | } al2p{inst}; | ||
| 27 | if (al2p.bitsize != BitSize::B32) { | ||
| 28 | throw NotImplementedException("BitSize {}", al2p.bitsize.Value()); | ||
| 29 | } | ||
| 30 | const IR::U32 converted_offset{ir.Imm32(static_cast<u32>(al2p.offset.Value()))}; | ||
| 31 | const IR::U32 result{ir.IAdd(X(al2p.indexing_register), converted_offset)}; | ||
| 32 | X(al2p.result_register, result); | ||
| 33 | } | ||
| 34 | |||
| 35 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp new file mode 100644 index 000000000..86e433e41 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp | |||
| @@ -0,0 +1,96 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | // Seems to be in CUDA terminology. | ||
| 14 | enum class LocalScope : u64 { | ||
| 15 | CTA, | ||
| 16 | GL, | ||
| 17 | SYS, | ||
| 18 | VC, | ||
| 19 | }; | ||
| 20 | } // Anonymous namespace | ||
| 21 | |||
| 22 | void TranslatorVisitor::MEMBAR(u64 inst) { | ||
| 23 | union { | ||
| 24 | u64 raw; | ||
| 25 | BitField<8, 2, LocalScope> scope; | ||
| 26 | } const membar{inst}; | ||
| 27 | |||
| 28 | if (membar.scope == LocalScope::CTA) { | ||
| 29 | ir.WorkgroupMemoryBarrier(); | ||
| 30 | } else { | ||
| 31 | ir.DeviceMemoryBarrier(); | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
// Translates DEPBAR. Nothing is emitted for this instruction.
void TranslatorVisitor::DEPBAR() {
    // DEPBAR is a no-op
}
| 38 | |||
| 39 | void TranslatorVisitor::BAR(u64 insn) { | ||
| 40 | enum class Mode { | ||
| 41 | RedPopc, | ||
| 42 | Scan, | ||
| 43 | RedAnd, | ||
| 44 | RedOr, | ||
| 45 | Sync, | ||
| 46 | Arrive, | ||
| 47 | }; | ||
| 48 | union { | ||
| 49 | u64 raw; | ||
| 50 | BitField<43, 1, u64> is_a_imm; | ||
| 51 | BitField<44, 1, u64> is_b_imm; | ||
| 52 | BitField<8, 8, u64> imm_a; | ||
| 53 | BitField<20, 12, u64> imm_b; | ||
| 54 | BitField<42, 1, u64> neg_pred; | ||
| 55 | BitField<39, 3, IR::Pred> pred; | ||
| 56 | } const bar{insn}; | ||
| 57 | |||
| 58 | const Mode mode{[insn] { | ||
| 59 | switch (insn & 0x0000009B00000000ULL) { | ||
| 60 | case 0x0000000200000000ULL: | ||
| 61 | return Mode::RedPopc; | ||
| 62 | case 0x0000000300000000ULL: | ||
| 63 | return Mode::Scan; | ||
| 64 | case 0x0000000A00000000ULL: | ||
| 65 | return Mode::RedAnd; | ||
| 66 | case 0x0000001200000000ULL: | ||
| 67 | return Mode::RedOr; | ||
| 68 | case 0x0000008000000000ULL: | ||
| 69 | return Mode::Sync; | ||
| 70 | case 0x0000008100000000ULL: | ||
| 71 | return Mode::Arrive; | ||
| 72 | } | ||
| 73 | throw NotImplementedException("Invalid encoding"); | ||
| 74 | }()}; | ||
| 75 | if (mode != Mode::Sync) { | ||
| 76 | throw NotImplementedException("BAR mode {}", mode); | ||
| 77 | } | ||
| 78 | if (bar.is_a_imm == 0) { | ||
| 79 | throw NotImplementedException("Non-immediate input A"); | ||
| 80 | } | ||
| 81 | if (bar.imm_a != 0) { | ||
| 82 | throw NotImplementedException("Non-zero input A"); | ||
| 83 | } | ||
| 84 | if (bar.is_b_imm == 0) { | ||
| 85 | throw NotImplementedException("Non-immediate input B"); | ||
| 86 | } | ||
| 87 | if (bar.imm_b != 0) { | ||
| 88 | throw NotImplementedException("Non-zero input B"); | ||
| 89 | } | ||
| 90 | if (bar.pred != IR::Pred::PT && bar.neg_pred != 0) { | ||
| 91 | throw NotImplementedException("Non-true input predicate"); | ||
| 92 | } | ||
| 93 | ir.Barrier(); | ||
| 94 | } | ||
| 95 | |||
| 96 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp new file mode 100644 index 000000000..9d5a87e52 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp | |||
| @@ -0,0 +1,74 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> offset_reg; | ||
| 16 | BitField<40, 1, u64> brev; | ||
| 17 | BitField<47, 1, u64> cc; | ||
| 18 | BitField<48, 1, u64> is_signed; | ||
| 19 | } const bfe{insn}; | ||
| 20 | |||
| 21 | const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)}; | ||
| 22 | const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)}; | ||
| 23 | |||
| 24 | // Common constants | ||
| 25 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 26 | const IR::U32 one{v.ir.Imm32(1)}; | ||
| 27 | const IR::U32 max_size{v.ir.Imm32(32)}; | ||
| 28 | // Edge case conditions | ||
| 29 | const IR::U1 zero_count{v.ir.IEqual(count, zero)}; | ||
| 30 | const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)}; | ||
| 31 | const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)}; | ||
| 32 | |||
| 33 | IR::U32 base{v.X(bfe.offset_reg)}; | ||
| 34 | if (bfe.brev != 0) { | ||
| 35 | base = v.ir.BitReverse(base); | ||
| 36 | } | ||
| 37 | IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)}; | ||
| 38 | if (bfe.is_signed != 0) { | ||
| 39 | const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)}; | ||
| 40 | const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)}; | ||
| 41 | const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)}; | ||
| 42 | // Replicate condition | ||
| 43 | result = IR::U32{v.ir.Select(replicate, replicated_bit, result)}; | ||
| 44 | // Exceeding condition | ||
| 45 | const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)}; | ||
| 46 | result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)}; | ||
| 47 | } | ||
| 48 | // Zero count condition | ||
| 49 | result = IR::U32{v.ir.Select(zero_count, zero, result)}; | ||
| 50 | |||
| 51 | v.X(bfe.dest_reg, result); | ||
| 52 | |||
| 53 | if (bfe.cc != 0) { | ||
| 54 | v.SetZFlag(v.ir.IEqual(result, zero)); | ||
| 55 | v.SetSFlag(v.ir.ILessThan(result, zero, true)); | ||
| 56 | v.ResetCFlag(); | ||
| 57 | v.ResetOFlag(); | ||
| 58 | } | ||
| 59 | } | ||
| 60 | } // Anonymous namespace | ||
| 61 | |||
// BFE variant whose packed position/width operand is obtained with GetReg20.
void TranslatorVisitor::BFE_reg(u64 insn) {
    BFE(*this, insn, GetReg20(insn));
}
| 65 | |||
// BFE variant whose packed position/width operand is obtained with GetCbuf.
void TranslatorVisitor::BFE_cbuf(u64 insn) {
    BFE(*this, insn, GetCbuf(insn));
}
| 69 | |||
// BFE variant whose packed position/width operand is obtained with GetImm20.
void TranslatorVisitor::BFE_imm(u64 insn) {
    BFE(*this, insn, GetImm20(insn));
}
| 73 | |||
| 74 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp new file mode 100644 index 000000000..1e1ec2119 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> insert_reg; | ||
| 16 | BitField<47, 1, u64> cc; | ||
| 17 | } const bfi{insn}; | ||
| 18 | |||
| 19 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 20 | const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)}; | ||
| 21 | const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)}; | ||
| 22 | const IR::U32 max_size{v.ir.Imm32(32)}; | ||
| 23 | |||
| 24 | // Edge case conditions | ||
| 25 | const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)}; | ||
| 26 | const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)}; | ||
| 27 | |||
| 28 | const IR::U32 remaining_size{v.ir.ISub(max_size, offset)}; | ||
| 29 | const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)}; | ||
| 30 | |||
| 31 | const IR::U32 insert{v.X(bfi.insert_reg)}; | ||
| 32 | IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)}; | ||
| 33 | |||
| 34 | result = IR::U32{v.ir.Select(exceed_offset, base, result)}; | ||
| 35 | |||
| 36 | v.X(bfi.dest_reg, result); | ||
| 37 | if (bfi.cc != 0) { | ||
| 38 | v.SetZFlag(v.ir.IEqual(result, zero)); | ||
| 39 | v.SetSFlag(v.ir.ILessThan(result, zero, true)); | ||
| 40 | v.ResetCFlag(); | ||
| 41 | v.ResetOFlag(); | ||
| 42 | } | ||
| 43 | } | ||
| 44 | } // Anonymous namespace | ||
| 45 | |||
// BFI variant: position/width operand from GetReg20, base value from GetReg39.
void TranslatorVisitor::BFI_reg(u64 insn) {
    BFI(*this, insn, GetReg20(insn), GetReg39(insn));
}
| 49 | |||
// BFI variant: position/width operand from GetReg39, base value from GetCbuf.
void TranslatorVisitor::BFI_rc(u64 insn) {
    BFI(*this, insn, GetReg39(insn), GetCbuf(insn));
}
| 53 | |||
// BFI variant: position/width operand from GetCbuf, base value from GetReg39.
void TranslatorVisitor::BFI_cr(u64 insn) {
    BFI(*this, insn, GetCbuf(insn), GetReg39(insn));
}
| 57 | |||
// BFI variant: position/width operand from GetImm20, base value from GetReg39.
void TranslatorVisitor::BFI_imm(u64 insn) {
    BFI(*this, insn, GetImm20(insn), GetReg39(insn));
}
| 61 | |||
| 62 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp new file mode 100644 index 000000000..371c0e0f7 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp | |||
| @@ -0,0 +1,36 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void Check(u64 insn) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<5, 1, u64> cbuf_mode; | ||
| 16 | BitField<6, 1, u64> lmt; | ||
| 17 | } const encoding{insn}; | ||
| 18 | |||
| 19 | if (encoding.cbuf_mode != 0) { | ||
| 20 | throw NotImplementedException("Constant buffer mode"); | ||
| 21 | } | ||
| 22 | if (encoding.lmt != 0) { | ||
| 23 | throw NotImplementedException("LMT"); | ||
| 24 | } | ||
| 25 | } | ||
| 26 | } // Anonymous namespace | ||
| 27 | |||
// Translates BRX. Only validates the encoding through Check(); nothing else is
// done for the instruction here.
void TranslatorVisitor::BRX(u64 insn) {
    Check(insn);
}
| 31 | |||
// Translates JMX. Only validates the encoding through Check(); nothing else is
// done for the instruction here.
void TranslatorVisitor::JMX(u64 insn) {
    Check(insn);
}
| 35 | |||
| 36 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h new file mode 100644 index 000000000..fd73f656c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h | |||
| @@ -0,0 +1,57 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/exception.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | |||
| 14 | enum class FpRounding : u64 { | ||
| 15 | RN, | ||
| 16 | RM, | ||
| 17 | RP, | ||
| 18 | RZ, | ||
| 19 | }; | ||
| 20 | |||
| 21 | enum class FmzMode : u64 { | ||
| 22 | None, | ||
| 23 | FTZ, | ||
| 24 | FMZ, | ||
| 25 | INVALIDFMZ3, | ||
| 26 | }; | ||
| 27 | |||
| 28 | inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) { | ||
| 29 | switch (fp_rounding) { | ||
| 30 | case FpRounding::RN: | ||
| 31 | return IR::FpRounding::RN; | ||
| 32 | case FpRounding::RM: | ||
| 33 | return IR::FpRounding::RM; | ||
| 34 | case FpRounding::RP: | ||
| 35 | return IR::FpRounding::RP; | ||
| 36 | case FpRounding::RZ: | ||
| 37 | return IR::FpRounding::RZ; | ||
| 38 | } | ||
| 39 | throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding); | ||
| 40 | } | ||
| 41 | |||
| 42 | inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) { | ||
| 43 | switch (fmz_mode) { | ||
| 44 | case FmzMode::None: | ||
| 45 | return IR::FmzMode::None; | ||
| 46 | case FmzMode::FTZ: | ||
| 47 | return IR::FmzMode::FTZ; | ||
| 48 | case FmzMode::FMZ: | ||
| 49 | // FMZ is manually handled in the instruction | ||
| 50 | return IR::FmzMode::FTZ; | ||
| 51 | case FmzMode::INVALIDFMZ3: | ||
| 52 | break; | ||
| 53 | } | ||
| 54 | throw NotImplementedException("Invalid FMZ mode {}", fmz_mode); | ||
| 55 | } | ||
| 56 | |||
| 57 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp new file mode 100644 index 000000000..20458d2ad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp | |||
| @@ -0,0 +1,153 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, | ||
| 9 | CompareOp compare_op, bool is_signed) { | ||
| 10 | switch (compare_op) { | ||
| 11 | case CompareOp::False: | ||
| 12 | return ir.Imm1(false); | ||
| 13 | case CompareOp::LessThan: | ||
| 14 | return ir.ILessThan(operand_1, operand_2, is_signed); | ||
| 15 | case CompareOp::Equal: | ||
| 16 | return ir.IEqual(operand_1, operand_2); | ||
| 17 | case CompareOp::LessThanEqual: | ||
| 18 | return ir.ILessThanEqual(operand_1, operand_2, is_signed); | ||
| 19 | case CompareOp::GreaterThan: | ||
| 20 | return ir.IGreaterThan(operand_1, operand_2, is_signed); | ||
| 21 | case CompareOp::NotEqual: | ||
| 22 | return ir.INotEqual(operand_1, operand_2); | ||
| 23 | case CompareOp::GreaterThanEqual: | ||
| 24 | return ir.IGreaterThanEqual(operand_1, operand_2, is_signed); | ||
| 25 | case CompareOp::True: | ||
| 26 | return ir.Imm1(true); | ||
| 27 | default: | ||
| 28 | throw NotImplementedException("Invalid compare op {}", compare_op); | ||
| 29 | } | ||
| 30 | } | ||
| 31 | |||
// Emits an integer comparison that also takes the current carry and zero flags
// into account.
// The comparison is evaluated on `operand_1 + ~operand_2 + carry`, where carry
// is 1 when the C flag is set and 0 otherwise. Results that depend on equality
// are additionally combined with the current Z flag.
// Throws NotImplementedException for an unknown compare operation.
IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
                              CompareOp compare_op, bool is_signed) {
    const IR::U32 zero{ir.Imm32(0)};
    const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)};
    const IR::U1 z_flag{ir.GetZFlag()};
    const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)};
    // For unsigned comparisons the sign test on the intermediate is inverted
    // whenever exactly one operand has its top bit set; never for signed ones.
    const IR::U1 flip_logic{is_signed ? ir.Imm1(false)
                                      : ir.LogicalXor(ir.ILessThan(operand_1, zero, true),
                                                      ir.ILessThan(operand_2, zero, true))};
    switch (compare_op) {
    case CompareOp::False:
        return ir.Imm1(false);
    case CompareOp::LessThan:
        return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
                                ir.ILessThan(intermediate, zero, true))};
    case CompareOp::Equal:
        // Equal only when the intermediate is zero and the Z flag is set
        return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag);
    case CompareOp::LessThanEqual: {
        const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
                                        ir.ILessThan(intermediate, zero, true))};
        return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
    }
    case CompareOp::GreaterThan: {
        const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true),
                                        ir.IGreaterThan(intermediate, zero, true))};
        const IR::U1 not_z{ir.LogicalNot(z_flag)};
        return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z));
    }
    case CompareOp::NotEqual:
        // Not equal when the intermediate is non-zero, or it is zero with Z clear
        return ir.LogicalOr(ir.INotEqual(intermediate, zero),
                            ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag)));
    case CompareOp::GreaterThanEqual: {
        const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true),
                                        ir.IGreaterThanEqual(intermediate, zero, true))};
        return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
    }
    case CompareOp::True:
        return ir.Imm1(true);
    default:
        throw NotImplementedException("Invalid compare op {}", compare_op);
    }
}
| 74 | |||
| 75 | IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, | ||
| 76 | BooleanOp bop) { | ||
| 77 | switch (bop) { | ||
| 78 | case BooleanOp::AND: | ||
| 79 | return ir.LogicalAnd(predicate_1, predicate_2); | ||
| 80 | case BooleanOp::OR: | ||
| 81 | return ir.LogicalOr(predicate_1, predicate_2); | ||
| 82 | case BooleanOp::XOR: | ||
| 83 | return ir.LogicalXor(predicate_1, predicate_2); | ||
| 84 | default: | ||
| 85 | throw NotImplementedException("Invalid bop {}", bop); | ||
| 86 | } | ||
| 87 | } | ||
| 88 | |||
| 89 | IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) { | ||
| 90 | switch (op) { | ||
| 91 | case PredicateOp::False: | ||
| 92 | return ir.Imm1(false); | ||
| 93 | case PredicateOp::True: | ||
| 94 | return ir.Imm1(true); | ||
| 95 | case PredicateOp::Zero: | ||
| 96 | return ir.IEqual(result, ir.Imm32(0)); | ||
| 97 | case PredicateOp::NonZero: | ||
| 98 | return ir.INotEqual(result, ir.Imm32(0)); | ||
| 99 | default: | ||
| 100 | throw NotImplementedException("Invalid Predicate operation {}", op); | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
| 104 | bool IsCompareOpOrdered(FPCompareOp op) { | ||
| 105 | switch (op) { | ||
| 106 | case FPCompareOp::LTU: | ||
| 107 | case FPCompareOp::EQU: | ||
| 108 | case FPCompareOp::LEU: | ||
| 109 | case FPCompareOp::GTU: | ||
| 110 | case FPCompareOp::NEU: | ||
| 111 | case FPCompareOp::GEU: | ||
| 112 | return false; | ||
| 113 | default: | ||
| 114 | return true; | ||
| 115 | } | ||
| 116 | } | ||
| 117 | |||
| 118 | IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, | ||
| 119 | const IR::F16F32F64& operand_2, FPCompareOp compare_op, | ||
| 120 | IR::FpControl control) { | ||
| 121 | const bool ordered{IsCompareOpOrdered(compare_op)}; | ||
| 122 | switch (compare_op) { | ||
| 123 | case FPCompareOp::F: | ||
| 124 | return ir.Imm1(false); | ||
| 125 | case FPCompareOp::LT: | ||
| 126 | case FPCompareOp::LTU: | ||
| 127 | return ir.FPLessThan(operand_1, operand_2, control, ordered); | ||
| 128 | case FPCompareOp::EQ: | ||
| 129 | case FPCompareOp::EQU: | ||
| 130 | return ir.FPEqual(operand_1, operand_2, control, ordered); | ||
| 131 | case FPCompareOp::LE: | ||
| 132 | case FPCompareOp::LEU: | ||
| 133 | return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); | ||
| 134 | case FPCompareOp::GT: | ||
| 135 | case FPCompareOp::GTU: | ||
| 136 | return ir.FPGreaterThan(operand_1, operand_2, control, ordered); | ||
| 137 | case FPCompareOp::NE: | ||
| 138 | case FPCompareOp::NEU: | ||
| 139 | return ir.FPNotEqual(operand_1, operand_2, control, ordered); | ||
| 140 | case FPCompareOp::GE: | ||
| 141 | case FPCompareOp::GEU: | ||
| 142 | return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); | ||
| 143 | case FPCompareOp::NUM: | ||
| 144 | return ir.FPOrdered(operand_1, operand_2); | ||
| 145 | case FPCompareOp::Nan: | ||
| 146 | return ir.FPUnordered(operand_1, operand_2); | ||
| 147 | case FPCompareOp::T: | ||
| 148 | return ir.Imm1(true); | ||
| 149 | default: | ||
| 150 | throw NotImplementedException("Invalid FP compare op {}", compare_op); | ||
| 151 | } | ||
| 152 | } | ||
| 153 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h new file mode 100644 index 000000000..214d0af3c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | [[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, | ||
| 12 | const IR::U32& operand_2, CompareOp compare_op, bool is_signed); | ||
| 13 | |||
| 14 | [[nodiscard]] IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, | ||
| 15 | const IR::U32& operand_2, CompareOp compare_op, | ||
| 16 | bool is_signed); | ||
| 17 | |||
| 18 | [[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, | ||
| 19 | const IR::U1& predicate_2, BooleanOp bop); | ||
| 20 | |||
| 21 | [[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op); | ||
| 22 | |||
| 23 | [[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op); | ||
| 24 | |||
| 25 | [[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, | ||
| 26 | const IR::F16F32F64& operand_2, FPCompareOp compare_op, | ||
| 27 | IR::FpControl control = {}); | ||
| 28 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp new file mode 100644 index 000000000..420f2fb94 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | |||
| 12 | void TranslatorVisitor::CSET(u64 insn) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 5, IR::FlowTest> cc_test; | ||
| 17 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 18 | BitField<42, 1, u64> neg_bop_pred; | ||
| 19 | BitField<44, 1, u64> bf; | ||
| 20 | BitField<45, 2, BooleanOp> bop; | ||
| 21 | BitField<47, 1, u64> cc; | ||
| 22 | } const cset{insn}; | ||
| 23 | |||
| 24 | const IR::U32 one_mask{ir.Imm32(-1)}; | ||
| 25 | const IR::U32 fp_one{ir.Imm32(0x3f800000)}; | ||
| 26 | const IR::U32 zero{ir.Imm32(0)}; | ||
| 27 | const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one}; | ||
| 28 | const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)}; | ||
| 29 | const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)}; | ||
| 30 | const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)}; | ||
| 31 | const IR::U32 result{ir.Select(pred_result, pass_result, zero)}; | ||
| 32 | X(cset.dest_reg, result); | ||
| 33 | if (cset.cc != 0) { | ||
| 34 | const IR::U1 is_zero{ir.IEqual(result, zero)}; | ||
| 35 | SetZFlag(is_zero); | ||
| 36 | if (cset.bf != 0) { | ||
| 37 | ResetSFlag(); | ||
| 38 | } else { | ||
| 39 | SetSFlag(ir.LogicalNot(is_zero)); | ||
| 40 | } | ||
| 41 | ResetOFlag(); | ||
| 42 | ResetCFlag(); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | void TranslatorVisitor::CSETP(u64 insn) { | ||
| 47 | union { | ||
| 48 | u64 raw; | ||
| 49 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 50 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 51 | BitField<8, 5, IR::FlowTest> cc_test; | ||
| 52 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 53 | BitField<42, 1, u64> neg_bop_pred; | ||
| 54 | BitField<45, 2, BooleanOp> bop; | ||
| 55 | } const csetp{insn}; | ||
| 56 | |||
| 57 | const BooleanOp bop{csetp.bop}; | ||
| 58 | const IR::U1 bop_pred{ir.GetPred(csetp.bop_pred, csetp.neg_bop_pred != 0)}; | ||
| 59 | const IR::U1 cc_test_result{ir.GetFlowTestResult(csetp.cc_test)}; | ||
| 60 | const IR::U1 result_a{PredicateCombine(ir, cc_test_result, bop_pred, bop)}; | ||
| 61 | const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(cc_test_result), bop_pred, bop)}; | ||
| 62 | ir.SetPred(csetp.dest_pred_a, result_a); | ||
| 63 | ir.SetPred(csetp.dest_pred_b, result_b); | ||
| 64 | } | ||
| 65 | |||
| 66 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp new file mode 100644 index 000000000..5a1b3a8fc --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | |||
| 13 | void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 18 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 19 | BitField<45, 1, u64> neg_b; | ||
| 20 | BitField<46, 1, u64> abs_a; | ||
| 21 | BitField<47, 1, u64> cc; | ||
| 22 | BitField<48, 1, u64> neg_a; | ||
| 23 | BitField<49, 1, u64> abs_b; | ||
| 24 | } const dadd{insn}; | ||
| 25 | if (dadd.cc != 0) { | ||
| 26 | throw NotImplementedException("DADD CC"); | ||
| 27 | } | ||
| 28 | |||
| 29 | const IR::F64 src_a{v.D(dadd.src_a_reg)}; | ||
| 30 | const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)}; | ||
| 31 | const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; | ||
| 32 | |||
| 33 | const IR::FpControl control{ | ||
| 34 | .no_contraction = true, | ||
| 35 | .rounding = CastFpRounding(dadd.fp_rounding), | ||
| 36 | .fmz_mode = IR::FmzMode::None, | ||
| 37 | }; | ||
| 38 | |||
| 39 | v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); | ||
| 40 | } | ||
| 41 | } // Anonymous namespace | ||
| 42 | |||
| 43 | void TranslatorVisitor::DADD_reg(u64 insn) { | ||
| 44 | DADD(*this, insn, GetDoubleReg20(insn)); | ||
| 45 | } | ||
| 46 | |||
| 47 | void TranslatorVisitor::DADD_cbuf(u64 insn) { | ||
| 48 | DADD(*this, insn, GetDoubleCbuf(insn)); | ||
| 49 | } | ||
| 50 | |||
| 51 | void TranslatorVisitor::DADD_imm(u64 insn) { | ||
| 52 | DADD(*this, insn, GetDoubleImm20(insn)); | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp new file mode 100644 index 000000000..1173192e4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp | |||
| @@ -0,0 +1,72 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 17 | BitField<39, 3, IR::Pred> pred; | ||
| 18 | BitField<42, 1, u64> neg_pred; | ||
| 19 | BitField<43, 1, u64> negate_a; | ||
| 20 | BitField<44, 1, u64> abs_b; | ||
| 21 | BitField<45, 2, BooleanOp> bop; | ||
| 22 | BitField<47, 1, u64> cc; | ||
| 23 | BitField<48, 4, FPCompareOp> compare_op; | ||
| 24 | BitField<52, 1, u64> bf; | ||
| 25 | BitField<53, 1, u64> negate_b; | ||
| 26 | BitField<54, 1, u64> abs_a; | ||
| 27 | } const dset{insn}; | ||
| 28 | |||
| 29 | const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)}; | ||
| 30 | const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)}; | ||
| 31 | |||
| 32 | IR::U1 pred{v.ir.GetPred(dset.pred)}; | ||
| 33 | if (dset.neg_pred != 0) { | ||
| 34 | pred = v.ir.LogicalNot(pred); | ||
| 35 | } | ||
| 36 | const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)}; | ||
| 37 | const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)}; | ||
| 38 | |||
| 39 | const IR::U32 one_mask{v.ir.Imm32(-1)}; | ||
| 40 | const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; | ||
| 41 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 42 | const IR::U32 pass_result{dset.bf == 0 ? one_mask : fp_one}; | ||
| 43 | const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; | ||
| 44 | |||
| 45 | v.X(dset.dest_reg, result); | ||
| 46 | if (dset.cc != 0) { | ||
| 47 | const IR::U1 is_zero{v.ir.IEqual(result, zero)}; | ||
| 48 | v.SetZFlag(is_zero); | ||
| 49 | if (dset.bf != 0) { | ||
| 50 | v.ResetSFlag(); | ||
| 51 | } else { | ||
| 52 | v.SetSFlag(v.ir.LogicalNot(is_zero)); | ||
| 53 | } | ||
| 54 | v.ResetCFlag(); | ||
| 55 | v.ResetOFlag(); | ||
| 56 | } | ||
| 57 | } | ||
| 58 | } // Anonymous namespace | ||
| 59 | |||
| 60 | void TranslatorVisitor::DSET_reg(u64 insn) { | ||
| 61 | DSET(*this, insn, GetDoubleReg20(insn)); | ||
| 62 | } | ||
| 63 | |||
| 64 | void TranslatorVisitor::DSET_cbuf(u64 insn) { | ||
| 65 | DSET(*this, insn, GetDoubleCbuf(insn)); | ||
| 66 | } | ||
| 67 | |||
| 68 | void TranslatorVisitor::DSET_imm(u64 insn) { | ||
| 69 | DSET(*this, insn, GetDoubleImm20(insn)); | ||
| 70 | } | ||
| 71 | |||
| 72 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp new file mode 100644 index 000000000..f66097014 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | |||
| 13 | void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 18 | BitField<50, 2, FpRounding> fp_rounding; | ||
| 19 | BitField<47, 1, u64> cc; | ||
| 20 | BitField<48, 1, u64> neg_b; | ||
| 21 | BitField<49, 1, u64> neg_c; | ||
| 22 | } const dfma{insn}; | ||
| 23 | |||
| 24 | if (dfma.cc != 0) { | ||
| 25 | throw NotImplementedException("DFMA CC"); | ||
| 26 | } | ||
| 27 | |||
| 28 | const IR::F64 src_a{v.D(dfma.src_a_reg)}; | ||
| 29 | const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)}; | ||
| 30 | const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)}; | ||
| 31 | |||
| 32 | const IR::FpControl control{ | ||
| 33 | .no_contraction = true, | ||
| 34 | .rounding = CastFpRounding(dfma.fp_rounding), | ||
| 35 | .fmz_mode = IR::FmzMode::None, | ||
| 36 | }; | ||
| 37 | |||
| 38 | v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control)); | ||
| 39 | } | ||
| 40 | } // Anonymous namespace | ||
| 41 | |||
| 42 | void TranslatorVisitor::DFMA_reg(u64 insn) { | ||
| 43 | DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn)); | ||
| 44 | } | ||
| 45 | |||
| 46 | void TranslatorVisitor::DFMA_cr(u64 insn) { | ||
| 47 | DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn)); | ||
| 48 | } | ||
| 49 | |||
| 50 | void TranslatorVisitor::DFMA_rc(u64 insn) { | ||
| 51 | DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn)); | ||
| 52 | } | ||
| 53 | |||
| 54 | void TranslatorVisitor::DFMA_imm(u64 insn) { | ||
| 55 | DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn)); | ||
| 56 | } | ||
| 57 | |||
| 58 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp new file mode 100644 index 000000000..6b551847c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 16 | BitField<39, 3, IR::Pred> pred; | ||
| 17 | BitField<42, 1, u64> neg_pred; | ||
| 18 | BitField<45, 1, u64> negate_b; | ||
| 19 | BitField<46, 1, u64> abs_a; | ||
| 20 | BitField<47, 1, u64> cc; | ||
| 21 | BitField<48, 1, u64> negate_a; | ||
| 22 | BitField<49, 1, u64> abs_b; | ||
| 23 | } const dmnmx{insn}; | ||
| 24 | |||
| 25 | if (dmnmx.cc != 0) { | ||
| 26 | throw NotImplementedException("DMNMX CC"); | ||
| 27 | } | ||
| 28 | |||
| 29 | const IR::U1 pred{v.ir.GetPred(dmnmx.pred)}; | ||
| 30 | const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)}; | ||
| 31 | const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)}; | ||
| 32 | |||
| 33 | IR::F64 max{v.ir.FPMax(op_a, op_b)}; | ||
| 34 | IR::F64 min{v.ir.FPMin(op_a, op_b)}; | ||
| 35 | |||
| 36 | if (dmnmx.neg_pred != 0) { | ||
| 37 | std::swap(min, max); | ||
| 38 | } | ||
| 39 | v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(pred, min, max)}); | ||
| 40 | } | ||
| 41 | } // Anonymous namespace | ||
| 42 | |||
| 43 | void TranslatorVisitor::DMNMX_reg(u64 insn) { | ||
| 44 | DMNMX(*this, insn, GetDoubleReg20(insn)); | ||
| 45 | } | ||
| 46 | |||
| 47 | void TranslatorVisitor::DMNMX_cbuf(u64 insn) { | ||
| 48 | DMNMX(*this, insn, GetDoubleCbuf(insn)); | ||
| 49 | } | ||
| 50 | |||
| 51 | void TranslatorVisitor::DMNMX_imm(u64 insn) { | ||
| 52 | DMNMX(*this, insn, GetDoubleImm20(insn)); | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp new file mode 100644 index 000000000..c0159fb65 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp | |||
| @@ -0,0 +1,50 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | |||
| 13 | void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 18 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 19 | BitField<47, 1, u64> cc; | ||
| 20 | BitField<48, 1, u64> neg; | ||
| 21 | } const dmul{insn}; | ||
| 22 | |||
| 23 | if (dmul.cc != 0) { | ||
| 24 | throw NotImplementedException("DMUL CC"); | ||
| 25 | } | ||
| 26 | |||
| 27 | const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)}; | ||
| 28 | const IR::FpControl control{ | ||
| 29 | .no_contraction = true, | ||
| 30 | .rounding = CastFpRounding(dmul.fp_rounding), | ||
| 31 | .fmz_mode = IR::FmzMode::None, | ||
| 32 | }; | ||
| 33 | |||
| 34 | v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control)); | ||
| 35 | } | ||
| 36 | } // Anonymous namespace | ||
| 37 | |||
| 38 | void TranslatorVisitor::DMUL_reg(u64 insn) { | ||
| 39 | DMUL(*this, insn, GetDoubleReg20(insn)); | ||
| 40 | } | ||
| 41 | |||
| 42 | void TranslatorVisitor::DMUL_cbuf(u64 insn) { | ||
| 43 | DMUL(*this, insn, GetDoubleCbuf(insn)); | ||
| 44 | } | ||
| 45 | |||
| 46 | void TranslatorVisitor::DMUL_imm(u64 insn) { | ||
| 47 | DMUL(*this, insn, GetDoubleImm20(insn)); | ||
| 48 | } | ||
| 49 | |||
| 50 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp new file mode 100644 index 000000000..b8e74ee44 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp | |||
| @@ -0,0 +1,54 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 16 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 17 | BitField<6, 1, u64> negate_b; | ||
| 18 | BitField<7, 1, u64> abs_a; | ||
| 19 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 20 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 21 | BitField<42, 1, u64> neg_bop_pred; | ||
| 22 | BitField<43, 1, u64> negate_a; | ||
| 23 | BitField<44, 1, u64> abs_b; | ||
| 24 | BitField<45, 2, BooleanOp> bop; | ||
| 25 | BitField<48, 4, FPCompareOp> compare_op; | ||
| 26 | } const dsetp{insn}; | ||
| 27 | |||
| 28 | const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)}; | ||
| 29 | const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)}; | ||
| 30 | |||
| 31 | const BooleanOp bop{dsetp.bop}; | ||
| 32 | const FPCompareOp compare_op{dsetp.compare_op}; | ||
| 33 | const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op)}; | ||
| 34 | const IR::U1 bop_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)}; | ||
| 35 | const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; | ||
| 36 | const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; | ||
| 37 | v.ir.SetPred(dsetp.dest_pred_a, result_a); | ||
| 38 | v.ir.SetPred(dsetp.dest_pred_b, result_b); | ||
| 39 | } | ||
| 40 | } // Anonymous namespace | ||
| 41 | |||
| 42 | void TranslatorVisitor::DSETP_reg(u64 insn) { | ||
| 43 | DSETP(*this, insn, GetDoubleReg20(insn)); | ||
| 44 | } | ||
| 45 | |||
| 46 | void TranslatorVisitor::DSETP_cbuf(u64 insn) { | ||
| 47 | DSETP(*this, insn, GetDoubleCbuf(insn)); | ||
| 48 | } | ||
| 49 | |||
| 50 | void TranslatorVisitor::DSETP_imm(u64 insn) { | ||
| 51 | DSETP(*this, insn, GetDoubleImm20(insn)); | ||
| 52 | } | ||
| 53 | |||
| 54 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp new file mode 100644 index 000000000..c2443c886 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp | |||
| @@ -0,0 +1,43 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void ExitFragment(TranslatorVisitor& v) { | ||
| 12 | const ProgramHeader sph{v.env.SPH()}; | ||
| 13 | IR::Reg src_reg{IR::Reg::R0}; | ||
| 14 | for (u32 render_target = 0; render_target < 8; ++render_target) { | ||
| 15 | const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)}; | ||
| 16 | for (u32 component = 0; component < 4; ++component) { | ||
| 17 | if (!mask[component]) { | ||
| 18 | continue; | ||
| 19 | } | ||
| 20 | v.ir.SetFragColor(render_target, component, v.F(src_reg)); | ||
| 21 | ++src_reg; | ||
| 22 | } | ||
| 23 | } | ||
| 24 | if (sph.ps.omap.sample_mask != 0) { | ||
| 25 | v.ir.SetSampleMask(v.X(src_reg)); | ||
| 26 | } | ||
| 27 | if (sph.ps.omap.depth != 0) { | ||
| 28 | v.ir.SetFragDepth(v.F(src_reg + 1)); | ||
| 29 | } | ||
| 30 | } | ||
| 31 | } // Anonymous namespace | ||
| 32 | |||
| 33 | void TranslatorVisitor::EXIT() { | ||
| 34 | switch (env.ShaderStage()) { | ||
| 35 | case Stage::Fragment: | ||
| 36 | ExitFragment(*this); | ||
| 37 | break; | ||
| 38 | default: | ||
| 39 | break; | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
| 43 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp new file mode 100644 index 000000000..f0cb25d61 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<40, 1, u64> tilde; | ||
| 16 | BitField<41, 1, u64> shift; | ||
| 17 | BitField<47, 1, u64> cc; | ||
| 18 | BitField<48, 1, u64> is_signed; | ||
| 19 | } const flo{insn}; | ||
| 20 | |||
| 21 | if (flo.cc != 0) { | ||
| 22 | throw NotImplementedException("CC"); | ||
| 23 | } | ||
| 24 | if (flo.tilde != 0) { | ||
| 25 | src = v.ir.BitwiseNot(src); | ||
| 26 | } | ||
| 27 | IR::U32 result{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)}; | ||
| 28 | if (flo.shift != 0) { | ||
| 29 | const IR::U1 not_found{v.ir.IEqual(result, v.ir.Imm32(-1))}; | ||
| 30 | result = IR::U32{v.ir.Select(not_found, result, v.ir.BitwiseXor(result, v.ir.Imm32(31)))}; | ||
| 31 | } | ||
| 32 | v.X(flo.dest_reg, result); | ||
| 33 | } | ||
| 34 | } // Anonymous namespace | ||
| 35 | |||
| 36 | void TranslatorVisitor::FLO_reg(u64 insn) { | ||
| 37 | FLO(*this, insn, GetReg20(insn)); | ||
| 38 | } | ||
| 39 | |||
| 40 | void TranslatorVisitor::FLO_cbuf(u64 insn) { | ||
| 41 | FLO(*this, insn, GetCbuf(insn)); | ||
| 42 | } | ||
| 43 | |||
| 44 | void TranslatorVisitor::FLO_imm(u64 insn) { | ||
| 45 | FLO(*this, insn, GetImm20(insn)); | ||
| 46 | } | ||
| 47 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp new file mode 100644 index 000000000..b8c89810c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp | |||
| @@ -0,0 +1,82 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding, | ||
| 13 | const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<8, 8, IR::Reg> src_a; | ||
| 18 | } const fadd{insn}; | ||
| 19 | |||
| 20 | if (cc) { | ||
| 21 | throw NotImplementedException("FADD CC"); | ||
| 22 | } | ||
| 23 | const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)}; | ||
| 24 | const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; | ||
| 25 | IR::FpControl control{ | ||
| 26 | .no_contraction = true, | ||
| 27 | .rounding = CastFpRounding(fp_rounding), | ||
| 28 | .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 29 | }; | ||
| 30 | IR::F32 value{v.ir.FPAdd(op_a, op_b, control)}; | ||
| 31 | if (sat) { | ||
| 32 | value = v.ir.FPSaturate(value); | ||
| 33 | } | ||
| 34 | v.F(fadd.dest_reg, value); | ||
| 35 | } | ||
| 36 | |||
| 37 | void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||
| 38 | union { | ||
| 39 | u64 raw; | ||
| 40 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 41 | BitField<44, 1, u64> ftz; | ||
| 42 | BitField<45, 1, u64> neg_b; | ||
| 43 | BitField<46, 1, u64> abs_a; | ||
| 44 | BitField<47, 1, u64> cc; | ||
| 45 | BitField<48, 1, u64> neg_a; | ||
| 46 | BitField<49, 1, u64> abs_b; | ||
| 47 | BitField<50, 1, u64> sat; | ||
| 48 | } const fadd{insn}; | ||
| 49 | |||
| 50 | FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b, | ||
| 51 | fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0); | ||
| 52 | } | ||
| 53 | } // Anonymous namespace | ||
| 54 | |||
| 55 | void TranslatorVisitor::FADD_reg(u64 insn) { | ||
| 56 | FADD(*this, insn, GetFloatReg20(insn)); | ||
| 57 | } | ||
| 58 | |||
| 59 | void TranslatorVisitor::FADD_cbuf(u64 insn) { | ||
| 60 | FADD(*this, insn, GetFloatCbuf(insn)); | ||
| 61 | } | ||
| 62 | |||
| 63 | void TranslatorVisitor::FADD_imm(u64 insn) { | ||
| 64 | FADD(*this, insn, GetFloatImm20(insn)); | ||
| 65 | } | ||
| 66 | |||
| 67 | // FADD variant whose second operand is fetched with GetFloatImm32. Saturation is always passed | ||
| 68 | // as disabled and the rounding mode is always FpRounding::RN for this encoding. | ||
| 69 | void TranslatorVisitor::FADD32I(u64 insn) { | ||
| 70 | // Fields listed in ascending bit order | ||
| 71 | union { | ||
| 72 | u64 raw; | ||
| 73 | BitField<52, 1, u64> cc; | ||
| 74 | BitField<53, 1, u64> neg_b; | ||
| 75 | BitField<54, 1, u64> abs_a; | ||
| 76 | BitField<55, 1, u64> ftz; | ||
| 77 | BitField<56, 1, u64> neg_a; | ||
| 78 | BitField<57, 1, u64> abs_b; | ||
| 79 | } const bits{insn}; | ||
| 80 | |||
| 81 | const bool write_cc{bits.cc != 0}; | ||
| 82 | const bool flush_denorms{bits.ftz != 0}; | ||
| 83 | const bool abs_a{bits.abs_a != 0}; | ||
| 84 | const bool neg_a{bits.neg_a != 0}; | ||
| 85 | const bool abs_b{bits.abs_b != 0}; | ||
| 86 | const bool neg_b{bits.neg_b != 0}; | ||
| 87 | const IR::F32 src_b{GetFloatImm32(insn)}; | ||
| 88 | |||
| 89 | FADD(*this, insn, false, write_cc, flush_denorms, FpRounding::RN, src_b, abs_a, neg_a, abs_b, | ||
| 90 | neg_b); | ||
| 91 | } | ||
| 81 | |||
| 82 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp new file mode 100644 index 000000000..7127ebf54 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | // Shared FCMP implementation: compares `operand` against 0.0f using the encoded comparison and | ||
| 13 | // stores the select of (register in bits 8-15, src_a) on that comparison in the destination. | ||
| 14 | void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) { | ||
| 15 | union { | ||
| 16 | u64 raw; | ||
| 17 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 18 | BitField<8, 8, IR::Reg> src_reg; | ||
| 19 | BitField<47, 1, u64> ftz; | ||
| 20 | BitField<48, 4, FPCompareOp> compare_op; | ||
| 21 | } const bits{insn}; | ||
| 22 | |||
| 23 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 24 | const IR::FmzMode fmz_mode{bits.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}; | ||
| 25 | const IR::FpControl control{.fmz_mode = fmz_mode}; | ||
| 26 | |||
| 27 | const IR::U1 passed{FloatingPointCompare(v.ir, operand, zero, bits.compare_op, control)}; | ||
| 28 | const IR::U32 reg_value{v.X(bits.src_reg)}; | ||
| 29 | v.X(bits.dest_reg, IR::U32{v.ir.Select(passed, reg_value, src_a)}); | ||
| 30 | } | ||
| 29 | } // Anonymous namespace | ||
| 30 | |||
| 31 | // FCMP variant: src_a comes from GetReg20, the compared float from GetFloatReg39. | ||
| 32 | // Both fetches stay inside the call expression, as in the original form. | ||
| 33 | void TranslatorVisitor::FCMP_reg(u64 insn) { | ||
| 34 | FCMP(*this, insn, | ||
| 35 | GetReg20(insn), | ||
| 36 | GetFloatReg39(insn)); | ||
| 37 | } | ||
| 34 | |||
| 35 | // FCMP variant: src_a comes from GetReg39, the compared float from GetFloatCbuf. | ||
| 36 | // Both fetches stay inside the call expression, as in the original form. | ||
| 37 | void TranslatorVisitor::FCMP_rc(u64 insn) { | ||
| 38 | FCMP(*this, insn, | ||
| 39 | GetReg39(insn), | ||
| 40 | GetFloatCbuf(insn)); | ||
| 41 | } | ||
| 38 | |||
| 39 | // FCMP variant: src_a comes from GetCbuf, the compared float from GetFloatReg39. | ||
| 40 | // Both fetches stay inside the call expression, as in the original form. | ||
| 41 | void TranslatorVisitor::FCMP_cr(u64 insn) { | ||
| 42 | FCMP(*this, insn, | ||
| 43 | GetCbuf(insn), | ||
| 44 | GetFloatReg39(insn)); | ||
| 45 | } | ||
| 42 | |||
| 43 | // FCMP variant with an immediate src_a: the 19-bit field at bit 20 is shifted left by 12 and | ||
| 44 | // bit 56 supplies bit 31 of the resulting 32-bit word. | ||
| 45 | void TranslatorVisitor::FCMP_imm(u64 insn) { | ||
| 46 | union { | ||
| 47 | u64 raw; | ||
| 48 | BitField<20, 19, u64> value; | ||
| 49 | BitField<56, 1, u64> is_negative; | ||
| 50 | } const fcmp{insn}; | ||
| 51 | |||
| 52 | u32 raw_bits{static_cast<u32>(fcmp.value) << 12}; | ||
| 53 | if (fcmp.is_negative != 0) { | ||
| 54 | raw_bits |= 1U << 31; | ||
| 55 | } | ||
| 56 | FCMP(*this, insn, ir.Imm32(raw_bits), GetFloatReg39(insn)); | ||
| 57 | } | ||
| 54 | |||
| 55 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp new file mode 100644 index 000000000..eece4f28f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp | |||
| @@ -0,0 +1,78 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | // Shared FSET implementation: compares operand A (register in bits 8-15) with src_b, combines | ||
| 13 | // the outcome with a predicate and writes 0 or a "true" pattern (all ones, or 0x3f800000 when | ||
| 14 | // the bf bit is set) to the destination register. Optionally updates the Z/S/C/O flags. | ||
| 15 | void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||
| 16 | union { | ||
| 17 | u64 raw; | ||
| 18 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 19 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 20 | BitField<39, 3, IR::Pred> pred; | ||
| 21 | BitField<42, 1, u64> neg_pred; | ||
| 22 | BitField<43, 1, u64> negate_a; | ||
| 23 | BitField<44, 1, u64> abs_b; | ||
| 24 | BitField<45, 2, BooleanOp> bop; | ||
| 25 | BitField<47, 1, u64> cc; | ||
| 26 | BitField<48, 4, FPCompareOp> compare_op; | ||
| 27 | BitField<52, 1, u64> bf; | ||
| 28 | BitField<53, 1, u64> negate_b; | ||
| 29 | BitField<54, 1, u64> abs_a; | ||
| 30 | BitField<55, 1, u64> ftz; | ||
| 31 | } const bits{insn}; | ||
| 32 | |||
| 33 | // Operands with their abs/neg modifiers applied | ||
| 34 | const IR::F32 lhs{v.ir.FPAbsNeg(v.F(bits.src_a_reg), bits.abs_a != 0, bits.negate_a != 0)}; | ||
| 35 | const IR::F32 rhs{v.ir.FPAbsNeg(src_b, bits.abs_b != 0, bits.negate_b != 0)}; | ||
| 36 | const IR::FmzMode fmz_mode{bits.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}; | ||
| 37 | const IR::FpControl control{ | ||
| 38 | .no_contraction = false, | ||
| 39 | .rounding = IR::FpRounding::DontCare, | ||
| 40 | .fmz_mode = fmz_mode, | ||
| 41 | }; | ||
| 42 | |||
| 43 | // Predicate operand, inverted when neg_pred is set | ||
| 44 | const IR::U1 raw_pred{v.ir.GetPred(bits.pred)}; | ||
| 45 | const IR::U1 pred{bits.neg_pred != 0 ? v.ir.LogicalNot(raw_pred) : raw_pred}; | ||
| 46 | |||
| 47 | const IR::U1 compared{FloatingPointCompare(v.ir, lhs, rhs, bits.compare_op, control)}; | ||
| 48 | const IR::U1 combined{PredicateCombine(v.ir, compared, pred, bits.bop)}; | ||
| 49 | |||
| 50 | const IR::U32 all_ones{v.ir.Imm32(-1)}; | ||
| 51 | const IR::U32 float_one{v.ir.Imm32(0x3f800000)}; | ||
| 52 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 53 | const bool boolean_float{bits.bf != 0}; | ||
| 54 | const IR::U32 result{v.ir.Select(combined, boolean_float ? float_one : all_ones, zero)}; | ||
| 55 | v.X(bits.dest_reg, result); | ||
| 56 | |||
| 57 | if (bits.cc == 0) { | ||
| 58 | return; | ||
| 59 | } | ||
| 60 | // Condition codes: Z follows "result == 0"; S is cleared in bf mode and is !Z otherwise | ||
| 61 | const IR::U1 is_zero{v.ir.IEqual(result, zero)}; | ||
| 62 | v.SetZFlag(is_zero); | ||
| 63 | if (boolean_float) { | ||
| 64 | v.ResetSFlag(); | ||
| 65 | } else { | ||
| 66 | v.SetSFlag(v.ir.LogicalNot(is_zero)); | ||
| 67 | } | ||
| 68 | v.ResetCFlag(); | ||
| 69 | v.ResetOFlag(); | ||
| 70 | } | ||
| 64 | } // Anonymous namespace | ||
| 65 | |||
| 66 | // FSET variant whose second operand is fetched with GetFloatReg20. | ||
| 67 | void TranslatorVisitor::FSET_reg(u64 insn) { | ||
| 68 | const IR::F32 src_b{GetFloatReg20(insn)}; | ||
| 69 | FSET(*this, insn, src_b); | ||
| 70 | } | ||
| 69 | |||
| 70 | // FSET variant whose second operand is fetched with GetFloatCbuf. | ||
| 71 | void TranslatorVisitor::FSET_cbuf(u64 insn) { | ||
| 72 | const IR::F32 src_b{GetFloatCbuf(insn)}; | ||
| 73 | FSET(*this, insn, src_b); | ||
| 74 | } | ||
| 73 | |||
| 74 | // FSET variant whose second operand is fetched with GetFloatImm20. | ||
| 75 | void TranslatorVisitor::FSET_imm(u64 insn) { | ||
| 76 | const IR::F32 src_b{GetFloatImm20(insn)}; | ||
| 77 | FSET(*this, insn, src_b); | ||
| 78 | } | ||
| 77 | |||
| 78 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp new file mode 100644 index 000000000..02ab023c1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp | |||
| @@ -0,0 +1,214 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 6 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 7 | |||
| 8 | namespace Shader::Maxwell { | ||
| 9 | namespace { | ||
| 10 | // Values of the 2-bit source/destination size fields decoded by F2F; 0 is not a valid format. | ||
| 11 | enum class FloatFormat : u64 { | ||
| 12 | F16 = 1, | ||
| 13 | F32, // 2 | ||
| 14 | F64, // 3 | ||
| 15 | }; | ||
| 15 | |||
| 16 | // Rounding operations selected by the 4-bit rounding field of F2F after masking with 0b1011. | ||
| 17 | enum class RoundingOp : u64 { | ||
| 18 | None = 0b0000, | ||
| 19 | Pass = 0b0011, | ||
| 20 | Round = 0b1000, | ||
| 21 | Floor = 0b1001, | ||
| 22 | Ceil = 0b1010, | ||
| 23 | Trunc = 0b1011, | ||
| 24 | }; | ||
| 24 | |||
| 25 | // Returns the width in bits of a floating point format; throws for any other encoded value. | ||
| 26 | [[nodiscard]] u32 WidthSize(FloatFormat width) { | ||
| 27 | if (width == FloatFormat::F16) { | ||
| 28 | return 16; | ||
| 29 | } | ||
| 30 | if (width == FloatFormat::F32) { | ||
| 31 | return 32; | ||
| 32 | } | ||
| 33 | if (width == FloatFormat::F64) { | ||
| 34 | return 64; | ||
| 35 | } | ||
| 36 | throw NotImplementedException("Invalid width {}", width); | ||
| 37 | } | ||
| 37 | |||
| 38 | // Shared F2F implementation. Applies abs/neg to src_a, then either converts between float | ||
| 39 | // widths (source and destination sizes differ) or applies the encoded rounding operation | ||
| 40 | // (sizes match), optionally saturates, and writes the result in the destination format. | ||
| 41 | // Throws NotImplementedException for the CC bit, unknown rounding ops and invalid formats. | ||
| 42 | void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { | ||
| 43 | union { | ||
| 44 | u64 raw; | ||
| 45 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 46 | BitField<8, 2, FloatFormat> dst_size; | ||
| 47 | BitField<10, 2, FloatFormat> src_size; | ||
| 48 | BitField<39, 2, FpRounding> rounding; | ||
| 49 | BitField<39, 4, u64> rounding_op; | ||
| 50 | BitField<44, 1, u64> ftz; | ||
| 51 | BitField<45, 1, u64> neg; | ||
| 52 | BitField<47, 1, u64> cc; | ||
| 53 | BitField<50, 1, u64> sat; | ||
| 54 | |||
| 55 | [[nodiscard]] RoundingOp RoundingOperation() const { | ||
| 56 | // Bit 2 of the 4-bit field is dropped before it is read as a RoundingOp | ||
| 57 | constexpr u64 rounding_mask = 0x0B; | ||
| 58 | return static_cast<RoundingOp>(rounding_op.Value() & rounding_mask); | ||
| 59 | } | ||
| 60 | } const bits{insn}; | ||
| 61 | |||
| 62 | if (bits.cc != 0) { | ||
| 63 | throw NotImplementedException("F2F CC"); | ||
| 64 | } | ||
| 65 | |||
| 66 | IR::F16F32F64 value{v.ir.FPAbsNeg(src_a, abs, bits.neg != 0)}; | ||
| 67 | |||
| 68 | const FloatFormat src_format{bits.src_size.Value()}; | ||
| 69 | const FloatFormat dst_format{bits.dst_size.Value()}; | ||
| 70 | const bool involves_f64{src_format == FloatFormat::F64 || dst_format == FloatFormat::F64}; | ||
| 71 | // Flush-to-zero is only requested when no 64-bit format takes part | ||
| 72 | const bool flush{bits.ftz != 0 && !involves_f64}; | ||
| 73 | IR::FpControl fp_control{ | ||
| 74 | .no_contraction = false, | ||
| 75 | .rounding = IR::FpRounding::DontCare, | ||
| 76 | .fmz_mode = (flush ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 77 | }; | ||
| 78 | if (src_format == dst_format) { | ||
| 79 | // Same width: the rounding field selects an integer rounding operation | ||
| 80 | switch (bits.RoundingOperation()) { | ||
| 81 | case RoundingOp::None: | ||
| 82 | case RoundingOp::Pass: | ||
| 83 | // Make sure NANs are handled properly | ||
| 84 | if (src_format == FloatFormat::F16) { | ||
| 85 | value = v.ir.FPAdd(value, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control); | ||
| 86 | } else if (src_format == FloatFormat::F32) { | ||
| 87 | value = v.ir.FPAdd(value, v.ir.Imm32(0.0f), fp_control); | ||
| 88 | } else if (src_format == FloatFormat::F64) { | ||
| 89 | value = v.ir.FPAdd(value, v.ir.Imm64(0.0), fp_control); | ||
| 90 | } | ||
| 91 | break; | ||
| 92 | case RoundingOp::Round: | ||
| 93 | value = v.ir.FPRoundEven(value, fp_control); | ||
| 94 | break; | ||
| 95 | case RoundingOp::Floor: | ||
| 96 | value = v.ir.FPFloor(value, fp_control); | ||
| 97 | break; | ||
| 98 | case RoundingOp::Ceil: | ||
| 99 | value = v.ir.FPCeil(value, fp_control); | ||
| 100 | break; | ||
| 101 | case RoundingOp::Trunc: | ||
| 102 | value = v.ir.FPTrunc(value, fp_control); | ||
| 103 | break; | ||
| 104 | default: | ||
| 105 | throw NotImplementedException("Unimplemented rounding mode {}", bits.rounding.Value()); | ||
| 106 | } | ||
| 107 | } else { | ||
| 108 | // Different widths: the low two rounding bits are the conversion rounding mode | ||
| 109 | fp_control.rounding = CastFpRounding(bits.rounding); | ||
| 110 | value = v.ir.FPConvert(WidthSize(dst_format), value, fp_control); | ||
| 111 | } | ||
| 112 | if (bits.sat != 0 && !involves_f64) { | ||
| 113 | value = v.ir.FPSaturate(value); | ||
| 114 | } | ||
| 115 | |||
| 116 | switch (dst_format) { | ||
| 117 | case FloatFormat::F16: { | ||
| 118 | // A half result is packed together with a zero half into one 32-bit register | ||
| 119 | const IR::F16 zero_half{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; | ||
| 120 | v.X(bits.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero_half))); | ||
| 121 | break; | ||
| 122 | } | ||
| 123 | case FloatFormat::F32: | ||
| 124 | v.F(bits.dest_reg, value); | ||
| 125 | break; | ||
| 126 | case FloatFormat::F64: | ||
| 127 | v.D(bits.dest_reg, value); | ||
| 128 | break; | ||
| 129 | default: | ||
| 130 | throw NotImplementedException("Invalid dest format {}", dst_format); | ||
| 131 | } | ||
| 132 | } | ||
| 125 | } // Anonymous namespace | ||
| 126 | |||
| 127 | // F2F with a register source. The operand is fetched according to the encoded source size: | ||
| 128 | // halves are extracted from GetReg20 with the H1_H0 swizzle (bit 41 selects which one), | ||
| 129 | // F32 uses GetFloatReg20 and F64 uses GetDoubleReg20. | ||
| 130 | // Throws NotImplementedException when the source size field holds an invalid format. | ||
| 131 | void TranslatorVisitor::F2F_reg(u64 insn) { | ||
| 132 | union { | ||
| 133 | u64 insn; | ||
| 134 | BitField<49, 1, u64> abs; | ||
| 135 | BitField<10, 2, FloatFormat> src_size; | ||
| 136 | BitField<41, 1, u64> selector; | ||
| 137 | } const f2f{insn}; | ||
| 138 | |||
| 139 | IR::F16F32F64 src_a; | ||
| 140 | switch (f2f.src_size) { | ||
| 141 | case FloatFormat::F16: { | ||
| 142 | auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)}; | ||
| 143 | src_a = f2f.selector != 0 ? rhs_a : lhs_a; | ||
| 144 | break; | ||
| 145 | } | ||
| 146 | case FloatFormat::F32: | ||
| 147 | src_a = GetFloatReg20(insn); | ||
| 148 | break; | ||
| 149 | case FloatFormat::F64: | ||
| 150 | src_a = GetDoubleReg20(insn); | ||
| 151 | break; | ||
| 152 | default: | ||
| 153 | // The switch is over the source size, so report it as such | ||
| 154 | throw NotImplementedException("Invalid source format {}", f2f.src_size.Value()); | ||
| 155 | } | ||
| 156 | F2F(*this, insn, src_a, f2f.abs != 0); | ||
| 157 | } | ||
| 153 | |||
| 154 | // F2F with a constant buffer source. The operand is fetched according to the encoded source | ||
| 155 | // size: halves are extracted from GetCbuf with the H1_H0 swizzle (bit 41 selects which one), | ||
| 156 | // F32 uses GetFloatCbuf and F64 uses GetDoubleCbuf. | ||
| 157 | // Throws NotImplementedException when the source size field holds an invalid format. | ||
| 158 | void TranslatorVisitor::F2F_cbuf(u64 insn) { | ||
| 159 | union { | ||
| 160 | u64 insn; | ||
| 161 | BitField<49, 1, u64> abs; | ||
| 162 | BitField<10, 2, FloatFormat> src_size; | ||
| 163 | BitField<41, 1, u64> selector; | ||
| 164 | } const f2f{insn}; | ||
| 165 | |||
| 166 | IR::F16F32F64 src_a; | ||
| 167 | switch (f2f.src_size) { | ||
| 168 | case FloatFormat::F16: { | ||
| 169 | auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)}; | ||
| 170 | src_a = f2f.selector != 0 ? rhs_a : lhs_a; | ||
| 171 | break; | ||
| 172 | } | ||
| 173 | case FloatFormat::F32: | ||
| 174 | src_a = GetFloatCbuf(insn); | ||
| 175 | break; | ||
| 176 | case FloatFormat::F64: | ||
| 177 | src_a = GetDoubleCbuf(insn); | ||
| 178 | break; | ||
| 179 | default: | ||
| 180 | // The switch is over the source size, so report it as such | ||
| 181 | throw NotImplementedException("Invalid source format {}", f2f.src_size.Value()); | ||
| 182 | } | ||
| 183 | F2F(*this, insn, src_a, f2f.abs != 0); | ||
| 184 | } | ||
| 180 | |||
| 181 | // F2F with an immediate source. For F16 the low 16 bits of the immediate field are | ||
| 182 | // duplicated into both halves of a 32-bit word before one half is extracted; F32 uses | ||
| 183 | // GetFloatImm20 and F64 uses GetDoubleImm20. | ||
| 184 | // Throws NotImplementedException for an F16 immediate with the negate bit (56) set and when | ||
| 185 | // the source size field holds an invalid format. | ||
| 186 | void TranslatorVisitor::F2F_imm(u64 insn) { | ||
| 187 | union { | ||
| 188 | u64 insn; | ||
| 189 | BitField<49, 1, u64> abs; | ||
| 190 | BitField<10, 2, FloatFormat> src_size; | ||
| 191 | BitField<41, 1, u64> selector; | ||
| 192 | BitField<20, 19, u64> imm; | ||
| 193 | BitField<56, 1, u64> imm_neg; | ||
| 194 | } const f2f{insn}; | ||
| 195 | |||
| 196 | IR::F16F32F64 src_a; | ||
| 197 | switch (f2f.src_size) { | ||
| 198 | case FloatFormat::F16: { | ||
| 199 | const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)}; | ||
| 200 | // Both halves hold the same value, so either selector result is equivalent | ||
| 201 | const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))}; | ||
| 202 | src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)}; | ||
| 203 | if (f2f.imm_neg != 0) { | ||
| 204 | throw NotImplementedException("Neg bit on F16"); | ||
| 205 | } | ||
| 206 | break; | ||
| 207 | } | ||
| 208 | case FloatFormat::F32: | ||
| 209 | src_a = GetFloatImm20(insn); | ||
| 210 | break; | ||
| 211 | case FloatFormat::F64: | ||
| 212 | src_a = GetDoubleImm20(insn); | ||
| 213 | break; | ||
| 214 | default: | ||
| 215 | // The switch is over the source size, so report it as such | ||
| 216 | throw NotImplementedException("Invalid source format {}", f2f.src_size.Value()); | ||
| 217 | } | ||
| 218 | F2F(*this, insn, src_a, f2f.abs != 0); | ||
| 219 | } | ||
| 213 | |||
| 214 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp new file mode 100644 index 000000000..92b1ce015 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp | |||
| @@ -0,0 +1,253 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <limits> | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/exception.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | // Encoded integer destination width of F2I (2-bit field). | ||
| 15 | enum class DestFormat : u64 { | ||
| 16 | Invalid = 0, | ||
| 17 | I16 = 1, | ||
| 18 | I32 = 2, | ||
| 19 | I64 = 3, | ||
| 20 | }; | ||
| 20 | // Encoded floating point source width of F2I (2-bit field). | ||
| 21 | enum class SrcFormat : u64 { | ||
| 22 | Invalid = 0, | ||
| 23 | F16 = 1, | ||
| 24 | F32 = 2, | ||
| 25 | F64 = 3, | ||
| 26 | }; | ||
| 26 | // Encoded rounding operation applied by F2I before the integer conversion (2-bit field). | ||
| 27 | enum class Rounding : u64 { | ||
| 28 | Round = 0, | ||
| 29 | Floor = 1, | ||
| 30 | Ceil = 2, | ||
| 31 | Trunc = 3, | ||
| 32 | }; | ||
| 32 | |||
| 33 | // Bit layout shared by every F2I encoding. `raw` stays the first member so that the union | ||
| 34 | // can be brace-initialized directly from the instruction word. | ||
| 35 | union F2I { | ||
| 36 | u64 raw; | ||
| 37 | // Destination register and operand formats | ||
| 38 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 39 | BitField<8, 2, DestFormat> dest_format; | ||
| 40 | BitField<10, 2, SrcFormat> src_format; | ||
| 41 | BitField<12, 1, u64> is_signed; | ||
| 42 | // Rounding and half selection | ||
| 43 | BitField<39, 2, Rounding> rounding; | ||
| 44 | BitField<41, 1, u64> half; | ||
| 45 | // Modifiers | ||
| 46 | BitField<44, 1, u64> ftz; | ||
| 47 | BitField<45, 1, u64> abs; | ||
| 48 | BitField<47, 1, u64> cc; | ||
| 49 | BitField<49, 1, u64> neg; | ||
| 50 | }; | ||
| 46 | |||
| 47 | // Returns the width in bits of an integer destination format; throws for DestFormat::Invalid | ||
| 48 | // or any other unexpected value. | ||
| 49 | size_t BitSize(DestFormat dest_format) { | ||
| 50 | if (dest_format == DestFormat::I16) { | ||
| 51 | return 16; | ||
| 52 | } | ||
| 53 | if (dest_format == DestFormat::I32) { | ||
| 54 | return 32; | ||
| 55 | } | ||
| 56 | if (dest_format == DestFormat::I64) { | ||
| 57 | return 64; | ||
| 58 | } | ||
| 59 | throw NotImplementedException("Invalid destination format {}", dest_format); | ||
| 60 | } | ||
| 59 | |||
| 60 | // Returns {max, min} of the given integer type, both converted to f64. | ||
| 61 | template <typename Integer> | ||
| 62 | std::pair<f64, f64> LimitsAsDouble() { | ||
| 63 | using Limits = std::numeric_limits<Integer>; | ||
| 64 | return {static_cast<f64>(Limits::max()), static_cast<f64>(Limits::min())}; | ||
| 65 | } | ||
| 66 | |||
| 67 | // Returns the representable range of the destination integer type as {max, min} (note the | ||
| 68 | // order), expressed as doubles. Throws for an invalid destination format. | ||
| 69 | std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) { | ||
| 70 | switch (format) { | ||
| 71 | case DestFormat::I16: | ||
| 72 | return is_signed ? LimitsAsDouble<s16>() : LimitsAsDouble<u16>(); | ||
| 73 | case DestFormat::I32: | ||
| 74 | return is_signed ? LimitsAsDouble<s32>() : LimitsAsDouble<u32>(); | ||
| 75 | case DestFormat::I64: | ||
| 76 | return is_signed ? LimitsAsDouble<s64>() : LimitsAsDouble<u64>(); | ||
| 77 | default: | ||
| 78 | throw NotImplementedException("Invalid destination format {}", format); | ||
| 79 | } | ||
| 80 | } | ||
| 92 | |||
| 93 | // Builds an F64 operand from a constant buffer reference. Only the 32-bit word at byte | ||
| 94 | // offset `offset * 4 + 4` is read; it becomes the second element of the packed pair while | ||
| 95 | // the first element is a zero immediate. | ||
| 96 | // Throws NotImplementedException for bindings >= 18 and for out-of-range or odd word offsets. | ||
| 97 | IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) { | ||
| 98 | union { | ||
| 99 | u64 raw; | ||
| 100 | BitField<20, 14, s64> offset; | ||
| 101 | BitField<34, 5, u64> binding; | ||
| 102 | } const cbuf{insn}; | ||
| 103 | |||
| 104 | // Validate the reference before emitting anything | ||
| 105 | if (cbuf.binding >= 18) { | ||
| 106 | throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); | ||
| 107 | } | ||
| 108 | const s64 word_offset{cbuf.offset.Value()}; | ||
| 109 | if (word_offset >= 0x4'000 || word_offset < 0) { | ||
| 110 | throw NotImplementedException("Out of bounds constant buffer offset {}", word_offset * 4); | ||
| 111 | } | ||
| 112 | if (word_offset % 2 != 0) { | ||
| 113 | throw NotImplementedException("Unaligned F64 constant buffer offset {}", word_offset * 4); | ||
| 114 | } | ||
| 115 | |||
| 116 | const IR::U32 binding{v.ir.Imm32(static_cast<u32>(cbuf.binding))}; | ||
| 117 | const IR::U32 byte_offset{v.ir.Imm32(static_cast<u32>(word_offset) * 4 + 4)}; | ||
| 118 | const IR::U32 upper_word{v.ir.GetCbuf(binding, byte_offset)}; | ||
| 119 | const IR::Value pair{v.ir.CompositeConstruct(v.ir.Imm32(0U), upper_word)}; | ||
| 120 | return v.ir.PackDouble2x32(pair); | ||
| 121 | } | ||
| 114 | |||
| 115 | // Shared F2I implementation: converts the floating point operand src_a to an integer. | ||
| 116 | // Steps: apply abs/neg, round with the encoded operation, clamp to the range of the | ||
| 117 | // destination type, convert to a 32-bit or 64-bit integer, patch the result for NaN inputs | ||
| 118 | // and write it to the destination register(s). | ||
| 119 | // Throws NotImplementedException for invalid formats, invalid rounding and for the CC bit. | ||
| 120 | void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { | ||
| 121 | // F2I is used to convert from a floating point value to an integer | ||
| 122 | const F2I f2i{insn}; | ||
| 123 | |||
| 124 | // The FTZ bit is only honored when the source is neither F16 nor F64 and the | ||
| 125 | // destination is not I64; otherwise the mode is left as DontCare | ||
| 126 | const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 && | ||
| 127 | f2i.dest_format != DestFormat::I64}; | ||
| 128 | IR::FmzMode fmz_mode{IR::FmzMode::DontCare}; | ||
| 129 | if (denorm_cares) { | ||
| 130 | fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None; | ||
| 131 | } | ||
| 132 | const IR::FpControl fp_control{ | ||
| 133 | .no_contraction = true, | ||
| 134 | .rounding = IR::FpRounding::DontCare, | ||
| 135 | .fmz_mode = fmz_mode, | ||
| 136 | }; | ||
| 137 | const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; | ||
| 138 | const IR::F16F32F64 rounded_value{[&] { | ||
| 139 | switch (f2i.rounding) { | ||
| 140 | case Rounding::Round: | ||
| 141 | return v.ir.FPRoundEven(op_a, fp_control); | ||
| 142 | case Rounding::Floor: | ||
| 143 | return v.ir.FPFloor(op_a, fp_control); | ||
| 144 | case Rounding::Ceil: | ||
| 145 | return v.ir.FPCeil(op_a, fp_control); | ||
| 146 | case Rounding::Trunc: | ||
| 147 | return v.ir.FPTrunc(op_a, fp_control); | ||
| 148 | default: | ||
| 149 | throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); | ||
| 150 | } | ||
| 151 | }()}; | ||
| 152 | const bool is_signed{f2i.is_signed != 0}; | ||
| 153 | // ClampBounds returns {max, min}, in that order | ||
| 154 | const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed); | ||
| 155 | |||
| 156 | // Clamp in the precision of the source operand | ||
| 157 | IR::F16F32F64 intermediate; | ||
| 158 | switch (f2i.src_format) { | ||
| 159 | case SrcFormat::F16: { | ||
| 160 | const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))}; | ||
| 161 | const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))}; | ||
| 162 | intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); | ||
| 163 | break; | ||
| 164 | } | ||
| 165 | case SrcFormat::F32: { | ||
| 166 | const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))}; | ||
| 167 | const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))}; | ||
| 168 | intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); | ||
| 169 | break; | ||
| 170 | } | ||
| 171 | case SrcFormat::F64: { | ||
| 172 | const IR::F64 max_val{v.ir.Imm64(max_bound)}; | ||
| 173 | const IR::F64 min_val{v.ir.Imm64(min_bound)}; | ||
| 174 | intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); | ||
| 175 | break; | ||
| 176 | } | ||
| 177 | default: | ||
| 178 | // This switch is over the source format, so report the source format | ||
| 179 | throw NotImplementedException("Invalid source format {}", f2i.src_format.Value()); | ||
| 180 | } | ||
| 181 | |||
| 182 | // 16-bit destinations are still converted as 32-bit integers | ||
| 183 | const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))}; | ||
| 184 | IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)}; | ||
| 185 | |||
| 186 | // NaN inputs: when exactly one of "source is F64" / "destination is I64" holds, I32 and | ||
| 187 | // I64 destinations yield the value with only the top bit set | ||
| 188 | bool handled_special_case = false; | ||
| 189 | const bool special_nan_cases = | ||
| 190 | (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64); | ||
| 191 | if (special_nan_cases) { | ||
| 192 | if (f2i.dest_format == DestFormat::I32) { | ||
| 193 | handled_special_case = true; | ||
| 194 | result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)}; | ||
| 195 | } else if (f2i.dest_format == DestFormat::I64) { | ||
| 196 | handled_special_case = true; | ||
| 197 | result = IR::U64{ | ||
| 198 | v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)}; | ||
| 199 | } | ||
| 200 | } | ||
| 201 | // Otherwise signed conversions of NaN yield zero | ||
| 202 | if (!handled_special_case && is_signed) { | ||
| 203 | if (bitsize != 64) { | ||
| 204 | result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; | ||
| 205 | } else { | ||
| 206 | result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)}; | ||
| 207 | } | ||
| 208 | } | ||
| 209 | |||
| 210 | if (bitsize == 64) { | ||
| 211 | v.L(f2i.dest_reg, result); | ||
| 212 | } else { | ||
| 213 | v.X(f2i.dest_reg, result); | ||
| 214 | } | ||
| 215 | |||
| 216 | if (f2i.cc != 0) { | ||
| 217 | throw NotImplementedException("F2I CC"); | ||
| 218 | } | ||
| 219 | } | ||
| 206 | } // Anonymous namespace | ||
| 207 | |||
| 208 | // F2I with a register source (register index in bits 20-27). F16 operands are one half of | ||
| 209 | // the unpacked register selected by the `half` field, F32 reads the register directly, and | ||
| 210 | // F64 packs the register together with the following one. | ||
| 211 | void TranslatorVisitor::F2I_reg(u64 insn) { | ||
| 212 | union { | ||
| 213 | u64 raw; | ||
| 214 | F2I base; | ||
| 215 | BitField<20, 8, IR::Reg> src_reg; | ||
| 216 | } const f2i{insn}; | ||
| 217 | |||
| 218 | IR::F16F32F64 src_a; | ||
| 219 | switch (f2i.base.src_format) { | ||
| 220 | case SrcFormat::F16: | ||
| 221 | src_a = IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)}; | ||
| 222 | break; | ||
| 223 | case SrcFormat::F32: | ||
| 224 | src_a = F(f2i.src_reg); | ||
| 225 | break; | ||
| 226 | case SrcFormat::F64: | ||
| 227 | src_a = ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1))); | ||
| 228 | break; | ||
| 229 | default: | ||
| 230 | throw NotImplementedException("Invalid F2I source format {}", | ||
| 231 | f2i.base.src_format.Value()); | ||
| 232 | } | ||
| 233 | TranslateF2I(*this, insn, src_a); | ||
| 234 | } | ||
| 230 | |||
| 231 | // F2I with a constant buffer source. F16 operands are one half of the unpacked GetCbuf word | ||
| 232 | // selected by the `half` field, F32 uses GetFloatCbuf and F64 goes through UnpackCbuf. | ||
| 233 | void TranslatorVisitor::F2I_cbuf(u64 insn) { | ||
| 234 | const F2I f2i{insn}; | ||
| 235 | |||
| 236 | IR::F16F32F64 src_a; | ||
| 237 | switch (f2i.src_format) { | ||
| 238 | case SrcFormat::F16: | ||
| 239 | src_a = IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)}; | ||
| 240 | break; | ||
| 241 | case SrcFormat::F32: | ||
| 242 | src_a = GetFloatCbuf(insn); | ||
| 243 | break; | ||
| 244 | case SrcFormat::F64: | ||
| 245 | src_a = UnpackCbuf(*this, insn); | ||
| 246 | break; | ||
| 247 | default: | ||
| 248 | throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value()); | ||
| 249 | } | ||
| 250 | TranslateF2I(*this, insn, src_a); | ||
| 251 | } | ||
| 248 | |||
| 249 | // The immediate form of F2I is not implemented: always throws, naming the opcode. | ||
| 250 | void TranslatorVisitor::F2I_imm([[maybe_unused]] u64 insn) { | ||
| 251 | throw NotImplementedException("{}", Opcode::F2I_imm); | ||
| 252 | } | ||
| 252 | |||
| 253 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..fa2a7807b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp | |||
| @@ -0,0 +1,94 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | // Parameterized FFMA implementation: computes (+/-a) * (+/-b) + (+/-c), where `a` is the | ||
| 13 | // register in bits 8-15, and writes the result to the register in bits 0-7. | ||
| 14 | // Throws NotImplementedException when `cc` is requested. | ||
| 15 | void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a, | ||
| 16 | bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) { | ||
| 17 | union { | ||
| 18 | u64 raw; | ||
| 19 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 20 | BitField<8, 8, IR::Reg> src_a; | ||
| 21 | } const regs{insn}; | ||
| 22 | |||
| 23 | if (cc) { | ||
| 24 | throw NotImplementedException("FFMA CC"); | ||
| 25 | } | ||
| 26 | // None of the operands supports an absolute value modifier here | ||
| 27 | const IR::F32 factor_a{v.ir.FPAbsNeg(v.F(regs.src_a), false, neg_a)}; | ||
| 28 | const IR::F32 factor_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; | ||
| 29 | const IR::F32 addend{v.ir.FPAbsNeg(src_c, false, neg_c)}; | ||
| 30 | const IR::FpControl fp_control{ | ||
| 31 | .no_contraction = true, | ||
| 32 | .rounding = CastFpRounding(fp_rounding), | ||
| 33 | .fmz_mode = CastFmzMode(fmz_mode), | ||
| 34 | }; | ||
| 35 | IR::F32 result{v.ir.FPFma(factor_a, factor_b, addend, fp_control)}; | ||
| 36 | |||
| 37 | // Do not implement FMZ if SAT is enabled, as it does the logic for us. | ||
| 38 | const bool emulate_fmz{fmz_mode == FmzMode::FMZ && !sat}; | ||
| 39 | if (emulate_fmz) { | ||
| 40 | // On D3D9 mode, anything * 0 is zero, even NAN and infinity, | ||
| 41 | // so the result is just the addend whenever either factor is zero | ||
| 42 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 43 | const IR::U1 a_is_zero{v.ir.FPEqual(factor_a, zero)}; | ||
| 44 | const IR::U1 b_is_zero{v.ir.FPEqual(factor_b, zero)}; | ||
| 45 | const IR::U1 product_is_zero{v.ir.LogicalOr(a_is_zero, b_is_zero)}; | ||
| 46 | result = IR::F32{v.ir.Select(product_is_zero, addend, result)}; | ||
| 47 | } | ||
| 48 | if (sat) { | ||
| 49 | result = v.ir.FPSaturate(result); | ||
| 50 | } | ||
| 51 | v.F(regs.dest_reg, result); | ||
| 52 | } | ||
| 46 | |||
| 47 | // Decodes the modifier bits used by the FFMA_reg/rc/cr/imm encodings and forwards them to | ||
| 48 | // the parameterized FFMA overload. Operand A is never negated by these encodings. | ||
| 49 | void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) { | ||
| 50 | union { | ||
| 51 | u64 raw; | ||
| 52 | BitField<47, 1, u64> cc; | ||
| 53 | BitField<48, 1, u64> neg_b; | ||
| 54 | BitField<49, 1, u64> neg_c; | ||
| 55 | BitField<50, 1, u64> sat; | ||
| 56 | BitField<51, 2, FpRounding> fp_rounding; | ||
| 57 | BitField<53, 2, FmzMode> fmz_mode; | ||
| 58 | } const bits{insn}; | ||
| 59 | |||
| 60 | const bool neg_b{bits.neg_b != 0}; | ||
| 61 | const bool neg_c{bits.neg_c != 0}; | ||
| 62 | const bool saturate{bits.sat != 0}; | ||
| 63 | const bool write_cc{bits.cc != 0}; | ||
| 64 | |||
| 65 | FFMA(v, insn, src_b, src_c, false, neg_b, neg_c, saturate, write_cc, bits.fmz_mode, | ||
| 66 | bits.fp_rounding); | ||
| 67 | } | ||
| 61 | } // Anonymous namespace | ||
| 62 | |||
| 63 | // FFMA variant: src_b comes from GetFloatReg20, src_c from GetFloatReg39. | ||
| 64 | // Both fetches stay inside the call expression, as in the original form. | ||
| 65 | void TranslatorVisitor::FFMA_reg(u64 insn) { | ||
| 66 | FFMA(*this, insn, | ||
| 67 | GetFloatReg20(insn), | ||
| 68 | GetFloatReg39(insn)); | ||
| 69 | } | ||
| 66 | |||
| 67 | // FFMA variant: src_b comes from GetFloatReg39, src_c from GetFloatCbuf. | ||
| 68 | // Both fetches stay inside the call expression, as in the original form. | ||
| 69 | void TranslatorVisitor::FFMA_rc(u64 insn) { | ||
| 70 | FFMA(*this, insn, | ||
| 71 | GetFloatReg39(insn), | ||
| 72 | GetFloatCbuf(insn)); | ||
| 73 | } | ||
| 70 | |||
| 71 | // FFMA variant: src_b comes from GetFloatCbuf, src_c from GetFloatReg39. | ||
| 72 | // Both fetches stay inside the call expression, as in the original form. | ||
| 73 | void TranslatorVisitor::FFMA_cr(u64 insn) { | ||
| 74 | FFMA(*this, insn, | ||
| 75 | GetFloatCbuf(insn), | ||
| 76 | GetFloatReg39(insn)); | ||
| 77 | } | ||
| 74 | |||
| 75 | // FFMA variant: src_b comes from GetFloatImm20, src_c from GetFloatReg39. | ||
| 76 | // Both fetches stay inside the call expression, as in the original form. | ||
| 77 | void TranslatorVisitor::FFMA_imm(u64 insn) { | ||
| 78 | FFMA(*this, insn, | ||
| 79 | GetFloatImm20(insn), | ||
| 80 | GetFloatReg39(insn)); | ||
| 81 | } | ||
| 78 | |||
| 79 | // FFMA variant with src_b fetched by GetFloatImm32. The addend is read from the register in | ||
| 80 | // bits 0-7, the same field the shared implementation uses as destination. Operand B is never | ||
| 81 | // negated and rounding is always FpRounding::RN. | ||
| 82 | void TranslatorVisitor::FFMA32I(u64 insn) { | ||
| 83 | union { | ||
| 84 | u64 raw; | ||
| 85 | BitField<0, 8, IR::Reg> src_c; // FFMA32I mirrors the destination and addition register | ||
| 86 | BitField<52, 1, u64> cc; | ||
| 87 | BitField<53, 2, FmzMode> fmz_mode; | ||
| 88 | BitField<55, 1, u64> sat; | ||
| 89 | BitField<56, 1, u64> neg_a; | ||
| 90 | BitField<57, 1, u64> neg_c; | ||
| 91 | } const bits{insn}; | ||
| 92 | |||
| 93 | const bool neg_a{bits.neg_a != 0}; | ||
| 94 | const bool neg_c{bits.neg_c != 0}; | ||
| 95 | const bool saturate{bits.sat != 0}; | ||
| 96 | const bool write_cc{bits.cc != 0}; | ||
| 97 | |||
| 98 | FFMA(*this, insn, GetFloatImm32(insn), F(bits.src_c), neg_a, false, neg_c, saturate, write_cc, | ||
| 99 | bits.fmz_mode, FpRounding::RN); | ||
| 100 | } | ||
| 93 | |||
| 94 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp new file mode 100644 index 000000000..c0d6ee5af --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | // Shared FMNMX implementation: computes both the minimum and the maximum of the two operands | ||
| 12 | // and selects between them with a predicate. With neg_pred clear a true predicate picks the | ||
| 13 | // minimum; with neg_pred set the two choices are exchanged. | ||
| 14 | // Throws NotImplementedException when the CC bit is set. | ||
| 15 | void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||
| 16 | union { | ||
| 17 | u64 raw; | ||
| 18 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 19 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 20 | BitField<39, 3, IR::Pred> pred; | ||
| 21 | BitField<42, 1, u64> neg_pred; | ||
| 22 | BitField<44, 1, u64> ftz; | ||
| 23 | BitField<45, 1, u64> negate_b; | ||
| 24 | BitField<46, 1, u64> abs_a; | ||
| 25 | BitField<47, 1, u64> cc; | ||
| 26 | BitField<48, 1, u64> negate_a; | ||
| 27 | BitField<49, 1, u64> abs_b; | ||
| 28 | } const bits{insn}; | ||
| 29 | |||
| 30 | if (bits.cc != 0) { | ||
| 31 | throw NotImplementedException("FMNMX CC"); | ||
| 32 | } | ||
| 33 | |||
| 34 | const IR::U1 selector{v.ir.GetPred(bits.pred)}; | ||
| 35 | const IR::F32 lhs{v.ir.FPAbsNeg(v.F(bits.src_a_reg), bits.abs_a != 0, bits.negate_a != 0)}; | ||
| 36 | const IR::F32 rhs{v.ir.FPAbsNeg(src_b, bits.abs_b != 0, bits.negate_b != 0)}; | ||
| 37 | |||
| 38 | const IR::FmzMode fmz_mode{bits.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}; | ||
| 39 | const IR::FpControl control{ | ||
| 40 | .no_contraction = false, | ||
| 41 | .rounding = IR::FpRounding::DontCare, | ||
| 42 | .fmz_mode = fmz_mode, | ||
| 43 | }; | ||
| 44 | const IR::F32 maximum{v.ir.FPMax(lhs, rhs, control)}; | ||
| 45 | const IR::F32 minimum{v.ir.FPMin(lhs, rhs, control)}; | ||
| 46 | |||
| 47 | // The predicate itself is not negated; the two select inputs trade places instead | ||
| 48 | const bool exchanged{bits.neg_pred != 0}; | ||
| 49 | const IR::F32& when_true{exchanged ? maximum : minimum}; | ||
| 50 | const IR::F32& when_false{exchanged ? minimum : maximum}; | ||
| 51 | |||
| 52 | v.F(bits.dest_reg, IR::F32{v.ir.Select(selector, when_true, when_false)}); | ||
| 53 | } | ||
| 48 | } // Anonymous namespace | ||
| 49 | |||
| 50 | void TranslatorVisitor::FMNMX_reg(u64 insn) { | ||
| 51 | FMNMX(*this, insn, GetFloatReg20(insn)); | ||
| 52 | } | ||
| 53 | |||
| 54 | void TranslatorVisitor::FMNMX_cbuf(u64 insn) { | ||
| 55 | FMNMX(*this, insn, GetFloatCbuf(insn)); | ||
| 56 | } | ||
| 57 | |||
| 58 | void TranslatorVisitor::FMNMX_imm(u64 insn) { | ||
| 59 | FMNMX(*this, insn, GetFloatImm20(insn)); | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp new file mode 100644 index 000000000..2f8605619 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | enum class Operation : u64 { | ||
| 14 | Cos = 0, | ||
| 15 | Sin = 1, | ||
| 16 | Ex2 = 2, // Base 2 exponent | ||
| 17 | Lg2 = 3, // Base 2 logarithm | ||
| 18 | Rcp = 4, // Reciprocal | ||
| 19 | Rsq = 5, // Reciprocal square root | ||
| 20 | Rcp64H = 6, // 64-bit reciprocal | ||
| 21 | Rsq64H = 7, // 64-bit reciprocal square root | ||
| 22 | Sqrt = 8, | ||
| 23 | }; | ||
| 24 | } // Anonymous namespace | ||
| 25 | |||
| 26 | void TranslatorVisitor::MUFU(u64 insn) { | ||
| 27 | // MUFU is used to implement a bunch of special functions. See Operation. | ||
| 28 | union { | ||
| 29 | u64 raw; | ||
| 30 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 31 | BitField<8, 8, IR::Reg> src_reg; | ||
| 32 | BitField<20, 4, Operation> operation; | ||
| 33 | BitField<46, 1, u64> abs; | ||
| 34 | BitField<48, 1, u64> neg; | ||
| 35 | BitField<50, 1, u64> sat; | ||
| 36 | } const mufu{insn}; | ||
| 37 | |||
| 38 | const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)}; | ||
| 39 | IR::F32 value{[&]() -> IR::F32 { | ||
| 40 | switch (mufu.operation) { | ||
| 41 | case Operation::Cos: | ||
| 42 | return ir.FPCos(op_a); | ||
| 43 | case Operation::Sin: | ||
| 44 | return ir.FPSin(op_a); | ||
| 45 | case Operation::Ex2: | ||
| 46 | return ir.FPExp2(op_a); | ||
| 47 | case Operation::Lg2: | ||
| 48 | return ir.FPLog2(op_a); | ||
| 49 | case Operation::Rcp: | ||
| 50 | return ir.FPRecip(op_a); | ||
| 51 | case Operation::Rsq: | ||
| 52 | return ir.FPRecipSqrt(op_a); | ||
| 53 | case Operation::Rcp64H: | ||
| 54 | throw NotImplementedException("MUFU.RCP64H"); | ||
| 55 | case Operation::Rsq64H: | ||
| 56 | throw NotImplementedException("MUFU.RSQ64H"); | ||
| 57 | case Operation::Sqrt: | ||
| 58 | return ir.FPSqrt(op_a); | ||
| 59 | default: | ||
| 60 | throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value()); | ||
| 61 | } | ||
| 62 | }()}; | ||
| 63 | |||
| 64 | if (mufu.sat) { | ||
| 65 | value = ir.FPSaturate(value); | ||
| 66 | } | ||
| 67 | |||
| 68 | F(mufu.dest_reg, value); | ||
| 69 | } | ||
| 70 | |||
| 71 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp new file mode 100644 index 000000000..06226b7ce --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp | |||
| @@ -0,0 +1,127 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Scale : u64 { | ||
| 15 | None, | ||
| 16 | D2, | ||
| 17 | D4, | ||
| 18 | D8, | ||
| 19 | M8, | ||
| 20 | M4, | ||
| 21 | M2, | ||
| 22 | INVALIDSCALE37, | ||
| 23 | }; | ||
| 24 | |||
| 25 | float ScaleFactor(Scale scale) { | ||
| 26 | switch (scale) { | ||
| 27 | case Scale::None: | ||
| 28 | return 1.0f; | ||
| 29 | case Scale::D2: | ||
| 30 | return 1.0f / 2.0f; | ||
| 31 | case Scale::D4: | ||
| 32 | return 1.0f / 4.0f; | ||
| 33 | case Scale::D8: | ||
| 34 | return 1.0f / 8.0f; | ||
| 35 | case Scale::M8: | ||
| 36 | return 8.0f; | ||
| 37 | case Scale::M4: | ||
| 38 | return 4.0f; | ||
| 39 | case Scale::M2: | ||
| 40 | return 2.0f; | ||
| 41 | case Scale::INVALIDSCALE37: | ||
| 42 | break; | ||
| 43 | } | ||
| 44 | throw NotImplementedException("Invalid FMUL scale {}", scale); | ||
| 45 | } | ||
| 46 | |||
| 47 | void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode, | ||
| 48 | FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) { | ||
| 49 | union { | ||
| 50 | u64 raw; | ||
| 51 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 52 | BitField<8, 8, IR::Reg> src_a; | ||
| 53 | } const fmul{insn}; | ||
| 54 | |||
| 55 | if (cc) { | ||
| 56 | throw NotImplementedException("FMUL CC"); | ||
| 57 | } | ||
| 58 | IR::F32 op_a{v.F(fmul.src_a)}; | ||
| 59 | if (scale != Scale::None) { | ||
| 60 | if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { | ||
| 61 | throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers"); | ||
| 62 | } | ||
| 63 | op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale))); | ||
| 64 | } | ||
| 65 | const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; | ||
| 66 | const IR::FpControl fp_control{ | ||
| 67 | .no_contraction = true, | ||
| 68 | .rounding = CastFpRounding(fp_rounding), | ||
| 69 | .fmz_mode = CastFmzMode(fmz_mode), | ||
| 70 | }; | ||
| 71 | IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)}; | ||
| 72 | if (fmz_mode == FmzMode::FMZ && !sat) { | ||
| 73 | // Do not implement FMZ if SAT is enabled, as it does the logic for us. | ||
| 74 | // On D3D9 mode, anything * 0 is zero, even NAN and infinity | ||
| 75 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 76 | const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; | ||
| 77 | const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; | ||
| 78 | const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; | ||
| 79 | value = IR::F32{v.ir.Select(any_zero, zero, value)}; | ||
| 80 | } | ||
| 81 | if (sat) { | ||
| 82 | value = v.ir.FPSaturate(value); | ||
| 83 | } | ||
| 84 | v.F(fmul.dest_reg, value); | ||
| 85 | } | ||
| 86 | |||
| 87 | void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||
| 88 | union { | ||
| 89 | u64 raw; | ||
| 90 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 91 | BitField<41, 3, Scale> scale; | ||
| 92 | BitField<44, 2, FmzMode> fmz; | ||
| 93 | BitField<47, 1, u64> cc; | ||
| 94 | BitField<48, 1, u64> neg_b; | ||
| 95 | BitField<50, 1, u64> sat; | ||
| 96 | } const fmul{insn}; | ||
| 97 | |||
| 98 | FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, | ||
| 99 | fmul.neg_b != 0); | ||
| 100 | } | ||
| 101 | } // Anonymous namespace | ||
| 102 | |||
| 103 | void TranslatorVisitor::FMUL_reg(u64 insn) { | ||
| 104 | return FMUL(*this, insn, GetFloatReg20(insn)); | ||
| 105 | } | ||
| 106 | |||
| 107 | void TranslatorVisitor::FMUL_cbuf(u64 insn) { | ||
| 108 | return FMUL(*this, insn, GetFloatCbuf(insn)); | ||
| 109 | } | ||
| 110 | |||
| 111 | void TranslatorVisitor::FMUL_imm(u64 insn) { | ||
| 112 | return FMUL(*this, insn, GetFloatImm20(insn)); | ||
| 113 | } | ||
| 114 | |||
| 115 | void TranslatorVisitor::FMUL32I(u64 insn) { | ||
| 116 | union { | ||
| 117 | u64 raw; | ||
| 118 | BitField<52, 1, u64> cc; | ||
| 119 | BitField<53, 2, FmzMode> fmz; | ||
| 120 | BitField<55, 1, u64> sat; | ||
| 121 | } const fmul32i{insn}; | ||
| 122 | |||
| 123 | FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None, | ||
| 124 | fmul32i.sat != 0, fmul32i.cc != 0, false); | ||
| 125 | } | ||
| 126 | |||
| 127 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp new file mode 100644 index 000000000..f91b93fad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp | |||
| @@ -0,0 +1,41 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Mode : u64 { | ||
| 12 | SINCOS, | ||
| 13 | EX2, | ||
| 14 | }; | ||
| 15 | |||
| 16 | void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) { | ||
| 17 | union { | ||
| 18 | u64 raw; | ||
| 19 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 20 | BitField<39, 1, Mode> mode; | ||
| 21 | BitField<45, 1, u64> neg; | ||
| 22 | BitField<49, 1, u64> abs; | ||
| 23 | } const rro{insn}; | ||
| 24 | |||
| 25 | v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0)); | ||
| 26 | } | ||
| 27 | } // Anonymous namespace | ||
| 28 | |||
| 29 | void TranslatorVisitor::RRO_reg(u64 insn) { | ||
| 30 | RRO(*this, insn, GetFloatReg20(insn)); | ||
| 31 | } | ||
| 32 | |||
| 33 | void TranslatorVisitor::RRO_cbuf(u64 insn) { | ||
| 34 | RRO(*this, insn, GetFloatCbuf(insn)); | ||
| 35 | } | ||
| 36 | |||
| 37 | void TranslatorVisitor::RRO_imm(u64) { | ||
| 38 | throw NotImplementedException("RRO (imm)"); | ||
| 39 | } | ||
| 40 | |||
| 41 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp new file mode 100644 index 000000000..5f93a1513 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp | |||
| @@ -0,0 +1,60 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 16 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 17 | BitField<6, 1, u64> negate_b; | ||
| 18 | BitField<7, 1, u64> abs_a; | ||
| 19 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 20 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 21 | BitField<42, 1, u64> neg_bop_pred; | ||
| 22 | BitField<43, 1, u64> negate_a; | ||
| 23 | BitField<44, 1, u64> abs_b; | ||
| 24 | BitField<45, 2, BooleanOp> bop; | ||
| 25 | BitField<47, 1, u64> ftz; | ||
| 26 | BitField<48, 4, FPCompareOp> compare_op; | ||
| 27 | } const fsetp{insn}; | ||
| 28 | |||
| 29 | const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)}; | ||
| 30 | const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0); | ||
| 31 | const IR::FpControl control{ | ||
| 32 | .no_contraction = false, | ||
| 33 | .rounding = IR::FpRounding::DontCare, | ||
| 34 | .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 35 | }; | ||
| 36 | |||
| 37 | const BooleanOp bop{fsetp.bop}; | ||
| 38 | const FPCompareOp compare_op{fsetp.compare_op}; | ||
| 39 | const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)}; | ||
| 40 | const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)}; | ||
| 41 | const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; | ||
| 42 | const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; | ||
| 43 | v.ir.SetPred(fsetp.dest_pred_a, result_a); | ||
| 44 | v.ir.SetPred(fsetp.dest_pred_b, result_b); | ||
| 45 | } | ||
| 46 | } // Anonymous namespace | ||
| 47 | |||
| 48 | void TranslatorVisitor::FSETP_reg(u64 insn) { | ||
| 49 | FSETP(*this, insn, GetFloatReg20(insn)); | ||
| 50 | } | ||
| 51 | |||
| 52 | void TranslatorVisitor::FSETP_cbuf(u64 insn) { | ||
| 53 | FSETP(*this, insn, GetFloatCbuf(insn)); | ||
| 54 | } | ||
| 55 | |||
| 56 | void TranslatorVisitor::FSETP_imm(u64 insn) { | ||
| 57 | FSETP(*this, insn, GetFloatImm20(insn)); | ||
| 58 | } | ||
| 59 | |||
| 60 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp new file mode 100644 index 000000000..7550a8d4c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | void TranslatorVisitor::FSWZADD(u64 insn) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<28, 8, u64> swizzle; | ||
| 16 | BitField<38, 1, u64> ndv; | ||
| 17 | BitField<39, 2, FpRounding> round; | ||
| 18 | BitField<44, 1, u64> ftz; | ||
| 19 | BitField<47, 1, u64> cc; | ||
| 20 | } const fswzadd{insn}; | ||
| 21 | |||
| 22 | if (fswzadd.ndv != 0) { | ||
| 23 | throw NotImplementedException("FSWZADD NDV"); | ||
| 24 | } | ||
| 25 | |||
| 26 | const IR::F32 src_a{GetFloatReg8(insn)}; | ||
| 27 | const IR::F32 src_b{GetFloatReg20(insn)}; | ||
| 28 | const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))}; | ||
| 29 | |||
| 30 | const IR::FpControl fp_control{ | ||
| 31 | .no_contraction = false, | ||
| 32 | .rounding = CastFpRounding(fswzadd.round), | ||
| 33 | .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 34 | }; | ||
| 35 | |||
| 36 | const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)}; | ||
| 37 | F(fswzadd.dest_reg, result); | ||
| 38 | |||
| 39 | if (fswzadd.cc != 0) { | ||
| 40 | throw NotImplementedException("FSWZADD CC"); | ||
| 41 | } | ||
| 42 | } | ||
| 43 | |||
| 44 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp new file mode 100644 index 000000000..f2738a93b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp | |||
| @@ -0,0 +1,125 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | namespace { | ||
| 9 | void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, | ||
| 10 | Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { | ||
| 11 | union { | ||
| 12 | u64 raw; | ||
| 13 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 14 | BitField<8, 8, IR::Reg> src_a; | ||
| 15 | } const hadd2{insn}; | ||
| 16 | |||
| 17 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)}; | ||
| 18 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 19 | const bool promotion{lhs_a.Type() != lhs_b.Type()}; | ||
| 20 | if (promotion) { | ||
| 21 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 22 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 23 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 24 | } | ||
| 25 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 26 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 27 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 28 | } | ||
| 29 | } | ||
| 30 | lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); | ||
| 31 | rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); | ||
| 32 | |||
| 33 | lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); | ||
| 34 | rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); | ||
| 35 | |||
| 36 | const IR::FpControl fp_control{ | ||
| 37 | .no_contraction = true, | ||
| 38 | .rounding = IR::FpRounding::DontCare, | ||
| 39 | .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 40 | }; | ||
| 41 | IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)}; | ||
| 42 | IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)}; | ||
| 43 | if (sat) { | ||
| 44 | lhs = v.ir.FPSaturate(lhs); | ||
| 45 | rhs = v.ir.FPSaturate(rhs); | ||
| 46 | } | ||
| 47 | if (promotion) { | ||
| 48 | lhs = v.ir.FPConvert(16, lhs); | ||
| 49 | rhs = v.ir.FPConvert(16, rhs); | ||
| 50 | } | ||
| 51 | v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge)); | ||
| 52 | } | ||
| 53 | |||
| 54 | void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b, | ||
| 55 | const IR::U32& src_b) { | ||
| 56 | union { | ||
| 57 | u64 raw; | ||
| 58 | BitField<49, 2, Merge> merge; | ||
| 59 | BitField<39, 1, u64> ftz; | ||
| 60 | BitField<43, 1, u64> neg_a; | ||
| 61 | BitField<44, 1, u64> abs_a; | ||
| 62 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 63 | } const hadd2{insn}; | ||
| 64 | |||
| 65 | HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0, | ||
| 66 | hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b); | ||
| 67 | } | ||
| 68 | } // Anonymous namespace | ||
| 69 | |||
| 70 | void TranslatorVisitor::HADD2_reg(u64 insn) { | ||
| 71 | union { | ||
| 72 | u64 raw; | ||
| 73 | BitField<32, 1, u64> sat; | ||
| 74 | BitField<31, 1, u64> neg_b; | ||
| 75 | BitField<30, 1, u64> abs_b; | ||
| 76 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 77 | } const hadd2{insn}; | ||
| 78 | |||
| 79 | HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b, | ||
| 80 | GetReg20(insn)); | ||
| 81 | } | ||
| 82 | |||
| 83 | void TranslatorVisitor::HADD2_cbuf(u64 insn) { | ||
| 84 | union { | ||
| 85 | u64 raw; | ||
| 86 | BitField<52, 1, u64> sat; | ||
| 87 | BitField<56, 1, u64> neg_b; | ||
| 88 | BitField<54, 1, u64> abs_b; | ||
| 89 | } const hadd2{insn}; | ||
| 90 | |||
| 91 | HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32, | ||
| 92 | GetCbuf(insn)); | ||
| 93 | } | ||
| 94 | |||
| 95 | void TranslatorVisitor::HADD2_imm(u64 insn) { | ||
| 96 | union { | ||
| 97 | u64 raw; | ||
| 98 | BitField<52, 1, u64> sat; | ||
| 99 | BitField<56, 1, u64> neg_high; | ||
| 100 | BitField<30, 9, u64> high; | ||
| 101 | BitField<29, 1, u64> neg_low; | ||
| 102 | BitField<20, 9, u64> low; | ||
| 103 | } const hadd2{insn}; | ||
| 104 | |||
| 105 | const u32 imm{ | ||
| 106 | static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 107 | static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 108 | HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); | ||
| 109 | } | ||
| 110 | |||
| 111 | void TranslatorVisitor::HADD2_32I(u64 insn) { | ||
| 112 | union { | ||
| 113 | u64 raw; | ||
| 114 | BitField<55, 1, u64> ftz; | ||
| 115 | BitField<52, 1, u64> sat; | ||
| 116 | BitField<56, 1, u64> neg_a; | ||
| 117 | BitField<53, 2, Swizzle> swizzle_a; | ||
| 118 | BitField<20, 32, u64> imm32; | ||
| 119 | } const hadd2{insn}; | ||
| 120 | |||
| 121 | const u32 imm{static_cast<u32>(hadd2.imm32)}; | ||
| 122 | HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0, | ||
| 123 | hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm)); | ||
| 124 | } | ||
| 125 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..fd7986701 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp | |||
| @@ -0,0 +1,169 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | namespace { | ||
| 9 | void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c, | ||
| 10 | Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, | ||
| 11 | bool sat, HalfPrecision precision) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_a; | ||
| 16 | } const hfma2{insn}; | ||
| 17 | |||
| 18 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)}; | ||
| 19 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 20 | auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)}; | ||
| 21 | const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()}; | ||
| 22 | if (promotion) { | ||
| 23 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 24 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 25 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 26 | } | ||
| 27 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 28 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 29 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 30 | } | ||
| 31 | if (lhs_c.Type() == IR::Type::F16) { | ||
| 32 | lhs_c = v.ir.FPConvert(32, lhs_c); | ||
| 33 | rhs_c = v.ir.FPConvert(32, rhs_c); | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 37 | lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b); | ||
| 38 | rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b); | ||
| 39 | |||
| 40 | lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c); | ||
| 41 | rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c); | ||
| 42 | |||
| 43 | const IR::FpControl fp_control{ | ||
| 44 | .no_contraction = true, | ||
| 45 | .rounding = IR::FpRounding::DontCare, | ||
| 46 | .fmz_mode = HalfPrecision2FmzMode(precision), | ||
| 47 | }; | ||
| 48 | IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)}; | ||
| 49 | IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)}; | ||
| 50 | if (precision == HalfPrecision::FMZ && !sat) { | ||
| 51 | // Do not implement FMZ if SAT is enabled, as it does the logic for us. | ||
| 52 | // On D3D9 mode, anything * 0 is zero, even NAN and infinity | ||
| 53 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 54 | const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)}; | ||
| 55 | const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)}; | ||
| 56 | const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)}; | ||
| 57 | lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)}; | ||
| 58 | |||
| 59 | const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)}; | ||
| 60 | const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)}; | ||
| 61 | const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)}; | ||
| 62 | rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)}; | ||
| 63 | } | ||
| 64 | if (sat) { | ||
| 65 | lhs = v.ir.FPSaturate(lhs); | ||
| 66 | rhs = v.ir.FPSaturate(rhs); | ||
| 67 | } | ||
| 68 | if (promotion) { | ||
| 69 | lhs = v.ir.FPConvert(16, lhs); | ||
| 70 | rhs = v.ir.FPConvert(16, rhs); | ||
| 71 | } | ||
| 72 | v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge)); | ||
| 73 | } | ||
| 74 | |||
| 75 | void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b, | ||
| 76 | Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat, | ||
| 77 | HalfPrecision precision) { | ||
| 78 | union { | ||
| 79 | u64 raw; | ||
| 80 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 81 | BitField<49, 2, Merge> merge; | ||
| 82 | } const hfma2{insn}; | ||
| 83 | |||
| 84 | HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c, | ||
| 85 | sat, precision); | ||
| 86 | } | ||
| 87 | } // Anonymous namespace | ||
| 88 | |||
| 89 | void TranslatorVisitor::HFMA2_reg(u64 insn) { | ||
| 90 | union { | ||
| 91 | u64 raw; | ||
| 92 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 93 | BitField<32, 1, u64> saturate; | ||
| 94 | BitField<31, 1, u64> neg_b; | ||
| 95 | BitField<30, 1, u64> neg_c; | ||
| 96 | BitField<35, 2, Swizzle> swizzle_c; | ||
| 97 | BitField<37, 2, HalfPrecision> precision; | ||
| 98 | } const hfma2{insn}; | ||
| 99 | |||
| 100 | HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c, | ||
| 101 | GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); | ||
| 102 | } | ||
| 103 | |||
| 104 | void TranslatorVisitor::HFMA2_rc(u64 insn) { | ||
| 105 | union { | ||
| 106 | u64 raw; | ||
| 107 | BitField<51, 1, u64> neg_c; | ||
| 108 | BitField<52, 1, u64> saturate; | ||
| 109 | BitField<53, 2, Swizzle> swizzle_b; | ||
| 110 | BitField<56, 1, u64> neg_b; | ||
| 111 | BitField<57, 2, HalfPrecision> precision; | ||
| 112 | } const hfma2{insn}; | ||
| 113 | |||
| 114 | HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32, | ||
| 115 | GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision); | ||
| 116 | } | ||
| 117 | |||
| 118 | void TranslatorVisitor::HFMA2_cr(u64 insn) { | ||
| 119 | union { | ||
| 120 | u64 raw; | ||
| 121 | BitField<51, 1, u64> neg_c; | ||
| 122 | BitField<52, 1, u64> saturate; | ||
| 123 | BitField<53, 2, Swizzle> swizzle_c; | ||
| 124 | BitField<56, 1, u64> neg_b; | ||
| 125 | BitField<57, 2, HalfPrecision> precision; | ||
| 126 | } const hfma2{insn}; | ||
| 127 | |||
| 128 | HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c, | ||
| 129 | GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); | ||
| 130 | } | ||
| 131 | |||
| 132 | void TranslatorVisitor::HFMA2_imm(u64 insn) { | ||
| 133 | union { | ||
| 134 | u64 raw; | ||
| 135 | BitField<51, 1, u64> neg_c; | ||
| 136 | BitField<52, 1, u64> saturate; | ||
| 137 | BitField<53, 2, Swizzle> swizzle_c; | ||
| 138 | |||
| 139 | BitField<56, 1, u64> neg_high; | ||
| 140 | BitField<30, 9, u64> high; | ||
| 141 | BitField<29, 1, u64> neg_low; | ||
| 142 | BitField<20, 9, u64> low; | ||
| 143 | BitField<57, 2, HalfPrecision> precision; | ||
| 144 | } const hfma2{insn}; | ||
| 145 | |||
| 146 | const u32 imm{ | ||
| 147 | static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 148 | static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 149 | |||
| 150 | HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm), | ||
| 151 | GetReg39(insn), hfma2.saturate != 0, hfma2.precision); | ||
| 152 | } | ||
| 153 | |||
| 154 | void TranslatorVisitor::HFMA2_32I(u64 insn) { | ||
| 155 | union { | ||
| 156 | u64 raw; | ||
| 157 | BitField<0, 8, IR::Reg> src_c; | ||
| 158 | BitField<20, 32, u64> imm32; | ||
| 159 | BitField<52, 1, u64> neg_c; | ||
| 160 | BitField<53, 2, Swizzle> swizzle_a; | ||
| 161 | BitField<55, 2, HalfPrecision> precision; | ||
| 162 | } const hfma2{insn}; | ||
| 163 | |||
| 164 | const u32 imm{static_cast<u32>(hfma2.imm32)}; | ||
| 165 | HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0, | ||
| 166 | Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision); | ||
| 167 | } | ||
| 168 | |||
| 169 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp new file mode 100644 index 000000000..0dbeb7f56 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | |||
| 9 | IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) { | ||
| 10 | switch (precision) { | ||
| 11 | case HalfPrecision::None: | ||
| 12 | return IR::FmzMode::None; | ||
| 13 | case HalfPrecision::FTZ: | ||
| 14 | return IR::FmzMode::FTZ; | ||
| 15 | case HalfPrecision::FMZ: | ||
| 16 | return IR::FmzMode::FMZ; | ||
| 17 | default: | ||
| 18 | return IR::FmzMode::DontCare; | ||
| 19 | } | ||
| 20 | } | ||
| 21 | |||
| 22 | std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { | ||
| 23 | switch (swizzle) { | ||
| 24 | case Swizzle::H1_H0: { | ||
| 25 | const IR::Value vector{ir.UnpackFloat2x16(value)}; | ||
| 26 | return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}}; | ||
| 27 | } | ||
| 28 | case Swizzle::H0_H0: { | ||
| 29 | const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)}; | ||
| 30 | return {scalar, scalar}; | ||
| 31 | } | ||
| 32 | case Swizzle::H1_H1: { | ||
| 33 | const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)}; | ||
| 34 | return {scalar, scalar}; | ||
| 35 | } | ||
| 36 | case Swizzle::F32: { | ||
| 37 | const IR::F32 scalar{ir.BitCast<IR::F32>(value)}; | ||
| 38 | return {scalar, scalar}; | ||
| 39 | } | ||
| 40 | } | ||
| 41 | throw InvalidArgument("Invalid swizzle {}", swizzle); | ||
| 42 | } | ||
| 43 | |||
| 44 | IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, | ||
| 45 | Merge merge) { | ||
| 46 | switch (merge) { | ||
| 47 | case Merge::H1_H0: | ||
| 48 | return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs)); | ||
| 49 | case Merge::F32: | ||
| 50 | return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs)); | ||
| 51 | case Merge::MRG_H0: | ||
| 52 | case Merge::MRG_H1: { | ||
| 53 | const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; | ||
| 54 | const bool is_h0{merge == Merge::MRG_H0}; | ||
| 55 | const IR::F16 insert{ir.FPConvert(16, is_h0 ? lhs : rhs)}; | ||
| 56 | return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, is_h0 ? 0 : 1)); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | throw InvalidArgument("Invalid merge {}", merge); | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h new file mode 100644 index 000000000..59da56a7e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h | |||
| @@ -0,0 +1,42 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/exception.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 11 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 12 | |||
| 13 | namespace Shader::Maxwell { | ||
| 14 | |||
| 15 | enum class Merge : u64 { | ||
| 16 | H1_H0, | ||
| 17 | F32, | ||
| 18 | MRG_H0, | ||
| 19 | MRG_H1, | ||
| 20 | }; | ||
| 21 | |||
| 22 | enum class Swizzle : u64 { | ||
| 23 | H1_H0, | ||
| 24 | F32, | ||
| 25 | H0_H0, | ||
| 26 | H1_H1, | ||
| 27 | }; | ||
| 28 | |||
| 29 | enum class HalfPrecision : u64 { | ||
| 30 | None = 0, | ||
| 31 | FTZ = 1, | ||
| 32 | FMZ = 2, | ||
| 33 | }; | ||
| 34 | |||
| 35 | IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision); | ||
| 36 | |||
| 37 | std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle); | ||
| 38 | |||
| 39 | IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, | ||
| 40 | Merge merge); | ||
| 41 | |||
| 42 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp new file mode 100644 index 000000000..3f548ce76 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp | |||
| @@ -0,0 +1,143 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | namespace { | ||
| 9 | void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a, | ||
| 10 | Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b, | ||
| 11 | HalfPrecision precision) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_a; | ||
| 16 | } const hmul2{insn}; | ||
| 17 | |||
| 18 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)}; | ||
| 19 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 20 | const bool promotion{lhs_a.Type() != lhs_b.Type()}; | ||
| 21 | if (promotion) { | ||
| 22 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 23 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 24 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 25 | } | ||
| 26 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 27 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 28 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 29 | } | ||
| 30 | } | ||
| 31 | lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); | ||
| 32 | rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); | ||
| 33 | |||
| 34 | lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); | ||
| 35 | rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); | ||
| 36 | |||
| 37 | const IR::FpControl fp_control{ | ||
| 38 | .no_contraction = true, | ||
| 39 | .rounding = IR::FpRounding::DontCare, | ||
| 40 | .fmz_mode = HalfPrecision2FmzMode(precision), | ||
| 41 | }; | ||
| 42 | IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)}; | ||
| 43 | IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)}; | ||
| 44 | if (precision == HalfPrecision::FMZ && !sat) { | ||
| 45 | // Do not implement FMZ if SAT is enabled, as it does the logic for us. | ||
| 46 | // On D3D9 mode, anything * 0 is zero, even NAN and infinity | ||
| 47 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 48 | const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)}; | ||
| 49 | const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)}; | ||
| 50 | const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)}; | ||
| 51 | lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)}; | ||
| 52 | |||
| 53 | const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)}; | ||
| 54 | const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)}; | ||
| 55 | const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)}; | ||
| 56 | rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)}; | ||
| 57 | } | ||
| 58 | if (sat) { | ||
| 59 | lhs = v.ir.FPSaturate(lhs); | ||
| 60 | rhs = v.ir.FPSaturate(rhs); | ||
| 61 | } | ||
| 62 | if (promotion) { | ||
| 63 | lhs = v.ir.FPConvert(16, lhs); | ||
| 64 | rhs = v.ir.FPConvert(16, rhs); | ||
| 65 | } | ||
| 66 | v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge)); | ||
| 67 | } | ||
| 68 | |||
| 69 | void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b, | ||
| 70 | Swizzle swizzle_b, const IR::U32& src_b) { | ||
| 71 | union { | ||
| 72 | u64 raw; | ||
| 73 | BitField<49, 2, Merge> merge; | ||
| 74 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 75 | BitField<39, 2, HalfPrecision> precision; | ||
| 76 | } const hmul2{insn}; | ||
| 77 | |||
| 78 | HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b, | ||
| 79 | hmul2.precision); | ||
| 80 | } | ||
| 81 | } // Anonymous namespace | ||
| 82 | |||
| 83 | void TranslatorVisitor::HMUL2_reg(u64 insn) { | ||
| 84 | union { | ||
| 85 | u64 raw; | ||
| 86 | BitField<32, 1, u64> sat; | ||
| 87 | BitField<31, 1, u64> neg_b; | ||
| 88 | BitField<30, 1, u64> abs_b; | ||
| 89 | BitField<44, 1, u64> abs_a; | ||
| 90 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 91 | } const hmul2{insn}; | ||
| 92 | |||
| 93 | HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0, | ||
| 94 | hmul2.swizzle_b, GetReg20(insn)); | ||
| 95 | } | ||
| 96 | |||
| 97 | void TranslatorVisitor::HMUL2_cbuf(u64 insn) { | ||
| 98 | union { | ||
| 99 | u64 raw; | ||
| 100 | BitField<52, 1, u64> sat; | ||
| 101 | BitField<54, 1, u64> abs_b; | ||
| 102 | BitField<43, 1, u64> neg_a; | ||
| 103 | BitField<44, 1, u64> abs_a; | ||
| 104 | } const hmul2{insn}; | ||
| 105 | |||
| 106 | HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false, | ||
| 107 | Swizzle::F32, GetCbuf(insn)); | ||
| 108 | } | ||
| 109 | |||
| 110 | void TranslatorVisitor::HMUL2_imm(u64 insn) { | ||
| 111 | union { | ||
| 112 | u64 raw; | ||
| 113 | BitField<52, 1, u64> sat; | ||
| 114 | BitField<56, 1, u64> neg_high; | ||
| 115 | BitField<30, 9, u64> high; | ||
| 116 | BitField<29, 1, u64> neg_low; | ||
| 117 | BitField<20, 9, u64> low; | ||
| 118 | BitField<43, 1, u64> neg_a; | ||
| 119 | BitField<44, 1, u64> abs_a; | ||
| 120 | } const hmul2{insn}; | ||
| 121 | |||
| 122 | const u32 imm{ | ||
| 123 | static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 124 | static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 125 | HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false, | ||
| 126 | Swizzle::H1_H0, ir.Imm32(imm)); | ||
| 127 | } | ||
| 128 | |||
| 129 | void TranslatorVisitor::HMUL2_32I(u64 insn) { | ||
| 130 | union { | ||
| 131 | u64 raw; | ||
| 132 | BitField<55, 2, HalfPrecision> precision; | ||
| 133 | BitField<52, 1, u64> sat; | ||
| 134 | BitField<53, 2, Swizzle> swizzle_a; | ||
| 135 | BitField<20, 32, u64> imm32; | ||
| 136 | } const hmul2{insn}; | ||
| 137 | |||
| 138 | const u32 imm{static_cast<u32>(hmul2.imm32)}; | ||
| 139 | HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false, | ||
| 140 | Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision); | ||
| 141 | } | ||
| 142 | |||
| 143 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp new file mode 100644 index 000000000..cca5b831f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp | |||
| @@ -0,0 +1,117 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | namespace { | ||
| 9 | void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b, | ||
| 10 | bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) { | ||
| 11 | union { | ||
| 12 | u64 insn; | ||
| 13 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 14 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 15 | BitField<39, 3, IR::Pred> pred; | ||
| 16 | BitField<42, 1, u64> neg_pred; | ||
| 17 | BitField<43, 1, u64> neg_a; | ||
| 18 | BitField<45, 2, BooleanOp> bop; | ||
| 19 | BitField<44, 1, u64> abs_a; | ||
| 20 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 21 | } const hset2{insn}; | ||
| 22 | |||
| 23 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)}; | ||
| 24 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 25 | |||
| 26 | if (lhs_a.Type() != lhs_b.Type()) { | ||
| 27 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 28 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 29 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 30 | } | ||
| 31 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 32 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 33 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 37 | lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0); | ||
| 38 | rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0); | ||
| 39 | |||
| 40 | lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); | ||
| 41 | rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); | ||
| 42 | |||
| 43 | const IR::FpControl control{ | ||
| 44 | .no_contraction = false, | ||
| 45 | .rounding = IR::FpRounding::DontCare, | ||
| 46 | .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 47 | }; | ||
| 48 | |||
| 49 | IR::U1 pred{v.ir.GetPred(hset2.pred)}; | ||
| 50 | if (hset2.neg_pred != 0) { | ||
| 51 | pred = v.ir.LogicalNot(pred); | ||
| 52 | } | ||
| 53 | const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)}; | ||
| 54 | const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)}; | ||
| 55 | const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hset2.bop)}; | ||
| 56 | const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hset2.bop)}; | ||
| 57 | |||
| 58 | const u32 true_value = bf ? 0x3c00 : 0xffff; | ||
| 59 | const IR::U32 true_val_lhs{v.ir.Imm32(true_value)}; | ||
| 60 | const IR::U32 true_val_rhs{v.ir.Imm32(true_value << 16)}; | ||
| 61 | const IR::U32 fail_result{v.ir.Imm32(0)}; | ||
| 62 | const IR::U32 result_lhs{v.ir.Select(bop_result_lhs, true_val_lhs, fail_result)}; | ||
| 63 | const IR::U32 result_rhs{v.ir.Select(bop_result_rhs, true_val_rhs, fail_result)}; | ||
| 64 | |||
| 65 | v.X(hset2.dest_reg, IR::U32{v.ir.BitwiseOr(result_lhs, result_rhs)}); | ||
| 66 | } | ||
| 67 | } // Anonymous namespace | ||
| 68 | |||
| 69 | void TranslatorVisitor::HSET2_reg(u64 insn) { | ||
| 70 | union { | ||
| 71 | u64 insn; | ||
| 72 | BitField<30, 1, u64> abs_b; | ||
| 73 | BitField<49, 1, u64> bf; | ||
| 74 | BitField<31, 1, u64> neg_b; | ||
| 75 | BitField<50, 1, u64> ftz; | ||
| 76 | BitField<35, 4, FPCompareOp> compare_op; | ||
| 77 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 78 | } const hset2{insn}; | ||
| 79 | |||
| 80 | HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, | ||
| 81 | hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b); | ||
| 82 | } | ||
| 83 | |||
| 84 | void TranslatorVisitor::HSET2_cbuf(u64 insn) { | ||
| 85 | union { | ||
| 86 | u64 insn; | ||
| 87 | BitField<53, 1, u64> bf; | ||
| 88 | BitField<56, 1, u64> neg_b; | ||
| 89 | BitField<54, 1, u64> ftz; | ||
| 90 | BitField<49, 4, FPCompareOp> compare_op; | ||
| 91 | } const hset2{insn}; | ||
| 92 | |||
| 93 | HSET2(*this, insn, GetCbuf(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, false, | ||
| 94 | hset2.compare_op, Swizzle::F32); | ||
| 95 | } | ||
| 96 | |||
| 97 | void TranslatorVisitor::HSET2_imm(u64 insn) { | ||
| 98 | union { | ||
| 99 | u64 insn; | ||
| 100 | BitField<53, 1, u64> bf; | ||
| 101 | BitField<54, 1, u64> ftz; | ||
| 102 | BitField<49, 4, FPCompareOp> compare_op; | ||
| 103 | BitField<56, 1, u64> neg_high; | ||
| 104 | BitField<30, 9, u64> high; | ||
| 105 | BitField<29, 1, u64> neg_low; | ||
| 106 | BitField<20, 9, u64> low; | ||
| 107 | } const hset2{insn}; | ||
| 108 | |||
| 109 | const u32 imm{ | ||
| 110 | static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 111 | static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 112 | |||
| 113 | HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op, | ||
| 114 | Swizzle::H1_H0); | ||
| 115 | } | ||
| 116 | |||
| 117 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp new file mode 100644 index 000000000..b3931dae3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp | |||
| @@ -0,0 +1,118 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | namespace { | ||
| 9 | void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b, | ||
| 10 | Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) { | ||
| 11 | union { | ||
| 12 | u64 insn; | ||
| 13 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 14 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 15 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 16 | BitField<39, 3, IR::Pred> pred; | ||
| 17 | BitField<42, 1, u64> neg_pred; | ||
| 18 | BitField<43, 1, u64> neg_a; | ||
| 19 | BitField<45, 2, BooleanOp> bop; | ||
| 20 | BitField<44, 1, u64> abs_a; | ||
| 21 | BitField<6, 1, u64> ftz; | ||
| 22 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 23 | } const hsetp2{insn}; | ||
| 24 | |||
| 25 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)}; | ||
| 26 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 27 | |||
| 28 | if (lhs_a.Type() != lhs_b.Type()) { | ||
| 29 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 30 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 31 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 32 | } | ||
| 33 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 34 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 35 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 36 | } | ||
| 37 | } | ||
| 38 | |||
| 39 | lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); | ||
| 40 | rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); | ||
| 41 | |||
| 42 | lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); | ||
| 43 | rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); | ||
| 44 | |||
| 45 | const IR::FpControl control{ | ||
| 46 | .no_contraction = false, | ||
| 47 | .rounding = IR::FpRounding::DontCare, | ||
| 48 | .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 49 | }; | ||
| 50 | |||
| 51 | IR::U1 pred{v.ir.GetPred(hsetp2.pred)}; | ||
| 52 | if (hsetp2.neg_pred != 0) { | ||
| 53 | pred = v.ir.LogicalNot(pred); | ||
| 54 | } | ||
| 55 | const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)}; | ||
| 56 | const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)}; | ||
| 57 | const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hsetp2.bop)}; | ||
| 58 | const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hsetp2.bop)}; | ||
| 59 | |||
| 60 | if (h_and) { | ||
| 61 | auto result = v.ir.LogicalAnd(bop_result_lhs, bop_result_rhs); | ||
| 62 | v.ir.SetPred(hsetp2.dest_pred_a, result); | ||
| 63 | v.ir.SetPred(hsetp2.dest_pred_b, v.ir.LogicalNot(result)); | ||
| 64 | } else { | ||
| 65 | v.ir.SetPred(hsetp2.dest_pred_a, bop_result_lhs); | ||
| 66 | v.ir.SetPred(hsetp2.dest_pred_b, bop_result_rhs); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | } // Anonymous namespace | ||
| 70 | |||
| 71 | void TranslatorVisitor::HSETP2_reg(u64 insn) { | ||
| 72 | union { | ||
| 73 | u64 insn; | ||
| 74 | BitField<30, 1, u64> abs_b; | ||
| 75 | BitField<49, 1, u64> h_and; | ||
| 76 | BitField<31, 1, u64> neg_b; | ||
| 77 | BitField<35, 4, FPCompareOp> compare_op; | ||
| 78 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 79 | } const hsetp2{insn}; | ||
| 80 | HSETP2(*this, insn, GetReg20(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, hsetp2.swizzle_b, | ||
| 81 | hsetp2.compare_op, hsetp2.h_and != 0); | ||
| 82 | } | ||
| 83 | |||
| 84 | void TranslatorVisitor::HSETP2_cbuf(u64 insn) { | ||
| 85 | union { | ||
| 86 | u64 insn; | ||
| 87 | BitField<53, 1, u64> h_and; | ||
| 88 | BitField<54, 1, u64> abs_b; | ||
| 89 | BitField<56, 1, u64> neg_b; | ||
| 90 | BitField<49, 4, FPCompareOp> compare_op; | ||
| 91 | } const hsetp2{insn}; | ||
| 92 | |||
| 93 | HSETP2(*this, insn, GetCbuf(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, Swizzle::F32, | ||
| 94 | hsetp2.compare_op, hsetp2.h_and != 0); | ||
| 95 | } | ||
| 96 | |||
| 97 | void TranslatorVisitor::HSETP2_imm(u64 insn) { | ||
| 98 | union { | ||
| 99 | u64 insn; | ||
| 100 | BitField<53, 1, u64> h_and; | ||
| 101 | BitField<54, 1, u64> ftz; | ||
| 102 | BitField<49, 4, FPCompareOp> compare_op; | ||
| 103 | BitField<56, 1, u64> neg_high; | ||
| 104 | BitField<30, 9, u64> high; | ||
| 105 | BitField<29, 1, u64> neg_low; | ||
| 106 | BitField<20, 9, u64> low; | ||
| 107 | } const hsetp2{insn}; | ||
| 108 | |||
| 109 | const u32 imm{static_cast<u32>(hsetp2.low << 6) | | ||
| 110 | static_cast<u32>((hsetp2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 111 | static_cast<u32>(hsetp2.high << 22) | | ||
| 112 | static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 113 | |||
| 114 | HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op, | ||
| 115 | hsetp2.h_and != 0); | ||
| 116 | } | ||
| 117 | |||
| 118 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp new file mode 100644 index 000000000..b446aae0e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp | |||
| @@ -0,0 +1,272 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | [[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding, | ||
| 12 | u32 offset) { | ||
| 13 | if (unaligned) { | ||
| 14 | return ir.Imm32(0); | ||
| 15 | } | ||
| 16 | return ir.GetCbuf(binding, IR::U32{IR::Value{offset}}); | ||
| 17 | } | ||
| 18 | } // Anonymous namespace | ||
| 19 | |||
| 20 | IR::U32 TranslatorVisitor::X(IR::Reg reg) { | ||
| 21 | return ir.GetReg(reg); | ||
| 22 | } | ||
| 23 | |||
| 24 | IR::U64 TranslatorVisitor::L(IR::Reg reg) { | ||
| 25 | if (!IR::IsAligned(reg, 2)) { | ||
| 26 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 27 | } | ||
| 28 | return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; | ||
| 29 | } | ||
| 30 | |||
| 31 | IR::F32 TranslatorVisitor::F(IR::Reg reg) { | ||
| 32 | return ir.BitCast<IR::F32>(X(reg)); | ||
| 33 | } | ||
| 34 | |||
| 35 | IR::F64 TranslatorVisitor::D(IR::Reg reg) { | ||
| 36 | if (!IR::IsAligned(reg, 2)) { | ||
| 37 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 38 | } | ||
| 39 | return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; | ||
| 40 | } | ||
| 41 | |||
/// Writes a raw 32-bit value to the destination register.
void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
    ir.SetReg(dest_reg, value);
}
| 45 | |||
| 46 | void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) { | ||
| 47 | if (!IR::IsAligned(dest_reg, 2)) { | ||
| 48 | throw NotImplementedException("Unaligned destination register {}", dest_reg); | ||
| 49 | } | ||
| 50 | const IR::Value result{ir.UnpackUint2x32(value)}; | ||
| 51 | for (int i = 0; i < 2; i++) { | ||
| 52 | X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { | ||
| 57 | X(dest_reg, ir.BitCast<IR::U32>(value)); | ||
| 58 | } | ||
| 59 | |||
| 60 | void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) { | ||
| 61 | if (!IR::IsAligned(dest_reg, 2)) { | ||
| 62 | throw NotImplementedException("Unaligned destination register {}", dest_reg); | ||
| 63 | } | ||
| 64 | const IR::Value result{ir.UnpackDouble2x32(value)}; | ||
| 65 | for (int i = 0; i < 2; i++) { | ||
| 66 | X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | |||
| 70 | IR::U32 TranslatorVisitor::GetReg8(u64 insn) { | ||
| 71 | union { | ||
| 72 | u64 raw; | ||
| 73 | BitField<8, 8, IR::Reg> index; | ||
| 74 | } const reg{insn}; | ||
| 75 | return X(reg.index); | ||
| 76 | } | ||
| 77 | |||
| 78 | IR::U32 TranslatorVisitor::GetReg20(u64 insn) { | ||
| 79 | union { | ||
| 80 | u64 raw; | ||
| 81 | BitField<20, 8, IR::Reg> index; | ||
| 82 | } const reg{insn}; | ||
| 83 | return X(reg.index); | ||
| 84 | } | ||
| 85 | |||
| 86 | IR::U32 TranslatorVisitor::GetReg39(u64 insn) { | ||
| 87 | union { | ||
| 88 | u64 raw; | ||
| 89 | BitField<39, 8, IR::Reg> index; | ||
| 90 | } const reg{insn}; | ||
| 91 | return X(reg.index); | ||
| 92 | } | ||
| 93 | |||
| 94 | IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) { | ||
| 95 | return ir.BitCast<IR::F32>(GetReg8(insn)); | ||
| 96 | } | ||
| 97 | |||
| 98 | IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) { | ||
| 99 | return ir.BitCast<IR::F32>(GetReg20(insn)); | ||
| 100 | } | ||
| 101 | |||
| 102 | IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { | ||
| 103 | return ir.BitCast<IR::F32>(GetReg39(insn)); | ||
| 104 | } | ||
| 105 | |||
| 106 | IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) { | ||
| 107 | union { | ||
| 108 | u64 raw; | ||
| 109 | BitField<20, 8, IR::Reg> index; | ||
| 110 | } const reg{insn}; | ||
| 111 | return D(reg.index); | ||
| 112 | } | ||
| 113 | |||
| 114 | IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) { | ||
| 115 | union { | ||
| 116 | u64 raw; | ||
| 117 | BitField<39, 8, IR::Reg> index; | ||
| 118 | } const reg{insn}; | ||
| 119 | return D(reg.index); | ||
| 120 | } | ||
| 121 | |||
| 122 | static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) { | ||
| 123 | union { | ||
| 124 | u64 raw; | ||
| 125 | BitField<20, 14, u64> offset; | ||
| 126 | BitField<34, 5, u64> binding; | ||
| 127 | } const cbuf{insn}; | ||
| 128 | |||
| 129 | if (cbuf.binding >= 18) { | ||
| 130 | throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); | ||
| 131 | } | ||
| 132 | if (cbuf.offset >= 0x10'000) { | ||
| 133 | throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset); | ||
| 134 | } | ||
| 135 | const IR::Value binding{static_cast<u32>(cbuf.binding)}; | ||
| 136 | const IR::Value byte_offset{static_cast<u32>(cbuf.offset) * 4}; | ||
| 137 | return {IR::U32{binding}, IR::U32{byte_offset}}; | ||
| 138 | } | ||
| 139 | |||
| 140 | IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { | ||
| 141 | const auto [binding, byte_offset]{CbufAddr(insn)}; | ||
| 142 | return ir.GetCbuf(binding, byte_offset); | ||
| 143 | } | ||
| 144 | |||
| 145 | IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) { | ||
| 146 | const auto [binding, byte_offset]{CbufAddr(insn)}; | ||
| 147 | return ir.GetFloatCbuf(binding, byte_offset); | ||
| 148 | } | ||
| 149 | |||
| 150 | IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { | ||
| 151 | union { | ||
| 152 | u64 raw; | ||
| 153 | BitField<20, 1, u64> unaligned; | ||
| 154 | } const cbuf{insn}; | ||
| 155 | |||
| 156 | const auto [binding, offset_value]{CbufAddr(insn)}; | ||
| 157 | const bool unaligned{cbuf.unaligned != 0}; | ||
| 158 | const u32 offset{offset_value.U32()}; | ||
| 159 | const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u}; | ||
| 160 | |||
| 161 | const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})}; | ||
| 162 | const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)}; | ||
| 163 | return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value)); | ||
| 164 | } | ||
| 165 | |||
| 166 | IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) { | ||
| 167 | union { | ||
| 168 | u64 raw; | ||
| 169 | BitField<20, 1, u64> unaligned; | ||
| 170 | } const cbuf{insn}; | ||
| 171 | |||
| 172 | if (cbuf.unaligned != 0) { | ||
| 173 | throw NotImplementedException("Unaligned packed constant buffer read"); | ||
| 174 | } | ||
| 175 | const auto [binding, lower_offset]{CbufAddr(insn)}; | ||
| 176 | const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)}; | ||
| 177 | const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)}; | ||
| 178 | const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)}; | ||
| 179 | return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value)); | ||
| 180 | } | ||
| 181 | |||
| 182 | IR::U32 TranslatorVisitor::GetImm20(u64 insn) { | ||
| 183 | union { | ||
| 184 | u64 raw; | ||
| 185 | BitField<20, 19, u64> value; | ||
| 186 | BitField<56, 1, u64> is_negative; | ||
| 187 | } const imm{insn}; | ||
| 188 | |||
| 189 | if (imm.is_negative != 0) { | ||
| 190 | const s64 raw{static_cast<s64>(imm.value)}; | ||
| 191 | return ir.Imm32(static_cast<s32>(-(1LL << 19) + raw)); | ||
| 192 | } else { | ||
| 193 | return ir.Imm32(static_cast<u32>(imm.value)); | ||
| 194 | } | ||
| 195 | } | ||
| 196 | |||
| 197 | IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { | ||
| 198 | union { | ||
| 199 | u64 raw; | ||
| 200 | BitField<20, 19, u64> value; | ||
| 201 | BitField<56, 1, u64> is_negative; | ||
| 202 | } const imm{insn}; | ||
| 203 | const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)}; | ||
| 204 | const u32 value{static_cast<u32>(imm.value) << 12}; | ||
| 205 | return ir.Imm32(Common::BitCast<f32>(value | sign_bit)); | ||
| 206 | } | ||
| 207 | |||
| 208 | IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) { | ||
| 209 | union { | ||
| 210 | u64 raw; | ||
| 211 | BitField<20, 19, u64> value; | ||
| 212 | BitField<56, 1, u64> is_negative; | ||
| 213 | } const imm{insn}; | ||
| 214 | const u64 sign_bit{imm.is_negative != 0 ? (1ULL << 63) : 0}; | ||
| 215 | const u64 value{imm.value << 44}; | ||
| 216 | return ir.Imm64(Common::BitCast<f64>(value | sign_bit)); | ||
| 217 | } | ||
| 218 | |||
| 219 | IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) { | ||
| 220 | const s64 value{GetImm20(insn).U32()}; | ||
| 221 | return ir.Imm64(static_cast<u64>(static_cast<s64>(value) << 32)); | ||
| 222 | } | ||
| 223 | |||
| 224 | IR::U32 TranslatorVisitor::GetImm32(u64 insn) { | ||
| 225 | union { | ||
| 226 | u64 raw; | ||
| 227 | BitField<20, 32, u64> value; | ||
| 228 | } const imm{insn}; | ||
| 229 | return ir.Imm32(static_cast<u32>(imm.value)); | ||
| 230 | } | ||
| 231 | |||
| 232 | IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) { | ||
| 233 | union { | ||
| 234 | u64 raw; | ||
| 235 | BitField<20, 32, u64> value; | ||
| 236 | } const imm{insn}; | ||
| 237 | return ir.Imm32(Common::BitCast<f32>(static_cast<u32>(imm.value))); | ||
| 238 | } | ||
| 239 | |||
/// Sets the Z flag to the given value.
void TranslatorVisitor::SetZFlag(const IR::U1& value) {
    ir.SetZFlag(value);
}

/// Sets the S flag to the given value.
void TranslatorVisitor::SetSFlag(const IR::U1& value) {
    ir.SetSFlag(value);
}

/// Sets the C flag to the given value.
void TranslatorVisitor::SetCFlag(const IR::U1& value) {
    ir.SetCFlag(value);
}

/// Sets the O flag to the given value.
void TranslatorVisitor::SetOFlag(const IR::U1& value) {
    ir.SetOFlag(value);
}
| 255 | |||
/// Clears the Z flag by setting it to an immediate false.
void TranslatorVisitor::ResetZero() {
    SetZFlag(ir.Imm1(false));
}

/// Clears the S flag by setting it to an immediate false.
void TranslatorVisitor::ResetSFlag() {
    SetSFlag(ir.Imm1(false));
}

/// Clears the C flag by setting it to an immediate false.
void TranslatorVisitor::ResetCFlag() {
    SetCFlag(ir.Imm1(false));
}

/// Clears the O flag by setting it to an immediate false.
void TranslatorVisitor::ResetOFlag() {
    SetOFlag(ir.Imm1(false));
}
| 271 | |||
| 272 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h new file mode 100644 index 000000000..335e4f24f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h | |||
| @@ -0,0 +1,387 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "shader_recompiler/environment.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/instruction.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | |||
| 14 | enum class CompareOp : u64 { | ||
| 15 | False, | ||
| 16 | LessThan, | ||
| 17 | Equal, | ||
| 18 | LessThanEqual, | ||
| 19 | GreaterThan, | ||
| 20 | NotEqual, | ||
| 21 | GreaterThanEqual, | ||
| 22 | True, | ||
| 23 | }; | ||
| 24 | |||
| 25 | enum class BooleanOp : u64 { | ||
| 26 | AND, | ||
| 27 | OR, | ||
| 28 | XOR, | ||
| 29 | }; | ||
| 30 | |||
| 31 | enum class PredicateOp : u64 { | ||
| 32 | False, | ||
| 33 | True, | ||
| 34 | Zero, | ||
| 35 | NonZero, | ||
| 36 | }; | ||
| 37 | |||
| 38 | enum class FPCompareOp : u64 { | ||
| 39 | F, | ||
| 40 | LT, | ||
| 41 | EQ, | ||
| 42 | LE, | ||
| 43 | GT, | ||
| 44 | NE, | ||
| 45 | GE, | ||
| 46 | NUM, | ||
| 47 | Nan, | ||
| 48 | LTU, | ||
| 49 | EQU, | ||
| 50 | LEU, | ||
| 51 | GTU, | ||
| 52 | NEU, | ||
| 53 | GEU, | ||
| 54 | T, | ||
| 55 | }; | ||
| 56 | |||
| 57 | class TranslatorVisitor { | ||
| 58 | public: | ||
| 59 | explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} | ||
| 60 | |||
| 61 | Environment& env; | ||
| 62 | IR::IREmitter ir; | ||
| 63 | |||
| 64 | void AL2P(u64 insn); | ||
| 65 | void ALD(u64 insn); | ||
| 66 | void AST(u64 insn); | ||
| 67 | void ATOM_cas(u64 insn); | ||
| 68 | void ATOM(u64 insn); | ||
| 69 | void ATOMS_cas(u64 insn); | ||
| 70 | void ATOMS(u64 insn); | ||
| 71 | void B2R(u64 insn); | ||
| 72 | void BAR(u64 insn); | ||
| 73 | void BFE_reg(u64 insn); | ||
| 74 | void BFE_cbuf(u64 insn); | ||
| 75 | void BFE_imm(u64 insn); | ||
| 76 | void BFI_reg(u64 insn); | ||
| 77 | void BFI_rc(u64 insn); | ||
| 78 | void BFI_cr(u64 insn); | ||
| 79 | void BFI_imm(u64 insn); | ||
| 80 | void BPT(u64 insn); | ||
| 81 | void BRA(u64 insn); | ||
| 82 | void BRK(u64 insn); | ||
| 83 | void BRX(u64 insn); | ||
| 84 | void CAL(); | ||
| 85 | void CCTL(u64 insn); | ||
| 86 | void CCTLL(u64 insn); | ||
| 87 | void CONT(u64 insn); | ||
| 88 | void CS2R(u64 insn); | ||
| 89 | void CSET(u64 insn); | ||
| 90 | void CSETP(u64 insn); | ||
| 91 | void DADD_reg(u64 insn); | ||
| 92 | void DADD_cbuf(u64 insn); | ||
| 93 | void DADD_imm(u64 insn); | ||
| 94 | void DEPBAR(); | ||
| 95 | void DFMA_reg(u64 insn); | ||
| 96 | void DFMA_rc(u64 insn); | ||
| 97 | void DFMA_cr(u64 insn); | ||
| 98 | void DFMA_imm(u64 insn); | ||
| 99 | void DMNMX_reg(u64 insn); | ||
| 100 | void DMNMX_cbuf(u64 insn); | ||
| 101 | void DMNMX_imm(u64 insn); | ||
| 102 | void DMUL_reg(u64 insn); | ||
| 103 | void DMUL_cbuf(u64 insn); | ||
| 104 | void DMUL_imm(u64 insn); | ||
| 105 | void DSET_reg(u64 insn); | ||
| 106 | void DSET_cbuf(u64 insn); | ||
| 107 | void DSET_imm(u64 insn); | ||
| 108 | void DSETP_reg(u64 insn); | ||
| 109 | void DSETP_cbuf(u64 insn); | ||
| 110 | void DSETP_imm(u64 insn); | ||
| 111 | void EXIT(); | ||
| 112 | void F2F_reg(u64 insn); | ||
| 113 | void F2F_cbuf(u64 insn); | ||
| 114 | void F2F_imm(u64 insn); | ||
| 115 | void F2I_reg(u64 insn); | ||
| 116 | void F2I_cbuf(u64 insn); | ||
| 117 | void F2I_imm(u64 insn); | ||
| 118 | void FADD_reg(u64 insn); | ||
| 119 | void FADD_cbuf(u64 insn); | ||
| 120 | void FADD_imm(u64 insn); | ||
| 121 | void FADD32I(u64 insn); | ||
| 122 | void FCHK_reg(u64 insn); | ||
| 123 | void FCHK_cbuf(u64 insn); | ||
| 124 | void FCHK_imm(u64 insn); | ||
| 125 | void FCMP_reg(u64 insn); | ||
| 126 | void FCMP_rc(u64 insn); | ||
| 127 | void FCMP_cr(u64 insn); | ||
| 128 | void FCMP_imm(u64 insn); | ||
| 129 | void FFMA_reg(u64 insn); | ||
| 130 | void FFMA_rc(u64 insn); | ||
| 131 | void FFMA_cr(u64 insn); | ||
| 132 | void FFMA_imm(u64 insn); | ||
| 133 | void FFMA32I(u64 insn); | ||
| 134 | void FLO_reg(u64 insn); | ||
| 135 | void FLO_cbuf(u64 insn); | ||
| 136 | void FLO_imm(u64 insn); | ||
| 137 | void FMNMX_reg(u64 insn); | ||
| 138 | void FMNMX_cbuf(u64 insn); | ||
| 139 | void FMNMX_imm(u64 insn); | ||
| 140 | void FMUL_reg(u64 insn); | ||
| 141 | void FMUL_cbuf(u64 insn); | ||
| 142 | void FMUL_imm(u64 insn); | ||
| 143 | void FMUL32I(u64 insn); | ||
| 144 | void FSET_reg(u64 insn); | ||
| 145 | void FSET_cbuf(u64 insn); | ||
| 146 | void FSET_imm(u64 insn); | ||
| 147 | void FSETP_reg(u64 insn); | ||
| 148 | void FSETP_cbuf(u64 insn); | ||
| 149 | void FSETP_imm(u64 insn); | ||
| 150 | void FSWZADD(u64 insn); | ||
| 151 | void GETCRSPTR(u64 insn); | ||
| 152 | void GETLMEMBASE(u64 insn); | ||
| 153 | void HADD2_reg(u64 insn); | ||
| 154 | void HADD2_cbuf(u64 insn); | ||
| 155 | void HADD2_imm(u64 insn); | ||
| 156 | void HADD2_32I(u64 insn); | ||
| 157 | void HFMA2_reg(u64 insn); | ||
| 158 | void HFMA2_rc(u64 insn); | ||
| 159 | void HFMA2_cr(u64 insn); | ||
| 160 | void HFMA2_imm(u64 insn); | ||
| 161 | void HFMA2_32I(u64 insn); | ||
| 162 | void HMUL2_reg(u64 insn); | ||
| 163 | void HMUL2_cbuf(u64 insn); | ||
| 164 | void HMUL2_imm(u64 insn); | ||
| 165 | void HMUL2_32I(u64 insn); | ||
| 166 | void HSET2_reg(u64 insn); | ||
| 167 | void HSET2_cbuf(u64 insn); | ||
| 168 | void HSET2_imm(u64 insn); | ||
| 169 | void HSETP2_reg(u64 insn); | ||
| 170 | void HSETP2_cbuf(u64 insn); | ||
| 171 | void HSETP2_imm(u64 insn); | ||
| 172 | void I2F_reg(u64 insn); | ||
| 173 | void I2F_cbuf(u64 insn); | ||
| 174 | void I2F_imm(u64 insn); | ||
| 175 | void I2I_reg(u64 insn); | ||
| 176 | void I2I_cbuf(u64 insn); | ||
| 177 | void I2I_imm(u64 insn); | ||
| 178 | void IADD_reg(u64 insn); | ||
| 179 | void IADD_cbuf(u64 insn); | ||
| 180 | void IADD_imm(u64 insn); | ||
| 181 | void IADD3_reg(u64 insn); | ||
| 182 | void IADD3_cbuf(u64 insn); | ||
| 183 | void IADD3_imm(u64 insn); | ||
| 184 | void IADD32I(u64 insn); | ||
| 185 | void ICMP_reg(u64 insn); | ||
| 186 | void ICMP_rc(u64 insn); | ||
| 187 | void ICMP_cr(u64 insn); | ||
| 188 | void ICMP_imm(u64 insn); | ||
| 189 | void IDE(u64 insn); | ||
| 190 | void IDP_reg(u64 insn); | ||
| 191 | void IDP_imm(u64 insn); | ||
| 192 | void IMAD_reg(u64 insn); | ||
| 193 | void IMAD_rc(u64 insn); | ||
| 194 | void IMAD_cr(u64 insn); | ||
| 195 | void IMAD_imm(u64 insn); | ||
| 196 | void IMAD32I(u64 insn); | ||
| 197 | void IMADSP_reg(u64 insn); | ||
| 198 | void IMADSP_rc(u64 insn); | ||
| 199 | void IMADSP_cr(u64 insn); | ||
| 200 | void IMADSP_imm(u64 insn); | ||
| 201 | void IMNMX_reg(u64 insn); | ||
| 202 | void IMNMX_cbuf(u64 insn); | ||
| 203 | void IMNMX_imm(u64 insn); | ||
| 204 | void IMUL_reg(u64 insn); | ||
| 205 | void IMUL_cbuf(u64 insn); | ||
| 206 | void IMUL_imm(u64 insn); | ||
| 207 | void IMUL32I(u64 insn); | ||
| 208 | void IPA(u64 insn); | ||
| 209 | void ISBERD(u64 insn); | ||
| 210 | void ISCADD_reg(u64 insn); | ||
| 211 | void ISCADD_cbuf(u64 insn); | ||
| 212 | void ISCADD_imm(u64 insn); | ||
| 213 | void ISCADD32I(u64 insn); | ||
| 214 | void ISET_reg(u64 insn); | ||
| 215 | void ISET_cbuf(u64 insn); | ||
| 216 | void ISET_imm(u64 insn); | ||
| 217 | void ISETP_reg(u64 insn); | ||
| 218 | void ISETP_cbuf(u64 insn); | ||
| 219 | void ISETP_imm(u64 insn); | ||
| 220 | void JCAL(u64 insn); | ||
| 221 | void JMP(u64 insn); | ||
| 222 | void JMX(u64 insn); | ||
| 223 | void KIL(); | ||
| 224 | void LD(u64 insn); | ||
| 225 | void LDC(u64 insn); | ||
| 226 | void LDG(u64 insn); | ||
| 227 | void LDL(u64 insn); | ||
| 228 | void LDS(u64 insn); | ||
| 229 | void LEA_hi_reg(u64 insn); | ||
| 230 | void LEA_hi_cbuf(u64 insn); | ||
| 231 | void LEA_lo_reg(u64 insn); | ||
| 232 | void LEA_lo_cbuf(u64 insn); | ||
| 233 | void LEA_lo_imm(u64 insn); | ||
| 234 | void LEPC(u64 insn); | ||
| 235 | void LONGJMP(u64 insn); | ||
| 236 | void LOP_reg(u64 insn); | ||
| 237 | void LOP_cbuf(u64 insn); | ||
| 238 | void LOP_imm(u64 insn); | ||
| 239 | void LOP3_reg(u64 insn); | ||
| 240 | void LOP3_cbuf(u64 insn); | ||
| 241 | void LOP3_imm(u64 insn); | ||
| 242 | void LOP32I(u64 insn); | ||
| 243 | void MEMBAR(u64 insn); | ||
| 244 | void MOV_reg(u64 insn); | ||
| 245 | void MOV_cbuf(u64 insn); | ||
| 246 | void MOV_imm(u64 insn); | ||
| 247 | void MOV32I(u64 insn); | ||
| 248 | void MUFU(u64 insn); | ||
| 249 | void NOP(u64 insn); | ||
| 250 | void OUT_reg(u64 insn); | ||
| 251 | void OUT_cbuf(u64 insn); | ||
| 252 | void OUT_imm(u64 insn); | ||
| 253 | void P2R_reg(u64 insn); | ||
| 254 | void P2R_cbuf(u64 insn); | ||
| 255 | void P2R_imm(u64 insn); | ||
| 256 | void PBK(); | ||
| 257 | void PCNT(); | ||
| 258 | void PEXIT(u64 insn); | ||
| 259 | void PIXLD(u64 insn); | ||
| 260 | void PLONGJMP(u64 insn); | ||
| 261 | void POPC_reg(u64 insn); | ||
| 262 | void POPC_cbuf(u64 insn); | ||
| 263 | void POPC_imm(u64 insn); | ||
| 264 | void PRET(u64 insn); | ||
| 265 | void PRMT_reg(u64 insn); | ||
| 266 | void PRMT_rc(u64 insn); | ||
| 267 | void PRMT_cr(u64 insn); | ||
| 268 | void PRMT_imm(u64 insn); | ||
| 269 | void PSET(u64 insn); | ||
| 270 | void PSETP(u64 insn); | ||
| 271 | void R2B(u64 insn); | ||
| 272 | void R2P_reg(u64 insn); | ||
| 273 | void R2P_cbuf(u64 insn); | ||
| 274 | void R2P_imm(u64 insn); | ||
| 275 | void RAM(u64 insn); | ||
| 276 | void RED(u64 insn); | ||
| 277 | void RET(u64 insn); | ||
| 278 | void RRO_reg(u64 insn); | ||
| 279 | void RRO_cbuf(u64 insn); | ||
| 280 | void RRO_imm(u64 insn); | ||
| 281 | void RTT(u64 insn); | ||
| 282 | void S2R(u64 insn); | ||
| 283 | void SAM(u64 insn); | ||
| 284 | void SEL_reg(u64 insn); | ||
| 285 | void SEL_cbuf(u64 insn); | ||
| 286 | void SEL_imm(u64 insn); | ||
| 287 | void SETCRSPTR(u64 insn); | ||
| 288 | void SETLMEMBASE(u64 insn); | ||
| 289 | void SHF_l_reg(u64 insn); | ||
| 290 | void SHF_l_imm(u64 insn); | ||
| 291 | void SHF_r_reg(u64 insn); | ||
| 292 | void SHF_r_imm(u64 insn); | ||
| 293 | void SHFL(u64 insn); | ||
| 294 | void SHL_reg(u64 insn); | ||
| 295 | void SHL_cbuf(u64 insn); | ||
| 296 | void SHL_imm(u64 insn); | ||
| 297 | void SHR_reg(u64 insn); | ||
| 298 | void SHR_cbuf(u64 insn); | ||
| 299 | void SHR_imm(u64 insn); | ||
| 300 | void SSY(); | ||
| 301 | void ST(u64 insn); | ||
| 302 | void STG(u64 insn); | ||
| 303 | void STL(u64 insn); | ||
| 304 | void STP(u64 insn); | ||
| 305 | void STS(u64 insn); | ||
| 306 | void SUATOM(u64 insn); | ||
| 307 | void SUATOM_cas(u64 insn); | ||
| 308 | void SULD(u64 insn); | ||
| 309 | void SURED(u64 insn); | ||
| 310 | void SUST(u64 insn); | ||
| 311 | void SYNC(u64 insn); | ||
| 312 | void TEX(u64 insn); | ||
| 313 | void TEX_b(u64 insn); | ||
| 314 | void TEXS(u64 insn); | ||
| 315 | void TLD(u64 insn); | ||
| 316 | void TLD_b(u64 insn); | ||
| 317 | void TLD4(u64 insn); | ||
| 318 | void TLD4_b(u64 insn); | ||
| 319 | void TLD4S(u64 insn); | ||
| 320 | void TLDS(u64 insn); | ||
| 321 | void TMML(u64 insn); | ||
| 322 | void TMML_b(u64 insn); | ||
| 323 | void TXA(u64 insn); | ||
| 324 | void TXD(u64 insn); | ||
| 325 | void TXD_b(u64 insn); | ||
| 326 | void TXQ(u64 insn); | ||
| 327 | void TXQ_b(u64 insn); | ||
| 328 | void VABSDIFF(u64 insn); | ||
| 329 | void VABSDIFF4(u64 insn); | ||
| 330 | void VADD(u64 insn); | ||
| 331 | void VMAD(u64 insn); | ||
| 332 | void VMNMX(u64 insn); | ||
| 333 | void VOTE(u64 insn); | ||
| 334 | void VOTE_vtg(u64 insn); | ||
| 335 | void VSET(u64 insn); | ||
| 336 | void VSETP(u64 insn); | ||
| 337 | void VSHL(u64 insn); | ||
| 338 | void VSHR(u64 insn); | ||
| 339 | void XMAD_reg(u64 insn); | ||
| 340 | void XMAD_rc(u64 insn); | ||
| 341 | void XMAD_cr(u64 insn); | ||
| 342 | void XMAD_imm(u64 insn); | ||
| 343 | |||
| 344 | [[nodiscard]] IR::U32 X(IR::Reg reg); | ||
| 345 | [[nodiscard]] IR::U64 L(IR::Reg reg); | ||
| 346 | [[nodiscard]] IR::F32 F(IR::Reg reg); | ||
| 347 | [[nodiscard]] IR::F64 D(IR::Reg reg); | ||
| 348 | |||
| 349 | void X(IR::Reg dest_reg, const IR::U32& value); | ||
| 350 | void L(IR::Reg dest_reg, const IR::U64& value); | ||
| 351 | void F(IR::Reg dest_reg, const IR::F32& value); | ||
| 352 | void D(IR::Reg dest_reg, const IR::F64& value); | ||
| 353 | |||
| 354 | [[nodiscard]] IR::U32 GetReg8(u64 insn); | ||
| 355 | [[nodiscard]] IR::U32 GetReg20(u64 insn); | ||
| 356 | [[nodiscard]] IR::U32 GetReg39(u64 insn); | ||
| 357 | [[nodiscard]] IR::F32 GetFloatReg8(u64 insn); | ||
| 358 | [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); | ||
| 359 | [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); | ||
| 360 | [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn); | ||
| 361 | [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn); | ||
| 362 | |||
| 363 | [[nodiscard]] IR::U32 GetCbuf(u64 insn); | ||
| 364 | [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); | ||
| 365 | [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn); | ||
| 366 | [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn); | ||
| 367 | |||
| 368 | [[nodiscard]] IR::U32 GetImm20(u64 insn); | ||
| 369 | [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); | ||
| 370 | [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn); | ||
| 371 | [[nodiscard]] IR::U64 GetPackedImm20(u64 insn); | ||
| 372 | |||
| 373 | [[nodiscard]] IR::U32 GetImm32(u64 insn); | ||
| 374 | [[nodiscard]] IR::F32 GetFloatImm32(u64 insn); | ||
| 375 | |||
| 376 | void SetZFlag(const IR::U1& value); | ||
| 377 | void SetSFlag(const IR::U1& value); | ||
| 378 | void SetCFlag(const IR::U1& value); | ||
| 379 | void SetOFlag(const IR::U1& value); | ||
| 380 | |||
| 381 | void ResetZero(); | ||
| 382 | void ResetSFlag(); | ||
| 383 | void ResetCFlag(); | ||
| 384 | void ResetOFlag(); | ||
| 385 | }; | ||
| 386 | |||
| 387 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp new file mode 100644 index 000000000..8ffd84867 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp | |||
| @@ -0,0 +1,105 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x, | ||
| 12 | bool cc) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_a; | ||
| 17 | } const iadd{insn}; | ||
| 18 | |||
| 19 | if (sat) { | ||
| 20 | throw NotImplementedException("IADD SAT"); | ||
| 21 | } | ||
| 22 | if (x && po) { | ||
| 23 | throw NotImplementedException("IADD X+PO"); | ||
| 24 | } | ||
| 25 | // Operand A is always read from here, negated if needed | ||
| 26 | IR::U32 op_a{v.X(iadd.src_a)}; | ||
| 27 | if (neg_a) { | ||
| 28 | op_a = v.ir.INeg(op_a); | ||
| 29 | } | ||
| 30 | // Add both operands | ||
| 31 | IR::U32 result{v.ir.IAdd(op_a, op_b)}; | ||
| 32 | if (x) { | ||
| 33 | const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; | ||
| 34 | result = v.ir.IAdd(result, carry); | ||
| 35 | } | ||
| 36 | if (po) { | ||
| 37 | // .PO adds one to the result | ||
| 38 | result = v.ir.IAdd(result, v.ir.Imm32(1)); | ||
| 39 | } | ||
| 40 | if (cc) { | ||
| 41 | // Store flags | ||
| 42 | // TODO: Does this grab the result pre-PO or after? | ||
| 43 | if (po) { | ||
| 44 | throw NotImplementedException("IADD CC+PO"); | ||
| 45 | } | ||
| 46 | // TODO: How does CC behave when X is set? | ||
| 47 | if (x) { | ||
| 48 | throw NotImplementedException("IADD X+CC"); | ||
| 49 | } | ||
| 50 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 51 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 52 | v.SetCFlag(v.ir.GetCarryFromOp(result)); | ||
| 53 | v.SetOFlag(v.ir.GetOverflowFromOp(result)); | ||
| 54 | } | ||
| 55 | // Store result | ||
| 56 | v.X(iadd.dest_reg, result); | ||
| 57 | } | ||
| 58 | |||
| 59 | void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { | ||
| 60 | union { | ||
| 61 | u64 insn; | ||
| 62 | BitField<43, 1, u64> x; | ||
| 63 | BitField<47, 1, u64> cc; | ||
| 64 | BitField<48, 2, u64> three_for_po; | ||
| 65 | BitField<48, 1, u64> neg_b; | ||
| 66 | BitField<49, 1, u64> neg_a; | ||
| 67 | BitField<50, 1, u64> sat; | ||
| 68 | } const iadd{insn}; | ||
| 69 | |||
| 70 | const bool po{iadd.three_for_po == 3}; | ||
| 71 | if (!po && iadd.neg_b != 0) { | ||
| 72 | op_b = v.ir.INeg(op_b); | ||
| 73 | } | ||
| 74 | IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0); | ||
| 75 | } | ||
| 76 | } // Anonymous namespace | ||
| 77 | |||
| 78 | void TranslatorVisitor::IADD_reg(u64 insn) { | ||
| 79 | IADD(*this, insn, GetReg20(insn)); | ||
| 80 | } | ||
| 81 | |||
| 82 | void TranslatorVisitor::IADD_cbuf(u64 insn) { | ||
| 83 | IADD(*this, insn, GetCbuf(insn)); | ||
| 84 | } | ||
| 85 | |||
| 86 | void TranslatorVisitor::IADD_imm(u64 insn) { | ||
| 87 | IADD(*this, insn, GetImm20(insn)); | ||
| 88 | } | ||
| 89 | |||
| 90 | void TranslatorVisitor::IADD32I(u64 insn) { | ||
| 91 | union { | ||
| 92 | u64 raw; | ||
| 93 | BitField<52, 1, u64> cc; | ||
| 94 | BitField<53, 1, u64> x; | ||
| 95 | BitField<54, 1, u64> sat; | ||
| 96 | BitField<55, 2, u64> three_for_po; | ||
| 97 | BitField<56, 1, u64> neg_a; | ||
| 98 | } const iadd32i{insn}; | ||
| 99 | |||
| 100 | const bool po{iadd32i.three_for_po == 3}; | ||
| 101 | const bool neg_a{!po && iadd32i.neg_a != 0}; | ||
| 102 | IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0); | ||
| 103 | } | ||
| 104 | |||
| 105 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp new file mode 100644 index 000000000..040cfc10f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp | |||
| @@ -0,0 +1,122 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Shift : u64 { | ||
| 12 | None, | ||
| 13 | Right, | ||
| 14 | Left, | ||
| 15 | }; | ||
| 16 | enum class Half : u64 { | ||
| 17 | All, | ||
| 18 | Lower, | ||
| 19 | Upper, | ||
| 20 | }; | ||
| 21 | |||
| 22 | [[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) { | ||
| 23 | constexpr bool is_signed{false}; | ||
| 24 | switch (half) { | ||
| 25 | case Half::All: | ||
| 26 | return value; | ||
| 27 | case Half::Lower: | ||
| 28 | return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed); | ||
| 29 | case Half::Upper: | ||
| 30 | return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed); | ||
| 31 | } | ||
| 32 | throw NotImplementedException("Invalid half"); | ||
| 33 | } | ||
| 34 | |||
| 35 | [[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) { | ||
| 36 | switch (shift) { | ||
| 37 | case Shift::None: | ||
| 38 | return value; | ||
| 39 | case Shift::Right: { | ||
| 40 | // 33-bit RS IADD3 edge case | ||
| 41 | const IR::U1 edge_case{ir.GetCarryFromOp(value)}; | ||
| 42 | const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))}; | ||
| 43 | return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)}; | ||
| 44 | } | ||
| 45 | case Shift::Left: | ||
| 46 | return ir.ShiftLeftLogical(value, ir.Imm32(16)); | ||
| 47 | } | ||
| 48 | throw NotImplementedException("Invalid shift"); | ||
| 49 | } | ||
| 50 | |||
| 51 | void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c, | ||
| 52 | Shift shift = Shift::None) { | ||
| 53 | union { | ||
| 54 | u64 insn; | ||
| 55 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 56 | BitField<47, 1, u64> cc; | ||
| 57 | BitField<48, 1, u64> x; | ||
| 58 | BitField<49, 1, u64> neg_c; | ||
| 59 | BitField<50, 1, u64> neg_b; | ||
| 60 | BitField<51, 1, u64> neg_a; | ||
| 61 | } iadd3{insn}; | ||
| 62 | |||
| 63 | if (iadd3.neg_a != 0) { | ||
| 64 | op_a = v.ir.INeg(op_a); | ||
| 65 | } | ||
| 66 | if (iadd3.neg_b != 0) { | ||
| 67 | op_b = v.ir.INeg(op_b); | ||
| 68 | } | ||
| 69 | if (iadd3.neg_c != 0) { | ||
| 70 | op_c = v.ir.INeg(op_c); | ||
| 71 | } | ||
| 72 | IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)}; | ||
| 73 | if (iadd3.x != 0) { | ||
| 74 | // TODO: How does RS behave when X is set? | ||
| 75 | if (shift == Shift::Right) { | ||
| 76 | throw NotImplementedException("IADD3 X+RS"); | ||
| 77 | } | ||
| 78 | const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; | ||
| 79 | lhs_1 = v.ir.IAdd(lhs_1, carry); | ||
| 80 | } | ||
| 81 | const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, shift)}; | ||
| 82 | const IR::U32 result{v.ir.IAdd(lhs_2, op_c)}; | ||
| 83 | |||
| 84 | v.X(iadd3.dest_reg, result); | ||
| 85 | if (iadd3.cc != 0) { | ||
| 86 | // TODO: How does CC behave when X is set? | ||
| 87 | if (iadd3.x != 0) { | ||
| 88 | throw NotImplementedException("IADD3 X+CC"); | ||
| 89 | } | ||
| 90 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 91 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 92 | v.SetCFlag(v.ir.GetCarryFromOp(result)); | ||
| 93 | const IR::U1 of_1{v.ir.ILessThan(lhs_1, op_a, false)}; | ||
| 94 | v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1)); | ||
| 95 | } | ||
| 96 | } | ||
| 97 | } // Anonymous namespace | ||
| 98 | |||
| 99 | void TranslatorVisitor::IADD3_reg(u64 insn) { | ||
| 100 | union { | ||
| 101 | u64 insn; | ||
| 102 | BitField<37, 2, Shift> shift; | ||
| 103 | BitField<35, 2, Half> half_a; | ||
| 104 | BitField<33, 2, Half> half_b; | ||
| 105 | BitField<31, 2, Half> half_c; | ||
| 106 | } const iadd3{insn}; | ||
| 107 | |||
| 108 | const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)}; | ||
| 109 | const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)}; | ||
| 110 | const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)}; | ||
| 111 | IADD3(*this, insn, op_a, op_b, op_c, iadd3.shift); | ||
| 112 | } | ||
| 113 | |||
| 114 | void TranslatorVisitor::IADD3_cbuf(u64 insn) { | ||
| 115 | IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn)); | ||
| 116 | } | ||
| 117 | |||
| 118 | void TranslatorVisitor::IADD3_imm(u64 insn) { | ||
| 119 | IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn)); | ||
| 120 | } | ||
| 121 | |||
| 122 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp new file mode 100644 index 000000000..ba6e01926 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp | |||
| @@ -0,0 +1,48 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_reg; | ||
| 17 | BitField<48, 1, u64> is_signed; | ||
| 18 | BitField<49, 3, CompareOp> compare_op; | ||
| 19 | } const icmp{insn}; | ||
| 20 | |||
| 21 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 22 | const bool is_signed{icmp.is_signed != 0}; | ||
| 23 | const IR::U1 cmp_result{IntegerCompare(v.ir, operand, zero, icmp.compare_op, is_signed)}; | ||
| 24 | |||
| 25 | const IR::U32 src_reg{v.X(icmp.src_reg)}; | ||
| 26 | const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; | ||
| 27 | |||
| 28 | v.X(icmp.dest_reg, result); | ||
| 29 | } | ||
| 30 | } // Anonymous namespace | ||
| 31 | |||
| 32 | void TranslatorVisitor::ICMP_reg(u64 insn) { | ||
| 33 | ICMP(*this, insn, GetReg20(insn), GetReg39(insn)); | ||
| 34 | } | ||
| 35 | |||
| 36 | void TranslatorVisitor::ICMP_rc(u64 insn) { | ||
| 37 | ICMP(*this, insn, GetReg39(insn), GetCbuf(insn)); | ||
| 38 | } | ||
| 39 | |||
| 40 | void TranslatorVisitor::ICMP_cr(u64 insn) { | ||
| 41 | ICMP(*this, insn, GetCbuf(insn), GetReg39(insn)); | ||
| 42 | } | ||
| 43 | |||
| 44 | void TranslatorVisitor::ICMP_imm(u64 insn) { | ||
| 45 | ICMP(*this, insn, GetImm20(insn), GetReg39(insn)); | ||
| 46 | } | ||
| 47 | |||
| 48 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp new file mode 100644 index 000000000..8ce1aee04 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp | |||
| @@ -0,0 +1,80 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, | ||
| 13 | CompareOp compare_op, bool is_signed, bool x) { | ||
| 14 | return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) | ||
| 15 | : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); | ||
| 16 | } | ||
| 17 | |||
| 18 | void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { | ||
| 19 | union { | ||
| 20 | u64 insn; | ||
| 21 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 22 | BitField<8, 8, IR::Reg> src_reg; | ||
| 23 | BitField<39, 3, IR::Pred> pred; | ||
| 24 | BitField<42, 1, u64> neg_pred; | ||
| 25 | BitField<43, 1, u64> x; | ||
| 26 | BitField<44, 1, u64> bf; | ||
| 27 | BitField<45, 2, BooleanOp> bop; | ||
| 28 | BitField<47, 1, u64> cc; | ||
| 29 | BitField<48, 1, u64> is_signed; | ||
| 30 | BitField<49, 3, CompareOp> compare_op; | ||
| 31 | } const iset{insn}; | ||
| 32 | |||
| 33 | const IR::U32 src_a{v.X(iset.src_reg)}; | ||
| 34 | const bool is_signed{iset.is_signed != 0}; | ||
| 35 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 36 | const bool x{iset.x != 0}; | ||
| 37 | const IR::U1 cmp_result{IsetCompare(v.ir, src_a, src_b, iset.compare_op, is_signed, x)}; | ||
| 38 | |||
| 39 | IR::U1 pred{v.ir.GetPred(iset.pred)}; | ||
| 40 | if (iset.neg_pred != 0) { | ||
| 41 | pred = v.ir.LogicalNot(pred); | ||
| 42 | } | ||
| 43 | const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)}; | ||
| 44 | |||
| 45 | const IR::U32 one_mask{v.ir.Imm32(-1)}; | ||
| 46 | const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; | ||
| 47 | const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one}; | ||
| 48 | const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; | ||
| 49 | |||
| 50 | v.X(iset.dest_reg, result); | ||
| 51 | if (iset.cc != 0) { | ||
| 52 | if (x) { | ||
| 53 | throw NotImplementedException("ISET.CC + X"); | ||
| 54 | } | ||
| 55 | const IR::U1 is_zero{v.ir.IEqual(result, zero)}; | ||
| 56 | v.SetZFlag(is_zero); | ||
| 57 | if (iset.bf != 0) { | ||
| 58 | v.ResetSFlag(); | ||
| 59 | } else { | ||
| 60 | v.SetSFlag(v.ir.LogicalNot(is_zero)); | ||
| 61 | } | ||
| 62 | v.ResetCFlag(); | ||
| 63 | v.ResetOFlag(); | ||
| 64 | } | ||
| 65 | } | ||
| 66 | } // Anonymous namespace | ||
| 67 | |||
| 68 | void TranslatorVisitor::ISET_reg(u64 insn) { | ||
| 69 | ISET(*this, insn, GetReg20(insn)); | ||
| 70 | } | ||
| 71 | |||
| 72 | void TranslatorVisitor::ISET_cbuf(u64 insn) { | ||
| 73 | ISET(*this, insn, GetCbuf(insn)); | ||
| 74 | } | ||
| 75 | |||
| 76 | void TranslatorVisitor::ISET_imm(u64 insn) { | ||
| 77 | ISET(*this, insn, GetImm20(insn)); | ||
| 78 | } | ||
| 79 | |||
| 80 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp new file mode 100644 index 000000000..0b8119ddd --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp | |||
| @@ -0,0 +1,182 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | enum class FloatFormat : u64 { | ||
| 13 | F16 = 1, | ||
| 14 | F32 = 2, | ||
| 15 | F64 = 3, | ||
| 16 | }; | ||
| 17 | |||
| 18 | enum class IntFormat : u64 { | ||
| 19 | U8 = 0, | ||
| 20 | U16 = 1, | ||
| 21 | U32 = 2, | ||
| 22 | U64 = 3, | ||
| 23 | }; | ||
| 24 | |||
| 25 | union Encoding { | ||
| 26 | u64 raw; | ||
| 27 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 28 | BitField<8, 2, FloatFormat> float_format; | ||
| 29 | BitField<10, 2, IntFormat> int_format; | ||
| 30 | BitField<13, 1, u64> is_signed; | ||
| 31 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 32 | BitField<41, 2, u64> selector; | ||
| 33 | BitField<47, 1, u64> cc; | ||
| 34 | BitField<45, 1, u64> neg; | ||
| 35 | BitField<49, 1, u64> abs; | ||
| 36 | }; | ||
| 37 | |||
| 38 | bool Is64(u64 insn) { | ||
| 39 | return Encoding{insn}.int_format == IntFormat::U64; | ||
| 40 | } | ||
| 41 | |||
| 42 | int BitSize(FloatFormat format) { | ||
| 43 | switch (format) { | ||
| 44 | case FloatFormat::F16: | ||
| 45 | return 16; | ||
| 46 | case FloatFormat::F32: | ||
| 47 | return 32; | ||
| 48 | case FloatFormat::F64: | ||
| 49 | return 64; | ||
| 50 | } | ||
| 51 | throw NotImplementedException("Invalid float format {}", format); | ||
| 52 | } | ||
| 53 | |||
| 54 | IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) { | ||
| 55 | const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))}; | ||
| 56 | const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))}; | ||
| 57 | const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)}; | ||
| 58 | const IR::U1 is_least{v.ir.IEqual(value, least_value)}; | ||
| 59 | return IR::U32{v.ir.Select(is_least, value, absolute)}; | ||
| 60 | } | ||
| 61 | |||
| 62 | void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { | ||
| 63 | const Encoding i2f{insn}; | ||
| 64 | if (i2f.cc != 0) { | ||
| 65 | throw NotImplementedException("I2F CC"); | ||
| 66 | } | ||
| 67 | const bool is_signed{i2f.is_signed != 0}; | ||
| 68 | int src_bitsize{}; | ||
| 69 | switch (i2f.int_format) { | ||
| 70 | case IntFormat::U8: | ||
| 71 | src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8), | ||
| 72 | v.ir.Imm32(8), is_signed); | ||
| 73 | if (i2f.abs != 0) { | ||
| 74 | src = SmallAbs(v, src, 8); | ||
| 75 | } | ||
| 76 | src_bitsize = 8; | ||
| 77 | break; | ||
| 78 | case IntFormat::U16: | ||
| 79 | if (i2f.selector == 1 || i2f.selector == 3) { | ||
| 80 | throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value()); | ||
| 81 | } | ||
| 82 | src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8), | ||
| 83 | v.ir.Imm32(16), is_signed); | ||
| 84 | if (i2f.abs != 0) { | ||
| 85 | src = SmallAbs(v, src, 16); | ||
| 86 | } | ||
| 87 | src_bitsize = 16; | ||
| 88 | break; | ||
| 89 | case IntFormat::U32: | ||
| 90 | case IntFormat::U64: | ||
| 91 | if (i2f.selector != 0) { | ||
| 92 | throw NotImplementedException("Unexpected selector {}", i2f.selector.Value()); | ||
| 93 | } | ||
| 94 | if (i2f.abs != 0 && is_signed) { | ||
| 95 | src = v.ir.IAbs(src); | ||
| 96 | } | ||
| 97 | src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32; | ||
| 98 | break; | ||
| 99 | } | ||
| 100 | const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32}; | ||
| 101 | const int dst_bitsize{BitSize(i2f.float_format)}; | ||
| 102 | const IR::FpControl fp_control{ | ||
| 103 | .no_contraction = false, | ||
| 104 | .rounding = CastFpRounding(i2f.fp_rounding), | ||
| 105 | .fmz_mode = IR::FmzMode::DontCare, | ||
| 106 | }; | ||
| 107 | auto value{v.ir.ConvertIToF(static_cast<size_t>(dst_bitsize), | ||
| 108 | static_cast<size_t>(conversion_src_bitsize), is_signed, src, | ||
| 109 | fp_control)}; | ||
| 110 | if (i2f.neg != 0) { | ||
| 111 | if (i2f.abs != 0 || !is_signed) { | ||
| 112 | // We know the value is positive | ||
| 113 | value = v.ir.FPNeg(value); | ||
| 114 | } else { | ||
| 115 | // Only negate if the input isn't the lowest value | ||
| 116 | IR::U1 is_least; | ||
| 117 | if (src_bitsize == 64) { | ||
| 118 | is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min())); | ||
| 119 | } else if (src_bitsize == 32) { | ||
| 120 | is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits<s32>::min())); | ||
| 121 | } else { | ||
| 122 | const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))}; | ||
| 123 | is_least = v.ir.IEqual(src, least_value); | ||
| 124 | } | ||
| 125 | value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))}; | ||
| 126 | } | ||
| 127 | } | ||
| 128 | switch (i2f.float_format) { | ||
| 129 | case FloatFormat::F16: { | ||
| 130 | const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; | ||
| 131 | v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero))); | ||
| 132 | break; | ||
| 133 | } | ||
| 134 | case FloatFormat::F32: | ||
| 135 | v.F(i2f.dest_reg, value); | ||
| 136 | break; | ||
| 137 | case FloatFormat::F64: { | ||
| 138 | if (!IR::IsAligned(i2f.dest_reg, 2)) { | ||
| 139 | throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value()); | ||
| 140 | } | ||
| 141 | const IR::Value vector{v.ir.UnpackDouble2x32(value)}; | ||
| 142 | for (int i = 0; i < 2; ++i) { | ||
| 143 | v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))}); | ||
| 144 | } | ||
| 145 | break; | ||
| 146 | } | ||
| 147 | default: | ||
| 148 | throw NotImplementedException("Invalid float format {}", i2f.float_format.Value()); | ||
| 149 | } | ||
| 150 | } | ||
| 151 | } // Anonymous namespace | ||
| 152 | |||
| 153 | void TranslatorVisitor::I2F_reg(u64 insn) { | ||
| 154 | if (Is64(insn)) { | ||
| 155 | union { | ||
| 156 | u64 raw; | ||
| 157 | BitField<20, 8, IR::Reg> reg; | ||
| 158 | } const value{insn}; | ||
| 159 | const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))}; | ||
| 160 | I2F(*this, insn, ir.PackUint2x32(regs)); | ||
| 161 | } else { | ||
| 162 | I2F(*this, insn, GetReg20(insn)); | ||
| 163 | } | ||
| 164 | } | ||
| 165 | |||
| 166 | void TranslatorVisitor::I2F_cbuf(u64 insn) { | ||
| 167 | if (Is64(insn)) { | ||
| 168 | I2F(*this, insn, GetPackedCbuf(insn)); | ||
| 169 | } else { | ||
| 170 | I2F(*this, insn, GetCbuf(insn)); | ||
| 171 | } | ||
| 172 | } | ||
| 173 | |||
| 174 | void TranslatorVisitor::I2F_imm(u64 insn) { | ||
| 175 | if (Is64(insn)) { | ||
| 176 | I2F(*this, insn, GetPackedImm20(insn)); | ||
| 177 | } else { | ||
| 178 | I2F(*this, insn, GetImm20(insn)); | ||
| 179 | } | ||
| 180 | } | ||
| 181 | |||
| 182 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp new file mode 100644 index 000000000..5feefc0ce --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp | |||
| @@ -0,0 +1,82 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class MaxShift : u64 { | ||
| 12 | U32, | ||
| 13 | Undefined, | ||
| 14 | U64, | ||
| 15 | S64, | ||
| 16 | }; | ||
| 17 | |||
| 18 | IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift, | ||
| 19 | bool right_shift, bool is_signed) { | ||
| 20 | if (!right_shift) { | ||
| 21 | return ir.ShiftLeftLogical(packed_int, safe_shift); | ||
| 22 | } | ||
| 23 | if (is_signed) { | ||
| 24 | return ir.ShiftRightArithmetic(packed_int, safe_shift); | ||
| 25 | } | ||
| 26 | return ir.ShiftRightLogical(packed_int, safe_shift); | ||
| 27 | } | ||
| 28 | |||
| 29 | void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits, | ||
| 30 | bool right_shift) { | ||
| 31 | union { | ||
| 32 | u64 insn; | ||
| 33 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 34 | BitField<0, 8, IR::Reg> lo_bits_reg; | ||
| 35 | BitField<37, 2, MaxShift> max_shift; | ||
| 36 | BitField<47, 1, u64> cc; | ||
| 37 | BitField<48, 2, u64> x_mode; | ||
| 38 | BitField<50, 1, u64> wrap; | ||
| 39 | } const shf{insn}; | ||
| 40 | |||
| 41 | if (shf.cc != 0) { | ||
| 42 | throw NotImplementedException("SHF CC"); | ||
| 43 | } | ||
| 44 | if (shf.x_mode != 0) { | ||
| 45 | throw NotImplementedException("SHF X Mode"); | ||
| 46 | } | ||
| 47 | if (shf.max_shift == MaxShift::Undefined) { | ||
| 48 | throw NotImplementedException("SHF Use of undefined MaxShift value"); | ||
| 49 | } | ||
| 50 | const IR::U32 low_bits{v.X(shf.lo_bits_reg)}; | ||
| 51 | const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))}; | ||
| 52 | const IR::U32 max_shift{shf.max_shift == MaxShift::U32 ? v.ir.Imm32(32) : v.ir.Imm32(63)}; | ||
| 53 | const IR::U32 safe_shift{shf.wrap != 0 | ||
| 54 | ? v.ir.BitwiseAnd(shift, v.ir.ISub(max_shift, v.ir.Imm32(1))) | ||
| 55 | : v.ir.UMin(shift, max_shift)}; | ||
| 56 | |||
| 57 | const bool is_signed{shf.max_shift == MaxShift::S64}; | ||
| 58 | const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)}; | ||
| 59 | const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)}; | ||
| 60 | |||
| 61 | const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 0 : 1)}; | ||
| 62 | v.X(shf.dest_reg, result); | ||
| 63 | } | ||
| 64 | } // Anonymous namespace | ||
| 65 | |||
| 66 | void TranslatorVisitor::SHF_l_reg(u64 insn) { | ||
| 67 | SHF(*this, insn, GetReg20(insn), GetReg39(insn), false); | ||
| 68 | } | ||
| 69 | |||
| 70 | void TranslatorVisitor::SHF_l_imm(u64 insn) { | ||
| 71 | SHF(*this, insn, GetImm20(insn), GetReg39(insn), false); | ||
| 72 | } | ||
| 73 | |||
| 74 | void TranslatorVisitor::SHF_r_reg(u64 insn) { | ||
| 75 | SHF(*this, insn, GetReg20(insn), GetReg39(insn), true); | ||
| 76 | } | ||
| 77 | |||
| 78 | void TranslatorVisitor::SHF_r_imm(u64 insn) { | ||
| 79 | SHF(*this, insn, GetImm20(insn), GetReg39(insn), true); | ||
| 80 | } | ||
| 81 | |||
| 82 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp new file mode 100644 index 000000000..1badbacc4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp | |||
| @@ -0,0 +1,64 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_reg; | ||
| 16 | BitField<39, 3, IR::Pred> pred; | ||
| 17 | BitField<42, 1, u64> neg_pred; | ||
| 18 | BitField<43, 2, u64> mode; | ||
| 19 | BitField<47, 1, u64> cc; | ||
| 20 | BitField<48, 1, u64> is_signed; | ||
| 21 | } const imnmx{insn}; | ||
| 22 | |||
| 23 | if (imnmx.cc != 0) { | ||
| 24 | throw NotImplementedException("IMNMX CC"); | ||
| 25 | } | ||
| 26 | |||
| 27 | if (imnmx.mode != 0) { | ||
| 28 | throw NotImplementedException("IMNMX.MODE"); | ||
| 29 | } | ||
| 30 | |||
| 31 | const IR::U1 pred{v.ir.GetPred(imnmx.pred)}; | ||
| 32 | const IR::U32 op_a{v.X(imnmx.src_reg)}; | ||
| 33 | IR::U32 min; | ||
| 34 | IR::U32 max; | ||
| 35 | |||
| 36 | if (imnmx.is_signed != 0) { | ||
| 37 | min = IR::U32{v.ir.SMin(op_a, op_b)}; | ||
| 38 | max = IR::U32{v.ir.SMax(op_a, op_b)}; | ||
| 39 | } else { | ||
| 40 | min = IR::U32{v.ir.UMin(op_a, op_b)}; | ||
| 41 | max = IR::U32{v.ir.UMax(op_a, op_b)}; | ||
| 42 | } | ||
| 43 | if (imnmx.neg_pred != 0) { | ||
| 44 | std::swap(min, max); | ||
| 45 | } | ||
| 46 | |||
| 47 | const IR::U32 result{v.ir.Select(pred, min, max)}; | ||
| 48 | v.X(imnmx.dest_reg, result); | ||
| 49 | } | ||
| 50 | } // Anonymous namespace | ||
| 51 | |||
| 52 | void TranslatorVisitor::IMNMX_reg(u64 insn) { | ||
| 53 | IMNMX(*this, insn, GetReg20(insn)); | ||
| 54 | } | ||
| 55 | |||
| 56 | void TranslatorVisitor::IMNMX_cbuf(u64 insn) { | ||
| 57 | IMNMX(*this, insn, GetCbuf(insn)); | ||
| 58 | } | ||
| 59 | |||
| 60 | void TranslatorVisitor::IMNMX_imm(u64 insn) { | ||
| 61 | IMNMX(*this, insn, GetImm20(insn)); | ||
| 62 | } | ||
| 63 | |||
| 64 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp new file mode 100644 index 000000000..5ece7678d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp | |||
| @@ -0,0 +1,36 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<40, 1, u64> tilde; | ||
| 16 | } const popc{insn}; | ||
| 17 | |||
| 18 | const IR::U32 operand = popc.tilde == 0 ? src : v.ir.BitwiseNot(src); | ||
| 19 | const IR::U32 result = v.ir.BitCount(operand); | ||
| 20 | v.X(popc.dest_reg, result); | ||
| 21 | } | ||
| 22 | } // Anonymous namespace | ||
| 23 | |||
| 24 | void TranslatorVisitor::POPC_reg(u64 insn) { | ||
| 25 | POPC(*this, insn, GetReg20(insn)); | ||
| 26 | } | ||
| 27 | |||
| 28 | void TranslatorVisitor::POPC_cbuf(u64 insn) { | ||
| 29 | POPC(*this, insn, GetCbuf(insn)); | ||
| 30 | } | ||
| 31 | |||
| 32 | void TranslatorVisitor::POPC_imm(u64 insn) { | ||
| 33 | POPC(*this, insn, GetImm20(insn)); | ||
| 34 | } | ||
| 35 | |||
| 36 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp new file mode 100644 index 000000000..044671943 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp | |||
| @@ -0,0 +1,86 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b, | ||
| 12 | u64 scale_imm) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> op_a; | ||
| 17 | } const iscadd{insn}; | ||
| 18 | |||
| 19 | const bool po{neg_a && neg_b}; | ||
| 20 | IR::U32 op_a{v.X(iscadd.op_a)}; | ||
| 21 | if (po) { | ||
| 22 | // When PO is present, add one | ||
| 23 | op_b = v.ir.IAdd(op_b, v.ir.Imm32(1)); | ||
| 24 | } else { | ||
| 25 | // When PO is not present, the bits are interpreted as negation | ||
| 26 | if (neg_a) { | ||
| 27 | op_a = v.ir.INeg(op_a); | ||
| 28 | } | ||
| 29 | if (neg_b) { | ||
| 30 | op_b = v.ir.INeg(op_b); | ||
| 31 | } | ||
| 32 | } | ||
| 33 | // With the operands already processed, scale A | ||
| 34 | const IR::U32 scale{v.ir.Imm32(static_cast<u32>(scale_imm))}; | ||
| 35 | const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)}; | ||
| 36 | |||
| 37 | const IR::U32 result{v.ir.IAdd(scaled_a, op_b)}; | ||
| 38 | v.X(iscadd.dest_reg, result); | ||
| 39 | |||
| 40 | if (cc) { | ||
| 41 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 42 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 43 | const IR::U1 carry{v.ir.GetCarryFromOp(result)}; | ||
| 44 | const IR::U1 overflow{v.ir.GetOverflowFromOp(result)}; | ||
| 45 | v.SetCFlag(po ? v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)) : carry); | ||
| 46 | v.SetOFlag(po ? v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)) : overflow); | ||
| 47 | } | ||
| 48 | } | ||
| 49 | |||
| 50 | void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { | ||
| 51 | union { | ||
| 52 | u64 raw; | ||
| 53 | BitField<47, 1, u64> cc; | ||
| 54 | BitField<48, 1, u64> neg_b; | ||
| 55 | BitField<49, 1, u64> neg_a; | ||
| 56 | BitField<39, 5, u64> scale; | ||
| 57 | } const iscadd{insn}; | ||
| 58 | |||
| 59 | ISCADD(v, insn, op_b, iscadd.cc != 0, iscadd.neg_a != 0, iscadd.neg_b != 0, iscadd.scale); | ||
| 60 | } | ||
| 61 | |||
| 62 | } // Anonymous namespace | ||
| 63 | |||
| 64 | void TranslatorVisitor::ISCADD_reg(u64 insn) { | ||
| 65 | ISCADD(*this, insn, GetReg20(insn)); | ||
| 66 | } | ||
| 67 | |||
| 68 | void TranslatorVisitor::ISCADD_cbuf(u64 insn) { | ||
| 69 | ISCADD(*this, insn, GetCbuf(insn)); | ||
| 70 | } | ||
| 71 | |||
| 72 | void TranslatorVisitor::ISCADD_imm(u64 insn) { | ||
| 73 | ISCADD(*this, insn, GetImm20(insn)); | ||
| 74 | } | ||
| 75 | |||
| 76 | void TranslatorVisitor::ISCADD32I(u64 insn) { | ||
| 77 | union { | ||
| 78 | u64 raw; | ||
| 79 | BitField<52, 1, u64> cc; | ||
| 80 | BitField<53, 5, u64> scale; | ||
| 81 | } const iscadd{insn}; | ||
| 82 | |||
| 83 | return ISCADD(*this, insn, GetImm32(insn), iscadd.cc != 0, false, false, iscadd.scale); | ||
| 84 | } | ||
| 85 | |||
| 86 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp new file mode 100644 index 000000000..bee10e5b9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | IR::U1 IsetpCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, | ||
| 13 | CompareOp compare_op, bool is_signed, bool x) { | ||
| 14 | return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) | ||
| 15 | : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); | ||
| 16 | } | ||
| 17 | |||
| 18 | void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { | ||
| 19 | union { | ||
| 20 | u64 raw; | ||
| 21 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 22 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 23 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 24 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 25 | BitField<42, 1, u64> neg_bop_pred; | ||
| 26 | BitField<43, 1, u64> x; | ||
| 27 | BitField<45, 2, BooleanOp> bop; | ||
| 28 | BitField<48, 1, u64> is_signed; | ||
| 29 | BitField<49, 3, CompareOp> compare_op; | ||
| 30 | } const isetp{insn}; | ||
| 31 | |||
| 32 | const bool is_signed{isetp.is_signed != 0}; | ||
| 33 | const bool x{isetp.x != 0}; | ||
| 34 | const BooleanOp bop{isetp.bop}; | ||
| 35 | const CompareOp compare_op{isetp.compare_op}; | ||
| 36 | const IR::U32 op_a{v.X(isetp.src_reg_a)}; | ||
| 37 | const IR::U1 comparison{IsetpCompare(v.ir, op_a, op_b, compare_op, is_signed, x)}; | ||
| 38 | const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)}; | ||
| 39 | const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; | ||
| 40 | const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; | ||
| 41 | v.ir.SetPred(isetp.dest_pred_a, result_a); | ||
| 42 | v.ir.SetPred(isetp.dest_pred_b, result_b); | ||
| 43 | } | ||
| 44 | } // Anonymous namespace | ||
| 45 | |||
| 46 | void TranslatorVisitor::ISETP_reg(u64 insn) { | ||
| 47 | ISETP(*this, insn, GetReg20(insn)); | ||
| 48 | } | ||
| 49 | |||
| 50 | void TranslatorVisitor::ISETP_cbuf(u64 insn) { | ||
| 51 | ISETP(*this, insn, GetCbuf(insn)); | ||
| 52 | } | ||
| 53 | |||
| 54 | void TranslatorVisitor::ISETP_imm(u64 insn) { | ||
| 55 | ISETP(*this, insn, GetImm20(insn)); | ||
| 56 | } | ||
| 57 | |||
| 58 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp new file mode 100644 index 000000000..20af68852 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 16 | BitField<39, 1, u64> w; | ||
| 17 | BitField<43, 1, u64> x; | ||
| 18 | BitField<47, 1, u64> cc; | ||
| 19 | } const shl{insn}; | ||
| 20 | |||
| 21 | if (shl.x != 0) { | ||
| 22 | throw NotImplementedException("SHL.X"); | ||
| 23 | } | ||
| 24 | if (shl.cc != 0) { | ||
| 25 | throw NotImplementedException("SHL.CC"); | ||
| 26 | } | ||
| 27 | const IR::U32 base{v.X(shl.src_reg_a)}; | ||
| 28 | IR::U32 result; | ||
| 29 | if (shl.w != 0) { | ||
| 30 | // When .W is set, the shift value is wrapped | ||
| 31 | // To emulate this we just have to wrap it ourselves. | ||
| 32 | const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))}; | ||
| 33 | result = v.ir.ShiftLeftLogical(base, shift); | ||
| 34 | } else { | ||
| 35 | // When .W is not set, the shift value is clamped between 0 and 32. | ||
| 36 | // To emulate this we have to have in mind the special shift of 32, that evaluates as 0. | ||
| 37 | // We can safely evaluate an out of bounds shift according to the SPIR-V specification: | ||
| 38 | // | ||
| 39 | // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical | ||
| 40 | // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than | ||
| 41 | // or equal to the bit width of the components of Base." | ||
| 42 | // | ||
| 43 | // And on the GLASM specification it is also safe to evaluate out of bounds: | ||
| 44 | // | ||
| 45 | // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt | ||
| 46 | // "The results of a shift operation ("<<") are undefined if the value of the second operand | ||
| 47 | // is negative, or greater than or equal to the number of bits in the first operand." | ||
| 48 | // | ||
| 49 | // Emphasis on undefined results in contrast to undefined behavior. | ||
| 50 | // | ||
| 51 | const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)}; | ||
| 52 | const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)}; | ||
| 53 | result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))}; | ||
| 54 | } | ||
| 55 | v.X(shl.dest_reg, result); | ||
| 56 | } | ||
| 57 | } // Anonymous namespace | ||
| 58 | |||
| 59 | void TranslatorVisitor::SHL_reg(u64 insn) { | ||
| 60 | SHL(*this, insn, GetReg20(insn)); | ||
| 61 | } | ||
| 62 | |||
| 63 | void TranslatorVisitor::SHL_cbuf(u64 insn) { | ||
| 64 | SHL(*this, insn, GetCbuf(insn)); | ||
| 65 | } | ||
| 66 | |||
| 67 | void TranslatorVisitor::SHL_imm(u64 insn) { | ||
| 68 | SHL(*this, insn, GetImm20(insn)); | ||
| 69 | } | ||
| 70 | |||
| 71 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp new file mode 100644 index 000000000..be00bb605 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 16 | BitField<39, 1, u64> is_wrapped; | ||
| 17 | BitField<40, 1, u64> brev; | ||
| 18 | BitField<43, 1, u64> xmode; | ||
| 19 | BitField<47, 1, u64> cc; | ||
| 20 | BitField<48, 1, u64> is_signed; | ||
| 21 | } const shr{insn}; | ||
| 22 | |||
| 23 | if (shr.xmode != 0) { | ||
| 24 | throw NotImplementedException("SHR.XMODE"); | ||
| 25 | } | ||
| 26 | if (shr.cc != 0) { | ||
| 27 | throw NotImplementedException("SHR.CC"); | ||
| 28 | } | ||
| 29 | |||
| 30 | IR::U32 base{v.X(shr.src_reg_a)}; | ||
| 31 | if (shr.brev == 1) { | ||
| 32 | base = v.ir.BitReverse(base); | ||
| 33 | } | ||
| 34 | IR::U32 result; | ||
| 35 | const IR::U32 safe_shift = shr.is_wrapped == 0 ? shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31)); | ||
| 36 | if (shr.is_signed == 1) { | ||
| 37 | result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)}; | ||
| 38 | } else { | ||
| 39 | result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)}; | ||
| 40 | } | ||
| 41 | |||
| 42 | if (shr.is_wrapped == 0) { | ||
| 43 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 44 | const IR::U32 safe_bits{v.ir.Imm32(32)}; | ||
| 45 | |||
| 46 | const IR::U1 is_negative{v.ir.ILessThan(result, zero, true)}; | ||
| 47 | const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)}; | ||
| 48 | const IR::U32 clamped_value{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)}; | ||
| 49 | result = IR::U32{v.ir.Select(is_safe, result, clamped_value)}; | ||
| 50 | } | ||
| 51 | v.X(shr.dest_reg, result); | ||
| 52 | } | ||
| 53 | } // Anonymous namespace | ||
| 54 | |||
| 55 | void TranslatorVisitor::SHR_reg(u64 insn) { | ||
| 56 | SHR(*this, insn, GetReg20(insn)); | ||
| 57 | } | ||
| 58 | |||
| 59 | void TranslatorVisitor::SHR_cbuf(u64 insn) { | ||
| 60 | SHR(*this, insn, GetCbuf(insn)); | ||
| 61 | } | ||
| 62 | |||
| 63 | void TranslatorVisitor::SHR_imm(u64 insn) { | ||
| 64 | SHR(*this, insn, GetImm20(insn)); | ||
| 65 | } | ||
| 66 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp new file mode 100644 index 000000000..2932cdc42 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp | |||
| @@ -0,0 +1,135 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class SelectMode : u64 { | ||
| 12 | Default, | ||
| 13 | CLO, | ||
| 14 | CHI, | ||
| 15 | CSFU, | ||
| 16 | CBCC, | ||
| 17 | }; | ||
| 18 | |||
| 19 | enum class Half : u64 { | ||
| 20 | H0, // Least-significant bits (15:0) | ||
| 21 | H1, // Most-significant bits (31:16) | ||
| 22 | }; | ||
| 23 | |||
| 24 | IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) { | ||
| 25 | const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)}; | ||
| 26 | return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed); | ||
| 27 | } | ||
| 28 | |||
| 29 | void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, | ||
| 30 | SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) { | ||
| 31 | union { | ||
| 32 | u64 raw; | ||
| 33 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 34 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 35 | BitField<47, 1, u64> cc; | ||
| 36 | BitField<48, 1, u64> is_a_signed; | ||
| 37 | BitField<49, 1, u64> is_b_signed; | ||
| 38 | BitField<53, 1, Half> half_a; | ||
| 39 | } const xmad{insn}; | ||
| 40 | |||
| 41 | if (x) { | ||
| 42 | throw NotImplementedException("XMAD X"); | ||
| 43 | } | ||
| 44 | const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)}; | ||
| 45 | const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)}; | ||
| 46 | |||
| 47 | IR::U32 product{v.ir.IMul(op_a, op_b)}; | ||
| 48 | if (psl) { | ||
| 49 | // .PSL shifts the product 16 bits | ||
| 50 | product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16)); | ||
| 51 | } | ||
| 52 | const IR::U32 op_c{[&]() -> IR::U32 { | ||
| 53 | switch (select_mode) { | ||
| 54 | case SelectMode::Default: | ||
| 55 | return src_c; | ||
| 56 | case SelectMode::CLO: | ||
| 57 | return ExtractHalf(v, src_c, Half::H0, false); | ||
| 58 | case SelectMode::CHI: | ||
| 59 | return ExtractHalf(v, src_c, Half::H1, false); | ||
| 60 | case SelectMode::CBCC: | ||
| 61 | return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c); | ||
| 62 | case SelectMode::CSFU: | ||
| 63 | throw NotImplementedException("XMAD CSFU"); | ||
| 64 | } | ||
| 65 | throw NotImplementedException("Invalid XMAD select mode {}", select_mode); | ||
| 66 | }()}; | ||
| 67 | IR::U32 result{v.ir.IAdd(product, op_c)}; | ||
| 68 | if (mrg) { | ||
| 69 | // .MRG inserts src_b [15:0] into result's [31:16]. | ||
| 70 | const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)}; | ||
| 71 | result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16)); | ||
| 72 | } | ||
| 73 | if (xmad.cc) { | ||
| 74 | throw NotImplementedException("XMAD CC"); | ||
| 75 | } | ||
| 76 | // Store result | ||
| 77 | v.X(xmad.dest_reg, result); | ||
| 78 | } | ||
| 79 | } // Anonymous namespace | ||
| 80 | |||
| 81 | void TranslatorVisitor::XMAD_reg(u64 insn) { | ||
| 82 | union { | ||
| 83 | u64 raw; | ||
| 84 | BitField<35, 1, Half> half_b; | ||
| 85 | BitField<36, 1, u64> psl; | ||
| 86 | BitField<37, 1, u64> mrg; | ||
| 87 | BitField<38, 1, u64> x; | ||
| 88 | BitField<50, 3, SelectMode> select_mode; | ||
| 89 | } const xmad{insn}; | ||
| 90 | |||
| 91 | XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, | ||
| 92 | xmad.mrg != 0, xmad.x != 0); | ||
| 93 | } | ||
| 94 | |||
| 95 | void TranslatorVisitor::XMAD_rc(u64 insn) { | ||
| 96 | union { | ||
| 97 | u64 raw; | ||
| 98 | BitField<50, 2, SelectMode> select_mode; | ||
| 99 | BitField<52, 1, Half> half_b; | ||
| 100 | BitField<54, 1, u64> x; | ||
| 101 | } const xmad{insn}; | ||
| 102 | |||
| 103 | XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false, | ||
| 104 | xmad.x != 0); | ||
| 105 | } | ||
| 106 | |||
| 107 | void TranslatorVisitor::XMAD_cr(u64 insn) { | ||
| 108 | union { | ||
| 109 | u64 raw; | ||
| 110 | BitField<50, 2, SelectMode> select_mode; | ||
| 111 | BitField<52, 1, Half> half_b; | ||
| 112 | BitField<54, 1, u64> x; | ||
| 113 | BitField<55, 1, u64> psl; | ||
| 114 | BitField<56, 1, u64> mrg; | ||
| 115 | } const xmad{insn}; | ||
| 116 | |||
| 117 | XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, | ||
| 118 | xmad.mrg != 0, xmad.x != 0); | ||
| 119 | } | ||
| 120 | |||
| 121 | void TranslatorVisitor::XMAD_imm(u64 insn) { | ||
| 122 | union { | ||
| 123 | u64 raw; | ||
| 124 | BitField<20, 16, u64> src_b; | ||
| 125 | BitField<36, 1, u64> psl; | ||
| 126 | BitField<37, 1, u64> mrg; | ||
| 127 | BitField<38, 1, u64> x; | ||
| 128 | BitField<50, 3, SelectMode> select_mode; | ||
| 129 | } const xmad{insn}; | ||
| 130 | |||
| 131 | XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode, | ||
| 132 | Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0); | ||
| 133 | } | ||
| 134 | |||
| 135 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp new file mode 100644 index 000000000..53e8d8923 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp | |||
| @@ -0,0 +1,126 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class IntegerWidth : u64 { | ||
| 12 | Byte, | ||
| 13 | Short, | ||
| 14 | Word, | ||
| 15 | }; | ||
| 16 | |||
| 17 | [[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) { | ||
| 18 | switch (width) { | ||
| 19 | case IntegerWidth::Byte: | ||
| 20 | return ir.Imm32(8); | ||
| 21 | case IntegerWidth::Short: | ||
| 22 | return ir.Imm32(16); | ||
| 23 | case IntegerWidth::Word: | ||
| 24 | return ir.Imm32(32); | ||
| 25 | default: | ||
| 26 | throw NotImplementedException("Invalid width {}", width); | ||
| 27 | } | ||
| 28 | } | ||
| 29 | |||
| 30 | [[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src, | ||
| 31 | IntegerWidth dst_width) { | ||
| 32 | const IR::U32 zero{ir.Imm32(0)}; | ||
| 33 | const IR::U32 count{WidthSize(ir, dst_width)}; | ||
| 34 | return ir.BitFieldExtract(src, zero, count, false); | ||
| 35 | } | ||
| 36 | |||
| 37 | [[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width, | ||
| 38 | bool dst_signed, bool src_signed) { | ||
| 39 | IR::U32 min{}; | ||
| 40 | IR::U32 max{}; | ||
| 41 | const IR::U32 zero{ir.Imm32(0)}; | ||
| 42 | switch (dst_width) { | ||
| 43 | case IntegerWidth::Byte: | ||
| 44 | min = dst_signed && src_signed ? ir.Imm32(0xffffff80) : zero; | ||
| 45 | max = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff); | ||
| 46 | break; | ||
| 47 | case IntegerWidth::Short: | ||
| 48 | min = dst_signed && src_signed ? ir.Imm32(0xffff8000) : zero; | ||
| 49 | max = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff); | ||
| 50 | break; | ||
| 51 | case IntegerWidth::Word: | ||
| 52 | min = dst_signed && src_signed ? ir.Imm32(0x80000000) : zero; | ||
| 53 | max = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff); | ||
| 54 | break; | ||
| 55 | default: | ||
| 56 | throw NotImplementedException("Invalid width {}", dst_width); | ||
| 57 | } | ||
| 58 | const IR::U32 value{!dst_signed && src_signed ? ir.SMax(zero, src) : src}; | ||
| 59 | return dst_signed && src_signed ? ir.SClamp(value, min, max) : ir.UClamp(value, min, max); | ||
| 60 | } | ||
| 61 | |||
| 62 | void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { | ||
| 63 | union { | ||
| 64 | u64 insn; | ||
| 65 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 66 | BitField<8, 2, IntegerWidth> dst_fmt; | ||
| 67 | BitField<12, 1, u64> dst_fmt_sign; | ||
| 68 | BitField<10, 2, IntegerWidth> src_fmt; | ||
| 69 | BitField<13, 1, u64> src_fmt_sign; | ||
| 70 | BitField<41, 3, u64> selector; | ||
| 71 | BitField<45, 1, u64> neg; | ||
| 72 | BitField<47, 1, u64> cc; | ||
| 73 | BitField<49, 1, u64> abs; | ||
| 74 | BitField<50, 1, u64> sat; | ||
| 75 | } const i2i{insn}; | ||
| 76 | |||
| 77 | if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) { | ||
| 78 | throw NotImplementedException("16-bit source format incompatible with selector {}", | ||
| 79 | i2i.selector); | ||
| 80 | } | ||
| 81 | if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) { | ||
| 82 | throw NotImplementedException("32-bit source format incompatible with selector {}", | ||
| 83 | i2i.selector); | ||
| 84 | } | ||
| 85 | |||
| 86 | const s32 selector{static_cast<s32>(i2i.selector)}; | ||
| 87 | const IR::U32 offset{v.ir.Imm32(selector * 8)}; | ||
| 88 | const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)}; | ||
| 89 | const bool src_signed{i2i.src_fmt_sign != 0}; | ||
| 90 | const bool dst_signed{i2i.dst_fmt_sign != 0}; | ||
| 91 | const bool sat{i2i.sat != 0}; | ||
| 92 | |||
| 93 | IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, src_signed)}; | ||
| 94 | if (i2i.abs != 0) { | ||
| 95 | src_values = v.ir.IAbs(src_values); | ||
| 96 | } | ||
| 97 | if (i2i.neg != 0) { | ||
| 98 | src_values = v.ir.INeg(src_values); | ||
| 99 | } | ||
| 100 | const IR::U32 result{ | ||
| 101 | sat ? SaturateInteger(v.ir, src_values, i2i.dst_fmt, dst_signed, src_signed) | ||
| 102 | : ConvertInteger(v.ir, src_values, i2i.dst_fmt)}; | ||
| 103 | |||
| 104 | v.X(i2i.dest_reg, result); | ||
| 105 | if (i2i.cc != 0) { | ||
| 106 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 107 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 108 | v.ResetCFlag(); | ||
| 109 | v.ResetOFlag(); | ||
| 110 | } | ||
| 111 | } | ||
| 112 | } // Anonymous namespace | ||
| 113 | |||
| 114 | void TranslatorVisitor::I2I_reg(u64 insn) { | ||
| 115 | I2I(*this, insn, GetReg20(insn)); | ||
| 116 | } | ||
| 117 | |||
| 118 | void TranslatorVisitor::I2I_cbuf(u64 insn) { | ||
| 119 | I2I(*this, insn, GetCbuf(insn)); | ||
| 120 | } | ||
| 121 | |||
| 122 | void TranslatorVisitor::I2I_imm(u64 insn) { | ||
| 123 | I2I(*this, insn, GetImm20(insn)); | ||
| 124 | } | ||
| 125 | |||
| 126 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp new file mode 100644 index 000000000..9b85f8059 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Mode : u64 { | ||
| 12 | Default, | ||
| 13 | Patch, | ||
| 14 | Prim, | ||
| 15 | Attr, | ||
| 16 | }; | ||
| 17 | |||
| 18 | enum class Shift : u64 { | ||
| 19 | Default, | ||
| 20 | U16, | ||
| 21 | B32, | ||
| 22 | }; | ||
| 23 | |||
| 24 | } // Anonymous namespace | ||
| 25 | |||
| 26 | void TranslatorVisitor::ISBERD(u64 insn) { | ||
| 27 | union { | ||
| 28 | u64 raw; | ||
| 29 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 30 | BitField<8, 8, IR::Reg> src_reg; | ||
| 31 | BitField<31, 1, u64> skew; | ||
| 32 | BitField<32, 1, u64> o; | ||
| 33 | BitField<33, 2, Mode> mode; | ||
| 34 | BitField<47, 2, Shift> shift; | ||
| 35 | } const isberd{insn}; | ||
| 36 | |||
| 37 | if (isberd.skew != 0) { | ||
| 38 | throw NotImplementedException("SKEW"); | ||
| 39 | } | ||
| 40 | if (isberd.o != 0) { | ||
| 41 | throw NotImplementedException("O"); | ||
| 42 | } | ||
| 43 | if (isberd.mode != Mode::Default) { | ||
| 44 | throw NotImplementedException("Mode {}", isberd.mode.Value()); | ||
| 45 | } | ||
| 46 | if (isberd.shift != Shift::Default) { | ||
| 47 | throw NotImplementedException("Shift {}", isberd.shift.Value()); | ||
| 48 | } | ||
| 49 | LOG_WARNING(Shader, "(STUBBED) called"); | ||
| 50 | X(isberd.dest_reg, X(isberd.src_reg)); | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp new file mode 100644 index 000000000..2300088e3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | using namespace LDC; | ||
| 12 | namespace { | ||
| 13 | std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index, | ||
| 14 | const IR::U32& reg, const IR::U32& imm) { | ||
| 15 | switch (mode) { | ||
| 16 | case Mode::Default: | ||
| 17 | return {imm_index, ir.IAdd(reg, imm)}; | ||
| 18 | default: | ||
| 19 | break; | ||
| 20 | } | ||
| 21 | throw NotImplementedException("Mode {}", mode); | ||
| 22 | } | ||
| 23 | } // Anonymous namespace | ||
| 24 | |||
| 25 | void TranslatorVisitor::LDC(u64 insn) { | ||
| 26 | const Encoding ldc{insn}; | ||
| 27 | const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))}; | ||
| 28 | const IR::U32 reg{X(ldc.src_reg)}; | ||
| 29 | const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))}; | ||
| 30 | const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)}; | ||
| 31 | switch (ldc.size) { | ||
| 32 | case Size::U8: | ||
| 33 | X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, false)}); | ||
| 34 | break; | ||
| 35 | case Size::S8: | ||
| 36 | X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, true)}); | ||
| 37 | break; | ||
| 38 | case Size::U16: | ||
| 39 | X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, false)}); | ||
| 40 | break; | ||
| 41 | case Size::S16: | ||
| 42 | X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, true)}); | ||
| 43 | break; | ||
| 44 | case Size::B32: | ||
| 45 | X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 32, false)}); | ||
| 46 | break; | ||
| 47 | case Size::B64: { | ||
| 48 | if (!IR::IsAligned(ldc.dest_reg, 2)) { | ||
| 49 | throw NotImplementedException("Unaligned destination register"); | ||
| 50 | } | ||
| 51 | const IR::Value vector{ir.GetCbuf(index, offset, 64, false)}; | ||
| 52 | for (int i = 0; i < 2; ++i) { | ||
| 53 | X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); | ||
| 54 | } | ||
| 55 | break; | ||
| 56 | } | ||
| 57 | default: | ||
| 58 | throw NotImplementedException("Invalid size {}", ldc.size.Value()); | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h new file mode 100644 index 000000000..3074ea0e3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/reg.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell::LDC { | ||
| 12 | |||
| 13 | enum class Mode : u64 { | ||
| 14 | Default, | ||
| 15 | IL, | ||
| 16 | IS, | ||
| 17 | ISL, | ||
| 18 | }; | ||
| 19 | |||
| 20 | enum class Size : u64 { | ||
| 21 | U8, | ||
| 22 | S8, | ||
| 23 | U16, | ||
| 24 | S16, | ||
| 25 | B32, | ||
| 26 | B64, | ||
| 27 | }; | ||
| 28 | |||
| 29 | union Encoding { | ||
| 30 | u64 raw; | ||
| 31 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 32 | BitField<8, 8, IR::Reg> src_reg; | ||
| 33 | BitField<20, 16, s64> offset; | ||
| 34 | BitField<36, 5, u64> index; | ||
| 35 | BitField<44, 2, Mode> mode; | ||
| 36 | BitField<48, 3, Size> size; | ||
| 37 | }; | ||
| 38 | |||
| 39 | } // namespace Shader::Maxwell::LDC | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp new file mode 100644 index 000000000..4a0f04e47 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp | |||
| @@ -0,0 +1,108 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale, | ||
| 12 | bool neg, bool x) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> offset_lo_reg; | ||
| 17 | BitField<47, 1, u64> cc; | ||
| 18 | BitField<48, 3, IR::Pred> pred; | ||
| 19 | } const lea{insn}; | ||
| 20 | |||
| 21 | if (x) { | ||
| 22 | throw NotImplementedException("LEA.HI X"); | ||
| 23 | } | ||
| 24 | if (lea.pred != IR::Pred::PT) { | ||
| 25 | throw NotImplementedException("LEA.HI Pred"); | ||
| 26 | } | ||
| 27 | if (lea.cc != 0) { | ||
| 28 | throw NotImplementedException("LEA.HI CC"); | ||
| 29 | } | ||
| 30 | |||
| 31 | const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; | ||
| 32 | const IR::U64 packed_offset{v.ir.PackUint2x32(v.ir.CompositeConstruct(offset_lo, offset_hi))}; | ||
| 33 | const IR::U64 offset{neg ? IR::U64{v.ir.INeg(packed_offset)} : packed_offset}; | ||
| 34 | |||
| 35 | const s32 hi_scale{32 - static_cast<s32>(scale)}; | ||
| 36 | const IR::U64 scaled_offset{v.ir.ShiftRightLogical(offset, v.ir.Imm32(hi_scale))}; | ||
| 37 | const IR::U32 scaled_offset_w0{v.ir.CompositeExtract(v.ir.UnpackUint2x32(scaled_offset), 0)}; | ||
| 38 | |||
| 39 | IR::U32 result{v.ir.IAdd(base, scaled_offset_w0)}; | ||
| 40 | v.X(lea.dest_reg, result); | ||
| 41 | } | ||
| 42 | |||
| 43 | void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) { | ||
| 44 | union { | ||
| 45 | u64 insn; | ||
| 46 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 47 | BitField<8, 8, IR::Reg> offset_lo_reg; | ||
| 48 | BitField<39, 5, u64> scale; | ||
| 49 | BitField<45, 1, u64> neg; | ||
| 50 | BitField<46, 1, u64> x; | ||
| 51 | BitField<47, 1, u64> cc; | ||
| 52 | BitField<48, 3, IR::Pred> pred; | ||
| 53 | } const lea{insn}; | ||
| 54 | if (lea.x != 0) { | ||
| 55 | throw NotImplementedException("LEA.LO X"); | ||
| 56 | } | ||
| 57 | if (lea.pred != IR::Pred::PT) { | ||
| 58 | throw NotImplementedException("LEA.LO Pred"); | ||
| 59 | } | ||
| 60 | if (lea.cc != 0) { | ||
| 61 | throw NotImplementedException("LEA.LO CC"); | ||
| 62 | } | ||
| 63 | |||
| 64 | const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; | ||
| 65 | const s32 scale{static_cast<s32>(lea.scale)}; | ||
| 66 | const IR::U32 offset{lea.neg != 0 ? IR::U32{v.ir.INeg(offset_lo)} : offset_lo}; | ||
| 67 | const IR::U32 scaled_offset{v.ir.ShiftLeftLogical(offset, v.ir.Imm32(scale))}; | ||
| 68 | |||
| 69 | IR::U32 result{v.ir.IAdd(base, scaled_offset)}; | ||
| 70 | v.X(lea.dest_reg, result); | ||
| 71 | } | ||
| 72 | } // Anonymous namespace | ||
| 73 | |||
| 74 | void TranslatorVisitor::LEA_hi_reg(u64 insn) { | ||
| 75 | union { | ||
| 76 | u64 insn; | ||
| 77 | BitField<28, 5, u64> scale; | ||
| 78 | BitField<37, 1, u64> neg; | ||
| 79 | BitField<38, 1, u64> x; | ||
| 80 | } const lea{insn}; | ||
| 81 | |||
| 82 | LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0); | ||
| 83 | } | ||
| 84 | |||
| 85 | void TranslatorVisitor::LEA_hi_cbuf(u64 insn) { | ||
| 86 | union { | ||
| 87 | u64 insn; | ||
| 88 | BitField<51, 5, u64> scale; | ||
| 89 | BitField<56, 1, u64> neg; | ||
| 90 | BitField<57, 1, u64> x; | ||
| 91 | } const lea{insn}; | ||
| 92 | |||
| 93 | LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0); | ||
| 94 | } | ||
| 95 | |||
| 96 | void TranslatorVisitor::LEA_lo_reg(u64 insn) { | ||
| 97 | LEA_lo(*this, insn, GetReg20(insn)); | ||
| 98 | } | ||
| 99 | |||
| 100 | void TranslatorVisitor::LEA_lo_cbuf(u64 insn) { | ||
| 101 | LEA_lo(*this, insn, GetCbuf(insn)); | ||
| 102 | } | ||
| 103 | |||
| 104 | void TranslatorVisitor::LEA_lo_imm(u64 insn) { | ||
| 105 | LEA_lo(*this, insn, GetImm20(insn)); | ||
| 106 | } | ||
| 107 | |||
| 108 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp new file mode 100644 index 000000000..924fb7a40 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp | |||
| @@ -0,0 +1,196 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Size : u64 { | ||
| 15 | B32, | ||
| 16 | B64, | ||
| 17 | B96, | ||
| 18 | B128, | ||
| 19 | }; | ||
| 20 | |||
| 21 | enum class InterpolationMode : u64 { | ||
| 22 | Pass, | ||
| 23 | Multiply, | ||
| 24 | Constant, | ||
| 25 | Sc, | ||
| 26 | }; | ||
| 27 | |||
| 28 | enum class SampleMode : u64 { | ||
| 29 | Default, | ||
| 30 | Centroid, | ||
| 31 | Offset, | ||
| 32 | }; | ||
| 33 | |||
| 34 | u32 NumElements(Size size) { | ||
| 35 | switch (size) { | ||
| 36 | case Size::B32: | ||
| 37 | return 1; | ||
| 38 | case Size::B64: | ||
| 39 | return 2; | ||
| 40 | case Size::B96: | ||
| 41 | return 3; | ||
| 42 | case Size::B128: | ||
| 43 | return 4; | ||
| 44 | } | ||
| 45 | throw InvalidArgument("Invalid size {}", size); | ||
| 46 | } | ||
| 47 | |||
| 48 | template <typename F> | ||
| 49 | void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) { | ||
| 50 | const IR::U32 index_value{v.X(index_reg)}; | ||
| 51 | for (u32 element = 0; element < num_elements; ++element) { | ||
| 52 | const IR::U32 final_offset{ | ||
| 53 | element == 0 ? index_value : IR::U32{v.ir.IAdd(index_value, v.ir.Imm32(element * 4U))}}; | ||
| 54 | f(element, final_offset); | ||
| 55 | } | ||
| 56 | } | ||
| 57 | |||
| 58 | } // Anonymous namespace | ||
| 59 | |||
| 60 | void TranslatorVisitor::ALD(u64 insn) { | ||
| 61 | union { | ||
| 62 | u64 raw; | ||
| 63 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 64 | BitField<8, 8, IR::Reg> index_reg; | ||
| 65 | BitField<20, 10, u64> absolute_offset; | ||
| 66 | BitField<20, 11, s64> relative_offset; | ||
| 67 | BitField<39, 8, IR::Reg> vertex_reg; | ||
| 68 | BitField<32, 1, u64> o; | ||
| 69 | BitField<31, 1, u64> patch; | ||
| 70 | BitField<47, 2, Size> size; | ||
| 71 | } const ald{insn}; | ||
| 72 | |||
| 73 | const u64 offset{ald.absolute_offset.Value()}; | ||
| 74 | if (offset % 4 != 0) { | ||
| 75 | throw NotImplementedException("Unaligned absolute offset {}", offset); | ||
| 76 | } | ||
| 77 | const IR::U32 vertex{X(ald.vertex_reg)}; | ||
| 78 | const u32 num_elements{NumElements(ald.size)}; | ||
| 79 | if (ald.index_reg == IR::Reg::RZ) { | ||
| 80 | for (u32 element = 0; element < num_elements; ++element) { | ||
| 81 | if (ald.patch != 0) { | ||
| 82 | const IR::Patch patch{offset / 4 + element}; | ||
| 83 | F(ald.dest_reg + static_cast<int>(element), ir.GetPatch(patch)); | ||
| 84 | } else { | ||
| 85 | const IR::Attribute attr{offset / 4 + element}; | ||
| 86 | F(ald.dest_reg + static_cast<int>(element), ir.GetAttribute(attr, vertex)); | ||
| 87 | } | ||
| 88 | } | ||
| 89 | return; | ||
| 90 | } | ||
| 91 | if (ald.patch != 0) { | ||
| 92 | throw NotImplementedException("Indirect patch read"); | ||
| 93 | } | ||
| 94 | HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { | ||
| 95 | F(ald.dest_reg + static_cast<int>(element), ir.GetAttributeIndexed(final_offset, vertex)); | ||
| 96 | }); | ||
| 97 | } | ||
| 98 | |||
| 99 | void TranslatorVisitor::AST(u64 insn) { | ||
| 100 | union { | ||
| 101 | u64 raw; | ||
| 102 | BitField<0, 8, IR::Reg> src_reg; | ||
| 103 | BitField<8, 8, IR::Reg> index_reg; | ||
| 104 | BitField<20, 10, u64> absolute_offset; | ||
| 105 | BitField<20, 11, s64> relative_offset; | ||
| 106 | BitField<31, 1, u64> patch; | ||
| 107 | BitField<39, 8, IR::Reg> vertex_reg; | ||
| 108 | BitField<47, 2, Size> size; | ||
| 109 | } const ast{insn}; | ||
| 110 | |||
| 111 | if (ast.index_reg != IR::Reg::RZ) { | ||
| 112 | throw NotImplementedException("Indexed store"); | ||
| 113 | } | ||
| 114 | const u64 offset{ast.absolute_offset.Value()}; | ||
| 115 | if (offset % 4 != 0) { | ||
| 116 | throw NotImplementedException("Unaligned absolute offset {}", offset); | ||
| 117 | } | ||
| 118 | const IR::U32 vertex{X(ast.vertex_reg)}; | ||
| 119 | const u32 num_elements{NumElements(ast.size)}; | ||
| 120 | if (ast.index_reg == IR::Reg::RZ) { | ||
| 121 | for (u32 element = 0; element < num_elements; ++element) { | ||
| 122 | if (ast.patch != 0) { | ||
| 123 | const IR::Patch patch{offset / 4 + element}; | ||
| 124 | ir.SetPatch(patch, F(ast.src_reg + static_cast<int>(element))); | ||
| 125 | } else { | ||
| 126 | const IR::Attribute attr{offset / 4 + element}; | ||
| 127 | ir.SetAttribute(attr, F(ast.src_reg + static_cast<int>(element)), vertex); | ||
| 128 | } | ||
| 129 | } | ||
| 130 | return; | ||
| 131 | } | ||
| 132 | if (ast.patch != 0) { | ||
| 133 | throw NotImplementedException("Indexed tessellation patch store"); | ||
| 134 | } | ||
| 135 | HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { | ||
| 136 | ir.SetAttributeIndexed(final_offset, F(ast.src_reg + static_cast<int>(element)), vertex); | ||
| 137 | }); | ||
| 138 | } | ||
| 139 | |||
| 140 | void TranslatorVisitor::IPA(u64 insn) { | ||
| 141 | // IPA is the instruction used to read varyings from a fragment shader. | ||
| 142 | // gl_FragCoord is mapped to the gl_Position attribute. | ||
| 143 | // It yields unknown results when used outside of the fragment shader stage. | ||
| 144 | union { | ||
| 145 | u64 raw; | ||
| 146 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 147 | BitField<8, 8, IR::Reg> index_reg; | ||
| 148 | BitField<20, 8, IR::Reg> multiplier; | ||
| 149 | BitField<30, 8, IR::Attribute> attribute; | ||
| 150 | BitField<38, 1, u64> idx; | ||
| 151 | BitField<51, 1, u64> sat; | ||
| 152 | BitField<52, 2, SampleMode> sample_mode; | ||
| 153 | BitField<54, 2, InterpolationMode> interpolation_mode; | ||
| 154 | } const ipa{insn}; | ||
| 155 | |||
| 156 | // Indexed IPAs are used for indexed varyings. | ||
| 157 | // For example: | ||
| 158 | // | ||
| 159 | // in vec4 colors[4]; | ||
| 160 | // uniform int idx; | ||
| 161 | // void main() { | ||
| 162 | // gl_FragColor = colors[idx]; | ||
| 163 | // } | ||
| 164 | const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ}; | ||
| 165 | const IR::Attribute attribute{ipa.attribute}; | ||
| 166 | IR::F32 value{is_indexed ? ir.GetAttributeIndexed(X(ipa.index_reg)) | ||
| 167 | : ir.GetAttribute(attribute)}; | ||
| 168 | if (IR::IsGeneric(attribute)) { | ||
| 169 | const ProgramHeader& sph{env.SPH()}; | ||
| 170 | const u32 attr_index{IR::GenericAttributeIndex(attribute)}; | ||
| 171 | const u32 element{static_cast<u32>(attribute) % 4}; | ||
| 172 | const std::array input_map{sph.ps.GenericInputMap(attr_index)}; | ||
| 173 | const bool is_perspective{input_map[element] == Shader::PixelImap::Perspective}; | ||
| 174 | if (is_perspective) { | ||
| 175 | const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)}; | ||
| 176 | value = ir.FPMul(value, position_w); | ||
| 177 | } | ||
| 178 | } | ||
| 179 | if (ipa.interpolation_mode == InterpolationMode::Multiply) { | ||
| 180 | value = ir.FPMul(value, F(ipa.multiplier)); | ||
| 181 | } | ||
| 182 | |||
| 183 | // Saturated IPAs are generally generated out of clamped varyings. | ||
| 184 | // For example: clamp(some_varying, 0.0, 1.0) | ||
| 185 | const bool is_saturated{ipa.sat != 0}; | ||
| 186 | if (is_saturated) { | ||
| 187 | if (attribute == IR::Attribute::FrontFace) { | ||
| 188 | throw NotImplementedException("IPA.SAT on FrontFace"); | ||
| 189 | } | ||
| 190 | value = ir.FPSaturate(value); | ||
| 191 | } | ||
| 192 | |||
| 193 | F(ipa.dest_reg, value); | ||
| 194 | } | ||
| 195 | |||
| 196 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp new file mode 100644 index 000000000..d2a1dbf61 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp | |||
| @@ -0,0 +1,218 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Size : u64 { | ||
| 12 | U8, | ||
| 13 | S8, | ||
| 14 | U16, | ||
| 15 | S16, | ||
| 16 | B32, | ||
| 17 | B64, | ||
| 18 | B128, | ||
| 19 | }; | ||
| 20 | |||
| 21 | IR::U32 Offset(TranslatorVisitor& v, u64 insn) { | ||
| 22 | union { | ||
| 23 | u64 raw; | ||
| 24 | BitField<8, 8, IR::Reg> offset_reg; | ||
| 25 | BitField<20, 24, u64> absolute_offset; | ||
| 26 | BitField<20, 24, s64> relative_offset; | ||
| 27 | } const encoding{insn}; | ||
| 28 | |||
| 29 | if (encoding.offset_reg == IR::Reg::RZ) { | ||
| 30 | return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset)); | ||
| 31 | } else { | ||
| 32 | const s32 relative{static_cast<s32>(encoding.relative_offset.Value())}; | ||
| 33 | return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 37 | std::pair<IR::U32, IR::U32> WordOffset(TranslatorVisitor& v, u64 insn) { | ||
| 38 | const IR::U32 offset{Offset(v, insn)}; | ||
| 39 | if (offset.IsImmediate()) { | ||
| 40 | return {v.ir.Imm32(offset.U32() / 4), offset}; | ||
| 41 | } else { | ||
| 42 | return {v.ir.ShiftRightArithmetic(offset, v.ir.Imm32(2)), offset}; | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | std::pair<int, bool> GetSize(u64 insn) { | ||
| 47 | union { | ||
| 48 | u64 raw; | ||
| 49 | BitField<48, 3, Size> size; | ||
| 50 | } const encoding{insn}; | ||
| 51 | |||
| 52 | switch (encoding.size) { | ||
| 53 | case Size::U8: | ||
| 54 | return {8, false}; | ||
| 55 | case Size::S8: | ||
| 56 | return {8, true}; | ||
| 57 | case Size::U16: | ||
| 58 | return {16, false}; | ||
| 59 | case Size::S16: | ||
| 60 | return {16, true}; | ||
| 61 | case Size::B32: | ||
| 62 | return {32, false}; | ||
| 63 | case Size::B64: | ||
| 64 | return {64, false}; | ||
| 65 | case Size::B128: | ||
| 66 | return {128, false}; | ||
| 67 | default: | ||
| 68 | throw NotImplementedException("Invalid size {}", encoding.size.Value()); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | IR::Reg Reg(u64 insn) { | ||
| 73 | union { | ||
| 74 | u64 raw; | ||
| 75 | BitField<0, 8, IR::Reg> reg; | ||
| 76 | } const encoding{insn}; | ||
| 77 | |||
| 78 | return encoding.reg; | ||
| 79 | } | ||
| 80 | |||
| 81 | IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) { | ||
| 82 | return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24)); | ||
| 83 | } | ||
| 84 | |||
| 85 | IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) { | ||
| 86 | return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16)); | ||
| 87 | } | ||
| 88 | |||
| 89 | IR::U32 LoadLocal(TranslatorVisitor& v, const IR::U32& word_offset, const IR::U32& offset) { | ||
| 90 | const IR::U32 local_memory_size{v.ir.Imm32(v.env.LocalMemorySize())}; | ||
| 91 | const IR::U1 in_bounds{v.ir.ILessThan(offset, local_memory_size, false)}; | ||
| 92 | return IR::U32{v.ir.Select(in_bounds, v.ir.LoadLocal(word_offset), v.ir.Imm32(0))}; | ||
| 93 | } | ||
| 94 | } // Anonymous namespace | ||
| 95 | |||
| 96 | void TranslatorVisitor::LDL(u64 insn) { | ||
| 97 | const auto [word_offset, offset]{WordOffset(*this, insn)}; | ||
| 98 | const IR::U32 word{LoadLocal(*this, word_offset, offset)}; | ||
| 99 | const IR::Reg dest{Reg(insn)}; | ||
| 100 | const auto [bit_size, is_signed]{GetSize(insn)}; | ||
| 101 | switch (bit_size) { | ||
| 102 | case 8: { | ||
| 103 | const IR::U32 bit{ByteOffset(ir, offset)}; | ||
| 104 | X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(8), is_signed)); | ||
| 105 | break; | ||
| 106 | } | ||
| 107 | case 16: { | ||
| 108 | const IR::U32 bit{ShortOffset(ir, offset)}; | ||
| 109 | X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(16), is_signed)); | ||
| 110 | break; | ||
| 111 | } | ||
| 112 | case 32: | ||
| 113 | case 64: | ||
| 114 | case 128: | ||
| 115 | if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) { | ||
| 116 | throw NotImplementedException("Unaligned destination register {}", dest); | ||
| 117 | } | ||
| 118 | X(dest, word); | ||
| 119 | for (int i = 1; i < bit_size / 32; ++i) { | ||
| 120 | const IR::U32 sub_word_offset{ir.IAdd(word_offset, ir.Imm32(i))}; | ||
| 121 | const IR::U32 sub_offset{ir.IAdd(offset, ir.Imm32(i * 4))}; | ||
| 122 | X(dest + i, LoadLocal(*this, sub_word_offset, sub_offset)); | ||
| 123 | } | ||
| 124 | break; | ||
| 125 | } | ||
| 126 | } | ||
| 127 | |||
| 128 | void TranslatorVisitor::LDS(u64 insn) { | ||
| 129 | const IR::U32 offset{Offset(*this, insn)}; | ||
| 130 | const IR::Reg dest{Reg(insn)}; | ||
| 131 | const auto [bit_size, is_signed]{GetSize(insn)}; | ||
| 132 | const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)}; | ||
| 133 | switch (bit_size) { | ||
| 134 | case 8: | ||
| 135 | case 16: | ||
| 136 | case 32: | ||
| 137 | X(dest, IR::U32{value}); | ||
| 138 | break; | ||
| 139 | case 64: | ||
| 140 | case 128: | ||
| 141 | if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) { | ||
| 142 | throw NotImplementedException("Unaligned destination register {}", dest); | ||
| 143 | } | ||
| 144 | for (int element = 0; element < bit_size / 32; ++element) { | ||
| 145 | X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))}); | ||
| 146 | } | ||
| 147 | break; | ||
| 148 | } | ||
| 149 | } | ||
| 150 | |||
| 151 | void TranslatorVisitor::STL(u64 insn) { | ||
| 152 | const auto [word_offset, offset]{WordOffset(*this, insn)}; | ||
| 153 | if (offset.IsImmediate()) { | ||
| 154 | // TODO: Support storing out of bounds at runtime | ||
| 155 | if (offset.U32() >= env.LocalMemorySize()) { | ||
| 156 | LOG_WARNING(Shader, "Storing local memory at 0x{:x} with a size of 0x{:x}, dropping", | ||
| 157 | offset.U32(), env.LocalMemorySize()); | ||
| 158 | return; | ||
| 159 | } | ||
| 160 | } | ||
| 161 | const IR::Reg reg{Reg(insn)}; | ||
| 162 | const IR::U32 src{X(reg)}; | ||
| 163 | const int bit_size{GetSize(insn).first}; | ||
| 164 | switch (bit_size) { | ||
| 165 | case 8: { | ||
| 166 | const IR::U32 bit{ByteOffset(ir, offset)}; | ||
| 167 | const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))}; | ||
| 168 | ir.WriteLocal(word_offset, value); | ||
| 169 | break; | ||
| 170 | } | ||
| 171 | case 16: { | ||
| 172 | const IR::U32 bit{ShortOffset(ir, offset)}; | ||
| 173 | const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))}; | ||
| 174 | ir.WriteLocal(word_offset, value); | ||
| 175 | break; | ||
| 176 | } | ||
| 177 | case 32: | ||
| 178 | case 64: | ||
| 179 | case 128: | ||
| 180 | if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) { | ||
| 181 | throw NotImplementedException("Unaligned source register"); | ||
| 182 | } | ||
| 183 | ir.WriteLocal(word_offset, src); | ||
| 184 | for (int i = 1; i < bit_size / 32; ++i) { | ||
| 185 | ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i)); | ||
| 186 | } | ||
| 187 | break; | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | void TranslatorVisitor::STS(u64 insn) { | ||
| 192 | const IR::U32 offset{Offset(*this, insn)}; | ||
| 193 | const IR::Reg reg{Reg(insn)}; | ||
| 194 | const int bit_size{GetSize(insn).first}; | ||
| 195 | switch (bit_size) { | ||
| 196 | case 8: | ||
| 197 | case 16: | ||
| 198 | case 32: | ||
| 199 | ir.WriteShared(bit_size, offset, X(reg)); | ||
| 200 | break; | ||
| 201 | case 64: | ||
| 202 | if (!IR::IsAligned(reg, 2)) { | ||
| 203 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 204 | } | ||
| 205 | ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1))); | ||
| 206 | break; | ||
| 207 | case 128: { | ||
| 208 | if (!IR::IsAligned(reg, 2)) { | ||
| 209 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 210 | } | ||
| 211 | const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))}; | ||
| 212 | ir.WriteShared(128, offset, vector); | ||
| 213 | break; | ||
| 214 | } | ||
| 215 | } | ||
| 216 | } | ||
| 217 | |||
| 218 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp new file mode 100644 index 000000000..36c5cff2f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp | |||
| @@ -0,0 +1,184 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | enum class LoadSize : u64 { | ||
| 14 | U8, // Zero-extend | ||
| 15 | S8, // Sign-extend | ||
| 16 | U16, // Zero-extend | ||
| 17 | S16, // Sign-extend | ||
| 18 | B32, | ||
| 19 | B64, | ||
| 20 | B128, | ||
| 21 | U128, // ??? | ||
| 22 | }; | ||
| 23 | |||
| 24 | enum class StoreSize : u64 { | ||
| 25 | U8, // Zero-extend | ||
| 26 | S8, // Sign-extend | ||
| 27 | U16, // Zero-extend | ||
| 28 | S16, // Sign-extend | ||
| 29 | B32, | ||
| 30 | B64, | ||
| 31 | B128, | ||
| 32 | }; | ||
| 33 | |||
| 34 | // See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html | ||
| 35 | enum class LoadCache : u64 { | ||
| 36 | CA, // Cache at all levels, likely to be accessed again | ||
| 37 | CG, // Cache at global level (cache in L2 and below, not L1) | ||
| 38 | CI, // ??? | ||
| 39 | CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again) | ||
| 40 | }; | ||
| 41 | |||
| 42 | // See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html | ||
| 43 | enum class StoreCache : u64 { | ||
| 44 | WB, // Cache write-back all coherent levels | ||
| 45 | CG, // Cache at global level | ||
| 46 | CS, // Cache streaming, likely to be accessed once | ||
| 47 | WT, // Cache write-through (to system memory) | ||
| 48 | }; | ||
| 49 | |||
| 50 | IR::U64 Address(TranslatorVisitor& v, u64 insn) { | ||
| 51 | union { | ||
| 52 | u64 raw; | ||
| 53 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 54 | BitField<20, 24, s64> addr_offset; | ||
| 55 | BitField<20, 24, u64> rz_addr_offset; | ||
| 56 | BitField<45, 1, u64> e; | ||
| 57 | } const mem{insn}; | ||
| 58 | |||
| 59 | const IR::U64 address{[&]() -> IR::U64 { | ||
| 60 | if (mem.e == 0) { | ||
| 61 | // LDG/STG without .E uses a 32-bit pointer, zero-extend it | ||
| 62 | return v.ir.UConvert(64, v.X(mem.addr_reg)); | ||
| 63 | } | ||
| 64 | if (!IR::IsAligned(mem.addr_reg, 2)) { | ||
| 65 | throw NotImplementedException("Unaligned address register"); | ||
| 66 | } | ||
| 67 | // Pack two registers to build the 64-bit address | ||
| 68 | return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1))); | ||
| 69 | }()}; | ||
| 70 | const u64 addr_offset{[&]() -> u64 { | ||
| 71 | if (mem.addr_reg == IR::Reg::RZ) { | ||
| 72 | // When RZ is used, the address is an absolute address | ||
| 73 | return static_cast<u64>(mem.rz_addr_offset.Value()); | ||
| 74 | } else { | ||
| 75 | return static_cast<u64>(mem.addr_offset.Value()); | ||
| 76 | } | ||
| 77 | }()}; | ||
| 78 | // Apply the offset | ||
| 79 | return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); | ||
| 80 | } | ||
| 81 | } // Anonymous namespace | ||
| 82 | |||
| 83 | void TranslatorVisitor::LDG(u64 insn) { | ||
| 84 | // LDG loads global memory into registers | ||
| 85 | union { | ||
| 86 | u64 raw; | ||
| 87 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 88 | BitField<46, 2, LoadCache> cache; | ||
| 89 | BitField<48, 3, LoadSize> size; | ||
| 90 | } const ldg{insn}; | ||
| 91 | |||
| 92 | // Pointer to load data from | ||
| 93 | const IR::U64 address{Address(*this, insn)}; | ||
| 94 | const IR::Reg dest_reg{ldg.dest_reg}; | ||
| 95 | switch (ldg.size) { | ||
| 96 | case LoadSize::U8: | ||
| 97 | X(dest_reg, ir.LoadGlobalU8(address)); | ||
| 98 | break; | ||
| 99 | case LoadSize::S8: | ||
| 100 | X(dest_reg, ir.LoadGlobalS8(address)); | ||
| 101 | break; | ||
| 102 | case LoadSize::U16: | ||
| 103 | X(dest_reg, ir.LoadGlobalU16(address)); | ||
| 104 | break; | ||
| 105 | case LoadSize::S16: | ||
| 106 | X(dest_reg, ir.LoadGlobalS16(address)); | ||
| 107 | break; | ||
| 108 | case LoadSize::B32: | ||
| 109 | X(dest_reg, ir.LoadGlobal32(address)); | ||
| 110 | break; | ||
| 111 | case LoadSize::B64: { | ||
| 112 | if (!IR::IsAligned(dest_reg, 2)) { | ||
| 113 | throw NotImplementedException("Unaligned data registers"); | ||
| 114 | } | ||
| 115 | const IR::Value vector{ir.LoadGlobal64(address)}; | ||
| 116 | for (int i = 0; i < 2; ++i) { | ||
| 117 | X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); | ||
| 118 | } | ||
| 119 | break; | ||
| 120 | } | ||
| 121 | case LoadSize::B128: | ||
| 122 | case LoadSize::U128: { | ||
| 123 | if (!IR::IsAligned(dest_reg, 4)) { | ||
| 124 | throw NotImplementedException("Unaligned data registers"); | ||
| 125 | } | ||
| 126 | const IR::Value vector{ir.LoadGlobal128(address)}; | ||
| 127 | for (int i = 0; i < 4; ++i) { | ||
| 128 | X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); | ||
| 129 | } | ||
| 130 | break; | ||
| 131 | } | ||
| 132 | default: | ||
| 133 | throw NotImplementedException("Invalid LDG size {}", ldg.size.Value()); | ||
| 134 | } | ||
| 135 | } | ||
| 136 | |||
| 137 | void TranslatorVisitor::STG(u64 insn) { | ||
| 138 | // STG stores registers into global memory. | ||
| 139 | union { | ||
| 140 | u64 raw; | ||
| 141 | BitField<0, 8, IR::Reg> data_reg; | ||
| 142 | BitField<46, 2, StoreCache> cache; | ||
| 143 | BitField<48, 3, StoreSize> size; | ||
| 144 | } const stg{insn}; | ||
| 145 | |||
| 146 | // Pointer to store data into | ||
| 147 | const IR::U64 address{Address(*this, insn)}; | ||
| 148 | const IR::Reg data_reg{stg.data_reg}; | ||
| 149 | switch (stg.size) { | ||
| 150 | case StoreSize::U8: | ||
| 151 | ir.WriteGlobalU8(address, X(data_reg)); | ||
| 152 | break; | ||
| 153 | case StoreSize::S8: | ||
| 154 | ir.WriteGlobalS8(address, X(data_reg)); | ||
| 155 | break; | ||
| 156 | case StoreSize::U16: | ||
| 157 | ir.WriteGlobalU16(address, X(data_reg)); | ||
| 158 | break; | ||
| 159 | case StoreSize::S16: | ||
| 160 | ir.WriteGlobalS16(address, X(data_reg)); | ||
| 161 | break; | ||
| 162 | case StoreSize::B32: | ||
| 163 | ir.WriteGlobal32(address, X(data_reg)); | ||
| 164 | break; | ||
| 165 | case StoreSize::B64: { | ||
| 166 | if (!IR::IsAligned(data_reg, 2)) { | ||
| 167 | throw NotImplementedException("Unaligned data registers"); | ||
| 168 | } | ||
| 169 | const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))}; | ||
| 170 | ir.WriteGlobal64(address, vector); | ||
| 171 | break; | ||
| 172 | } | ||
| 173 | case StoreSize::B128: | ||
| 174 | if (!IR::IsAligned(data_reg, 4)) { | ||
| 175 | throw NotImplementedException("Unaligned data registers"); | ||
| 176 | } | ||
| 177 | const IR::Value vector{ | ||
| 178 | ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))}; | ||
| 179 | ir.WriteGlobal128(address, vector); | ||
| 180 | break; | ||
| 181 | } | ||
| 182 | } | ||
| 183 | |||
| 184 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp new file mode 100644 index 000000000..92cd27ed4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp | |||
| @@ -0,0 +1,116 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | enum class LogicalOp : u64 { | ||
| 13 | AND, | ||
| 14 | OR, | ||
| 15 | XOR, | ||
| 16 | PASS_B, | ||
| 17 | }; | ||
| 18 | |||
| 19 | [[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1, | ||
| 20 | const IR::U32& operand_2, LogicalOp op) { | ||
| 21 | switch (op) { | ||
| 22 | case LogicalOp::AND: | ||
| 23 | return ir.BitwiseAnd(operand_1, operand_2); | ||
| 24 | case LogicalOp::OR: | ||
| 25 | return ir.BitwiseOr(operand_1, operand_2); | ||
| 26 | case LogicalOp::XOR: | ||
| 27 | return ir.BitwiseXor(operand_1, operand_2); | ||
| 28 | case LogicalOp::PASS_B: | ||
| 29 | return operand_2; | ||
| 30 | default: | ||
| 31 | throw NotImplementedException("Invalid Logical operation {}", op); | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b, | ||
| 36 | LogicalOp bit_op, std::optional<PredicateOp> pred_op = std::nullopt, | ||
| 37 | IR::Pred dest_pred = IR::Pred::PT) { | ||
| 38 | union { | ||
| 39 | u64 insn; | ||
| 40 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 41 | BitField<8, 8, IR::Reg> src_reg; | ||
| 42 | } const lop{insn}; | ||
| 43 | |||
| 44 | if (x) { | ||
| 45 | throw NotImplementedException("X"); | ||
| 46 | } | ||
| 47 | IR::U32 op_a{v.X(lop.src_reg)}; | ||
| 48 | if (inv_a != 0) { | ||
| 49 | op_a = v.ir.BitwiseNot(op_a); | ||
| 50 | } | ||
| 51 | if (inv_b != 0) { | ||
| 52 | op_b = v.ir.BitwiseNot(op_b); | ||
| 53 | } | ||
| 54 | |||
| 55 | const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)}; | ||
| 56 | if (pred_op) { | ||
| 57 | const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)}; | ||
| 58 | v.ir.SetPred(dest_pred, pred_result); | ||
| 59 | } | ||
| 60 | if (cc) { | ||
| 61 | if (bit_op == LogicalOp::PASS_B) { | ||
| 62 | v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0))); | ||
| 63 | v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true)); | ||
| 64 | } else { | ||
| 65 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 66 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 67 | } | ||
| 68 | v.ResetCFlag(); | ||
| 69 | v.ResetOFlag(); | ||
| 70 | } | ||
| 71 | v.X(lop.dest_reg, result); | ||
| 72 | } | ||
| 73 | |||
| 74 | void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { | ||
| 75 | union { | ||
| 76 | u64 insn; | ||
| 77 | BitField<39, 1, u64> inv_a; | ||
| 78 | BitField<40, 1, u64> inv_b; | ||
| 79 | BitField<41, 2, LogicalOp> bit_op; | ||
| 80 | BitField<43, 1, u64> x; | ||
| 81 | BitField<44, 2, PredicateOp> pred_op; | ||
| 82 | BitField<47, 1, u64> cc; | ||
| 83 | BitField<48, 3, IR::Pred> dest_pred; | ||
| 84 | } const lop{insn}; | ||
| 85 | |||
| 86 | LOP(v, insn, op_b, lop.x != 0, lop.cc != 0, lop.inv_a != 0, lop.inv_b != 0, lop.bit_op, | ||
| 87 | lop.pred_op, lop.dest_pred); | ||
| 88 | } | ||
| 89 | } // Anonymous namespace | ||
| 90 | |||
| 91 | void TranslatorVisitor::LOP_reg(u64 insn) { | ||
| 92 | LOP(*this, insn, GetReg20(insn)); | ||
| 93 | } | ||
| 94 | |||
| 95 | void TranslatorVisitor::LOP_cbuf(u64 insn) { | ||
| 96 | LOP(*this, insn, GetCbuf(insn)); | ||
| 97 | } | ||
| 98 | |||
| 99 | void TranslatorVisitor::LOP_imm(u64 insn) { | ||
| 100 | LOP(*this, insn, GetImm20(insn)); | ||
| 101 | } | ||
| 102 | |||
| 103 | void TranslatorVisitor::LOP32I(u64 insn) { | ||
| 104 | union { | ||
| 105 | u64 raw; | ||
| 106 | BitField<53, 2, LogicalOp> bit_op; | ||
| 107 | BitField<57, 1, u64> x; | ||
| 108 | BitField<52, 1, u64> cc; | ||
| 109 | BitField<55, 1, u64> inv_a; | ||
| 110 | BitField<56, 1, u64> inv_b; | ||
| 111 | } const lop32i{insn}; | ||
| 112 | |||
| 113 | LOP(*this, insn, GetImm32(insn), lop32i.x != 0, lop32i.cc != 0, lop32i.inv_a != 0, | ||
| 114 | lop32i.inv_b != 0, lop32i.bit_op); | ||
| 115 | } | ||
| 116 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp new file mode 100644 index 000000000..e0fe47912 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp | |||
| @@ -0,0 +1,122 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | // https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651 | ||
| 13 | // Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table) | ||
| 14 | IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c, | ||
| 15 | u64 ttbl) { | ||
| 16 | IR::U32 r{ir.Imm32(0)}; | ||
| 17 | const IR::U32 not_a{ir.BitwiseNot(a)}; | ||
| 18 | const IR::U32 not_b{ir.BitwiseNot(b)}; | ||
| 19 | const IR::U32 not_c{ir.BitwiseNot(c)}; | ||
| 20 | if (ttbl & 0x01) { | ||
| 21 | // r |= ~a & ~b & ~c; | ||
| 22 | const auto lhs{ir.BitwiseAnd(not_a, not_b)}; | ||
| 23 | const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||
| 24 | r = ir.BitwiseOr(r, rhs); | ||
| 25 | } | ||
| 26 | if (ttbl & 0x02) { | ||
| 27 | // r |= ~a & ~b & c; | ||
| 28 | const auto lhs{ir.BitwiseAnd(not_a, not_b)}; | ||
| 29 | const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||
| 30 | r = ir.BitwiseOr(r, rhs); | ||
| 31 | } | ||
| 32 | if (ttbl & 0x04) { | ||
| 33 | // r |= ~a & b & ~c; | ||
| 34 | const auto lhs{ir.BitwiseAnd(not_a, b)}; | ||
| 35 | const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||
| 36 | r = ir.BitwiseOr(r, rhs); | ||
| 37 | } | ||
| 38 | if (ttbl & 0x08) { | ||
| 39 | // r |= ~a & b & c; | ||
| 40 | const auto lhs{ir.BitwiseAnd(not_a, b)}; | ||
| 41 | const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||
| 42 | r = ir.BitwiseOr(r, rhs); | ||
| 43 | } | ||
| 44 | if (ttbl & 0x10) { | ||
| 45 | // r |= a & ~b & ~c; | ||
| 46 | const auto lhs{ir.BitwiseAnd(a, not_b)}; | ||
| 47 | const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||
| 48 | r = ir.BitwiseOr(r, rhs); | ||
| 49 | } | ||
| 50 | if (ttbl & 0x20) { | ||
| 51 | // r |= a & ~b & c; | ||
| 52 | const auto lhs{ir.BitwiseAnd(a, not_b)}; | ||
| 53 | const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||
| 54 | r = ir.BitwiseOr(r, rhs); | ||
| 55 | } | ||
| 56 | if (ttbl & 0x40) { | ||
| 57 | // r |= a & b & ~c; | ||
| 58 | const auto lhs{ir.BitwiseAnd(a, b)}; | ||
| 59 | const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||
| 60 | r = ir.BitwiseOr(r, rhs); | ||
| 61 | } | ||
| 62 | if (ttbl & 0x80) { | ||
| 63 | // r |= a & b & c; | ||
| 64 | const auto lhs{ir.BitwiseAnd(a, b)}; | ||
| 65 | const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||
| 66 | r = ir.BitwiseOr(r, rhs); | ||
| 67 | } | ||
| 68 | return r; | ||
| 69 | } | ||
| 70 | |||
| 71 | IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) { | ||
| 72 | union { | ||
| 73 | u64 insn; | ||
| 74 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 75 | BitField<8, 8, IR::Reg> src_reg; | ||
| 76 | BitField<47, 1, u64> cc; | ||
| 77 | } const lop3{insn}; | ||
| 78 | |||
| 79 | if (lop3.cc != 0) { | ||
| 80 | throw NotImplementedException("LOP3 CC"); | ||
| 81 | } | ||
| 82 | |||
| 83 | const IR::U32 op_a{v.X(lop3.src_reg)}; | ||
| 84 | const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)}; | ||
| 85 | v.X(lop3.dest_reg, result); | ||
| 86 | return result; | ||
| 87 | } | ||
| 88 | |||
| 89 | u64 GetLut48(u64 insn) { | ||
| 90 | union { | ||
| 91 | u64 raw; | ||
| 92 | BitField<48, 8, u64> lut; | ||
| 93 | } const lut{insn}; | ||
| 94 | return lut.lut; | ||
| 95 | } | ||
| 96 | } // Anonymous namespace | ||
| 97 | |||
| 98 | void TranslatorVisitor::LOP3_reg(u64 insn) { | ||
| 99 | union { | ||
| 100 | u64 insn; | ||
| 101 | BitField<28, 8, u64> lut; | ||
| 102 | BitField<38, 1, u64> x; | ||
| 103 | BitField<36, 2, PredicateOp> pred_op; | ||
| 104 | BitField<48, 3, IR::Pred> pred; | ||
| 105 | } const lop3{insn}; | ||
| 106 | |||
| 107 | if (lop3.x != 0) { | ||
| 108 | throw NotImplementedException("LOP3 X"); | ||
| 109 | } | ||
| 110 | const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), lop3.lut)}; | ||
| 111 | const IR::U1 pred_result{PredicateOperation(ir, result, lop3.pred_op)}; | ||
| 112 | ir.SetPred(lop3.pred, pred_result); | ||
| 113 | } | ||
| 114 | |||
| 115 | void TranslatorVisitor::LOP3_cbuf(u64 insn) { | ||
| 116 | LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn)); | ||
| 117 | } | ||
| 118 | |||
| 119 | void TranslatorVisitor::LOP3_imm(u64 insn) { | ||
| 120 | LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn)); | ||
| 121 | } | ||
| 122 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp new file mode 100644 index 000000000..4324fd443 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Mode : u64 { | ||
| 12 | PR, | ||
| 13 | CC, | ||
| 14 | }; | ||
| 15 | } // Anonymous namespace | ||
| 16 | |||
| 17 | void TranslatorVisitor::P2R_reg(u64) { | ||
| 18 | throw NotImplementedException("P2R (reg)"); | ||
| 19 | } | ||
| 20 | |||
| 21 | void TranslatorVisitor::P2R_cbuf(u64) { | ||
| 22 | throw NotImplementedException("P2R (cbuf)"); | ||
| 23 | } | ||
| 24 | |||
| 25 | void TranslatorVisitor::P2R_imm(u64 insn) { | ||
| 26 | union { | ||
| 27 | u64 raw; | ||
| 28 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 29 | BitField<8, 8, IR::Reg> src; | ||
| 30 | BitField<40, 1, Mode> mode; | ||
| 31 | BitField<41, 2, u64> byte_selector; | ||
| 32 | } const p2r{insn}; | ||
| 33 | |||
| 34 | const u32 mask{GetImm20(insn).U32()}; | ||
| 35 | const bool pr_mode{p2r.mode == Mode::PR}; | ||
| 36 | const u32 num_items{pr_mode ? 7U : 4U}; | ||
| 37 | const u32 offset{static_cast<u32>(p2r.byte_selector) * 8}; | ||
| 38 | IR::U32 insert{ir.Imm32(0)}; | ||
| 39 | for (u32 index = 0; index < num_items; ++index) { | ||
| 40 | if (((mask >> index) & 1) == 0) { | ||
| 41 | continue; | ||
| 42 | } | ||
| 43 | const IR::U1 cond{[this, index, pr_mode] { | ||
| 44 | if (pr_mode) { | ||
| 45 | return ir.GetPred(IR::Pred{index}); | ||
| 46 | } | ||
| 47 | switch (index) { | ||
| 48 | case 0: | ||
| 49 | return ir.GetZFlag(); | ||
| 50 | case 1: | ||
| 51 | return ir.GetSFlag(); | ||
| 52 | case 2: | ||
| 53 | return ir.GetCFlag(); | ||
| 54 | case 3: | ||
| 55 | return ir.GetOFlag(); | ||
| 56 | } | ||
| 57 | throw LogicError("Unreachable P2R index"); | ||
| 58 | }()}; | ||
| 59 | const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))}; | ||
| 60 | insert = ir.BitwiseOr(insert, bit); | ||
| 61 | } | ||
| 62 | const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))}; | ||
| 63 | X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert)); | ||
| 64 | } | ||
| 65 | |||
| 66 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp new file mode 100644 index 000000000..6bb08db8a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<39, 4, u64> mask; | ||
| 18 | BitField<12, 4, u64> mov32i_mask; | ||
| 19 | } const mov{insn}; | ||
| 20 | |||
| 21 | if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) { | ||
| 22 | throw NotImplementedException("Non-full move mask"); | ||
| 23 | } | ||
| 24 | v.X(mov.dest_reg, src); | ||
| 25 | } | ||
| 26 | } // Anonymous namespace | ||
| 27 | |||
| 28 | void TranslatorVisitor::MOV_reg(u64 insn) { | ||
| 29 | MOV(*this, insn, GetReg20(insn)); | ||
| 30 | } | ||
| 31 | |||
| 32 | void TranslatorVisitor::MOV_cbuf(u64 insn) { | ||
| 33 | MOV(*this, insn, GetCbuf(insn)); | ||
| 34 | } | ||
| 35 | |||
| 36 | void TranslatorVisitor::MOV_imm(u64 insn) { | ||
| 37 | MOV(*this, insn, GetImm20(insn)); | ||
| 38 | } | ||
| 39 | |||
| 40 | void TranslatorVisitor::MOV32I(u64 insn) { | ||
| 41 | MOV(*this, insn, GetImm32(insn), true); | ||
| 42 | } | ||
| 43 | |||
| 44 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp new file mode 100644 index 000000000..eda5f177b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Mode : u64 { | ||
| 12 | PR, | ||
| 13 | CC, | ||
| 14 | }; | ||
| 15 | |||
| 16 | void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) { | ||
| 17 | switch (index) { | ||
| 18 | case 0: | ||
| 19 | return ir.SetZFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetZFlag(), src_bit)}); | ||
| 20 | case 1: | ||
| 21 | return ir.SetSFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetSFlag(), src_bit)}); | ||
| 22 | case 2: | ||
| 23 | return ir.SetCFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetCFlag(), src_bit)}); | ||
| 24 | case 3: | ||
| 25 | return ir.SetOFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetOFlag(), src_bit)}); | ||
| 26 | default: | ||
| 27 | throw LogicError("Unreachable R2P index"); | ||
| 28 | } | ||
| 29 | } | ||
| 30 | |||
| 31 | void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) { | ||
| 32 | union { | ||
| 33 | u64 raw; | ||
| 34 | BitField<8, 8, IR::Reg> src_reg; | ||
| 35 | BitField<40, 1, Mode> mode; | ||
| 36 | BitField<41, 2, u64> byte_selector; | ||
| 37 | } const r2p{insn}; | ||
| 38 | const IR::U32 src{v.X(r2p.src_reg)}; | ||
| 39 | const IR::U32 count{v.ir.Imm32(1)}; | ||
| 40 | const bool pr_mode{r2p.mode == Mode::PR}; | ||
| 41 | const u32 num_items{pr_mode ? 7U : 4U}; | ||
| 42 | const u32 offset_base{static_cast<u32>(r2p.byte_selector) * 8}; | ||
| 43 | for (u32 index = 0; index < num_items; ++index) { | ||
| 44 | const IR::U32 offset{v.ir.Imm32(offset_base + index)}; | ||
| 45 | const IR::U1 src_zero{v.ir.GetZeroFromOp(v.ir.BitFieldExtract(src, offset, count, false))}; | ||
| 46 | const IR::U1 src_bit{v.ir.LogicalNot(src_zero)}; | ||
| 47 | const IR::U32 mask_bfe{v.ir.BitFieldExtract(mask, v.ir.Imm32(index), count, false)}; | ||
| 48 | const IR::U1 inv_mask_bit{v.ir.GetZeroFromOp(mask_bfe)}; | ||
| 49 | if (pr_mode) { | ||
| 50 | const IR::Pred pred{index}; | ||
| 51 | v.ir.SetPred(pred, IR::U1{v.ir.Select(inv_mask_bit, v.ir.GetPred(pred), src_bit)}); | ||
| 52 | } else { | ||
| 53 | SetFlag(v.ir, inv_mask_bit, src_bit, index); | ||
| 54 | } | ||
| 55 | } | ||
| 56 | } | ||
| 57 | } // Anonymous namespace | ||
| 58 | |||
| 59 | void TranslatorVisitor::R2P_reg(u64 insn) { | ||
| 60 | R2P(*this, insn, GetReg20(insn)); | ||
| 61 | } | ||
| 62 | |||
| 63 | void TranslatorVisitor::R2P_cbuf(u64 insn) { | ||
| 64 | R2P(*this, insn, GetCbuf(insn)); | ||
| 65 | } | ||
| 66 | |||
| 67 | void TranslatorVisitor::R2P_imm(u64 insn) { | ||
| 68 | R2P(*this, insn, GetImm20(insn)); | ||
| 69 | } | ||
| 70 | |||
| 71 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp new file mode 100644 index 000000000..20cb2674e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp | |||
| @@ -0,0 +1,181 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class SpecialRegister : u64 { | ||
| 12 | SR_LANEID = 0, | ||
| 13 | SR_CLOCK = 1, | ||
| 14 | SR_VIRTCFG = 2, | ||
| 15 | SR_VIRTID = 3, | ||
| 16 | SR_PM0 = 4, | ||
| 17 | SR_PM1 = 5, | ||
| 18 | SR_PM2 = 6, | ||
| 19 | SR_PM3 = 7, | ||
| 20 | SR_PM4 = 8, | ||
| 21 | SR_PM5 = 9, | ||
| 22 | SR_PM6 = 10, | ||
| 23 | SR_PM7 = 11, | ||
| 24 | SR12 = 12, | ||
| 25 | SR13 = 13, | ||
| 26 | SR14 = 14, | ||
| 27 | SR_ORDERING_TICKET = 15, | ||
| 28 | SR_PRIM_TYPE = 16, | ||
| 29 | SR_INVOCATION_ID = 17, | ||
| 30 | SR_Y_DIRECTION = 18, | ||
| 31 | SR_THREAD_KILL = 19, | ||
| 32 | SM_SHADER_TYPE = 20, | ||
| 33 | SR_DIRECTCBEWRITEADDRESSLOW = 21, | ||
| 34 | SR_DIRECTCBEWRITEADDRESSHIGH = 22, | ||
| 35 | SR_DIRECTCBEWRITEENABLE = 23, | ||
| 36 | SR_MACHINE_ID_0 = 24, | ||
| 37 | SR_MACHINE_ID_1 = 25, | ||
| 38 | SR_MACHINE_ID_2 = 26, | ||
| 39 | SR_MACHINE_ID_3 = 27, | ||
| 40 | SR_AFFINITY = 28, | ||
| 41 | SR_INVOCATION_INFO = 29, | ||
| 42 | SR_WSCALEFACTOR_XY = 30, | ||
| 43 | SR_WSCALEFACTOR_Z = 31, | ||
| 44 | SR_TID = 32, | ||
| 45 | SR_TID_X = 33, | ||
| 46 | SR_TID_Y = 34, | ||
| 47 | SR_TID_Z = 35, | ||
| 48 | SR_CTA_PARAM = 36, | ||
| 49 | SR_CTAID_X = 37, | ||
| 50 | SR_CTAID_Y = 38, | ||
| 51 | SR_CTAID_Z = 39, | ||
| 52 | SR_NTID = 40, | ||
| 53 | SR_CirQueueIncrMinusOne = 41, | ||
| 54 | SR_NLATC = 42, | ||
| 55 | SR43 = 43, | ||
| 56 | SR_SM_SPA_VERSION = 44, | ||
| 57 | SR_MULTIPASSSHADERINFO = 45, | ||
| 58 | SR_LWINHI = 46, | ||
| 59 | SR_SWINHI = 47, | ||
| 60 | SR_SWINLO = 48, | ||
| 61 | SR_SWINSZ = 49, | ||
| 62 | SR_SMEMSZ = 50, | ||
| 63 | SR_SMEMBANKS = 51, | ||
| 64 | SR_LWINLO = 52, | ||
| 65 | SR_LWINSZ = 53, | ||
| 66 | SR_LMEMLOSZ = 54, | ||
| 67 | SR_LMEMHIOFF = 55, | ||
| 68 | SR_EQMASK = 56, | ||
| 69 | SR_LTMASK = 57, | ||
| 70 | SR_LEMASK = 58, | ||
| 71 | SR_GTMASK = 59, | ||
| 72 | SR_GEMASK = 60, | ||
| 73 | SR_REGALLOC = 61, | ||
| 74 | SR_BARRIERALLOC = 62, | ||
| 75 | SR63 = 63, | ||
| 76 | SR_GLOBALERRORSTATUS = 64, | ||
| 77 | SR65 = 65, | ||
| 78 | SR_WARPERRORSTATUS = 66, | ||
| 79 | SR_WARPERRORSTATUSCLEAR = 67, | ||
| 80 | SR68 = 68, | ||
| 81 | SR69 = 69, | ||
| 82 | SR70 = 70, | ||
| 83 | SR71 = 71, | ||
| 84 | SR_PM_HI0 = 72, | ||
| 85 | SR_PM_HI1 = 73, | ||
| 86 | SR_PM_HI2 = 74, | ||
| 87 | SR_PM_HI3 = 75, | ||
| 88 | SR_PM_HI4 = 76, | ||
| 89 | SR_PM_HI5 = 77, | ||
| 90 | SR_PM_HI6 = 78, | ||
| 91 | SR_PM_HI7 = 79, | ||
| 92 | SR_CLOCKLO = 80, | ||
| 93 | SR_CLOCKHI = 81, | ||
| 94 | SR_GLOBALTIMERLO = 82, | ||
| 95 | SR_GLOBALTIMERHI = 83, | ||
| 96 | SR84 = 84, | ||
| 97 | SR85 = 85, | ||
| 98 | SR86 = 86, | ||
| 99 | SR87 = 87, | ||
| 100 | SR88 = 88, | ||
| 101 | SR89 = 89, | ||
| 102 | SR90 = 90, | ||
| 103 | SR91 = 91, | ||
| 104 | SR92 = 92, | ||
| 105 | SR93 = 93, | ||
| 106 | SR94 = 94, | ||
| 107 | SR95 = 95, | ||
| 108 | SR_HWTASKID = 96, | ||
| 109 | SR_CIRCULARQUEUEENTRYINDEX = 97, | ||
| 110 | SR_CIRCULARQUEUEENTRYADDRESSLOW = 98, | ||
| 111 | SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99, | ||
| 112 | }; | ||
| 113 | |||
| 114 | [[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { | ||
| 115 | switch (special_register) { | ||
| 116 | case SpecialRegister::SR_INVOCATION_ID: | ||
| 117 | return ir.InvocationId(); | ||
| 118 | case SpecialRegister::SR_THREAD_KILL: | ||
| 119 | return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))}; | ||
| 120 | case SpecialRegister::SR_INVOCATION_INFO: | ||
| 121 | LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO"); | ||
| 122 | return ir.Imm32(0x00ff'0000); | ||
| 123 | case SpecialRegister::SR_TID: { | ||
| 124 | const IR::Value tid{ir.LocalInvocationId()}; | ||
| 125 | return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)}, | ||
| 126 | IR::U32{ir.CompositeExtract(tid, 1)}, | ||
| 127 | ir.Imm32(16), ir.Imm32(8)), | ||
| 128 | IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6)); | ||
| 129 | } | ||
| 130 | case SpecialRegister::SR_TID_X: | ||
| 131 | return ir.LocalInvocationIdX(); | ||
| 132 | case SpecialRegister::SR_TID_Y: | ||
| 133 | return ir.LocalInvocationIdY(); | ||
| 134 | case SpecialRegister::SR_TID_Z: | ||
| 135 | return ir.LocalInvocationIdZ(); | ||
| 136 | case SpecialRegister::SR_CTAID_X: | ||
| 137 | return ir.WorkgroupIdX(); | ||
| 138 | case SpecialRegister::SR_CTAID_Y: | ||
| 139 | return ir.WorkgroupIdY(); | ||
| 140 | case SpecialRegister::SR_CTAID_Z: | ||
| 141 | return ir.WorkgroupIdZ(); | ||
| 142 | case SpecialRegister::SR_WSCALEFACTOR_XY: | ||
| 143 | LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY"); | ||
| 144 | return ir.Imm32(Common::BitCast<u32>(1.0f)); | ||
| 145 | case SpecialRegister::SR_WSCALEFACTOR_Z: | ||
| 146 | LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_Z"); | ||
| 147 | return ir.Imm32(Common::BitCast<u32>(1.0f)); | ||
| 148 | case SpecialRegister::SR_LANEID: | ||
| 149 | return ir.LaneId(); | ||
| 150 | case SpecialRegister::SR_EQMASK: | ||
| 151 | return ir.SubgroupEqMask(); | ||
| 152 | case SpecialRegister::SR_LTMASK: | ||
| 153 | return ir.SubgroupLtMask(); | ||
| 154 | case SpecialRegister::SR_LEMASK: | ||
| 155 | return ir.SubgroupLeMask(); | ||
| 156 | case SpecialRegister::SR_GTMASK: | ||
| 157 | return ir.SubgroupGtMask(); | ||
| 158 | case SpecialRegister::SR_GEMASK: | ||
| 159 | return ir.SubgroupGeMask(); | ||
| 160 | case SpecialRegister::SR_Y_DIRECTION: | ||
| 161 | return ir.BitCast<IR::U32>(ir.YDirection()); | ||
| 162 | case SpecialRegister::SR_AFFINITY: | ||
| 163 | LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY"); | ||
| 164 | return ir.Imm32(0); // This is the default value hardware returns. | ||
| 165 | default: | ||
| 166 | throw NotImplementedException("S2R special register {}", special_register); | ||
| 167 | } | ||
| 168 | } | ||
| 169 | } // Anonymous namespace | ||
| 170 | |||
| 171 | void TranslatorVisitor::S2R(u64 insn) { /* S2R: decode a destination register and a special register id from the instruction word and store the special register's value in the destination. */ | ||
| 172 | union { | ||
| 173 | u64 raw; | ||
| 174 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 175 | BitField<20, 8, SpecialRegister> src_reg; | ||
| 176 | } const s2r{insn}; | ||
| 177 | /* Read() turns the special register id into an IR value, which X() then stores in dest_reg. */ |||
| 178 | X(s2r.dest_reg, Read(ir, s2r.src_reg)); | ||
| 179 | } | ||
| 180 | |||
| 181 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp new file mode 100644 index 000000000..7e26ab359 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -0,0 +1,283 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | /* Handlers in this file either throw through ThrowNotImplemented or have an empty body and emit no IR. NOTE(review): the parameterless empty handlers (CAL, KIL, PBK, PCNT, SSY) are presumably resolved outside this visitor - confirm. */ |||
| 12 | [[noreturn]] static void ThrowNotImplemented(Opcode opcode) { /* Never returns: always throws NotImplementedException with the opcode formatted into the message. */ | ||
| 13 | throw NotImplementedException("Instruction {} is not implemented", opcode); | ||
| 14 | } | ||
| 15 | |||
| 16 | void TranslatorVisitor::ATOM_cas(u64) { | ||
| 17 | ThrowNotImplemented(Opcode::ATOM_cas); | ||
| 18 | } | ||
| 19 | |||
| 20 | void TranslatorVisitor::ATOMS_cas(u64) { | ||
| 21 | ThrowNotImplemented(Opcode::ATOMS_cas); | ||
| 22 | } | ||
| 23 | |||
| 24 | void TranslatorVisitor::B2R(u64) { | ||
| 25 | ThrowNotImplemented(Opcode::B2R); | ||
| 26 | } | ||
| 27 | |||
| 28 | void TranslatorVisitor::BPT(u64) { | ||
| 29 | ThrowNotImplemented(Opcode::BPT); | ||
| 30 | } | ||
| 31 | |||
| 32 | void TranslatorVisitor::BRA(u64) { | ||
| 33 | ThrowNotImplemented(Opcode::BRA); | ||
| 34 | } | ||
| 35 | |||
| 36 | void TranslatorVisitor::BRK(u64) { | ||
| 37 | ThrowNotImplemented(Opcode::BRK); | ||
| 38 | } | ||
| 39 | |||
| 40 | void TranslatorVisitor::CAL() { | ||
| 41 | // CAL is a no-op | ||
| 42 | } | ||
| 43 | |||
| 44 | void TranslatorVisitor::CCTL(u64) { | ||
| 45 | ThrowNotImplemented(Opcode::CCTL); | ||
| 46 | } | ||
| 47 | |||
| 48 | void TranslatorVisitor::CCTLL(u64) { | ||
| 49 | ThrowNotImplemented(Opcode::CCTLL); | ||
| 50 | } | ||
| 51 | |||
| 52 | void TranslatorVisitor::CONT(u64) { | ||
| 53 | ThrowNotImplemented(Opcode::CONT); | ||
| 54 | } | ||
| 55 | |||
| 56 | void TranslatorVisitor::CS2R(u64) { | ||
| 57 | ThrowNotImplemented(Opcode::CS2R); | ||
| 58 | } | ||
| 59 | |||
| 60 | void TranslatorVisitor::FCHK_reg(u64) { | ||
| 61 | ThrowNotImplemented(Opcode::FCHK_reg); | ||
| 62 | } | ||
| 63 | |||
| 64 | void TranslatorVisitor::FCHK_cbuf(u64) { | ||
| 65 | ThrowNotImplemented(Opcode::FCHK_cbuf); | ||
| 66 | } | ||
| 67 | |||
| 68 | void TranslatorVisitor::FCHK_imm(u64) { | ||
| 69 | ThrowNotImplemented(Opcode::FCHK_imm); | ||
| 70 | } | ||
| 71 | |||
| 72 | void TranslatorVisitor::GETCRSPTR(u64) { | ||
| 73 | ThrowNotImplemented(Opcode::GETCRSPTR); | ||
| 74 | } | ||
| 75 | |||
| 76 | void TranslatorVisitor::GETLMEMBASE(u64) { | ||
| 77 | ThrowNotImplemented(Opcode::GETLMEMBASE); | ||
| 78 | } | ||
| 79 | |||
| 80 | void TranslatorVisitor::IDE(u64) { | ||
| 81 | ThrowNotImplemented(Opcode::IDE); | ||
| 82 | } | ||
| 83 | |||
| 84 | void TranslatorVisitor::IDP_reg(u64) { | ||
| 85 | ThrowNotImplemented(Opcode::IDP_reg); | ||
| 86 | } | ||
| 87 | |||
| 88 | void TranslatorVisitor::IDP_imm(u64) { | ||
| 89 | ThrowNotImplemented(Opcode::IDP_imm); | ||
| 90 | } | ||
| 91 | |||
| 92 | void TranslatorVisitor::IMAD_reg(u64) { | ||
| 93 | ThrowNotImplemented(Opcode::IMAD_reg); | ||
| 94 | } | ||
| 95 | |||
| 96 | void TranslatorVisitor::IMAD_rc(u64) { | ||
| 97 | ThrowNotImplemented(Opcode::IMAD_rc); | ||
| 98 | } | ||
| 99 | |||
| 100 | void TranslatorVisitor::IMAD_cr(u64) { | ||
| 101 | ThrowNotImplemented(Opcode::IMAD_cr); | ||
| 102 | } | ||
| 103 | |||
| 104 | void TranslatorVisitor::IMAD_imm(u64) { | ||
| 105 | ThrowNotImplemented(Opcode::IMAD_imm); | ||
| 106 | } | ||
| 107 | |||
| 108 | void TranslatorVisitor::IMAD32I(u64) { | ||
| 109 | ThrowNotImplemented(Opcode::IMAD32I); | ||
| 110 | } | ||
| 111 | |||
| 112 | void TranslatorVisitor::IMADSP_reg(u64) { | ||
| 113 | ThrowNotImplemented(Opcode::IMADSP_reg); | ||
| 114 | } | ||
| 115 | |||
| 116 | void TranslatorVisitor::IMADSP_rc(u64) { | ||
| 117 | ThrowNotImplemented(Opcode::IMADSP_rc); | ||
| 118 | } | ||
| 119 | |||
| 120 | void TranslatorVisitor::IMADSP_cr(u64) { | ||
| 121 | ThrowNotImplemented(Opcode::IMADSP_cr); | ||
| 122 | } | ||
| 123 | |||
| 124 | void TranslatorVisitor::IMADSP_imm(u64) { | ||
| 125 | ThrowNotImplemented(Opcode::IMADSP_imm); | ||
| 126 | } | ||
| 127 | |||
| 128 | void TranslatorVisitor::IMUL_reg(u64) { | ||
| 129 | ThrowNotImplemented(Opcode::IMUL_reg); | ||
| 130 | } | ||
| 131 | |||
| 132 | void TranslatorVisitor::IMUL_cbuf(u64) { | ||
| 133 | ThrowNotImplemented(Opcode::IMUL_cbuf); | ||
| 134 | } | ||
| 135 | |||
| 136 | void TranslatorVisitor::IMUL_imm(u64) { | ||
| 137 | ThrowNotImplemented(Opcode::IMUL_imm); | ||
| 138 | } | ||
| 139 | |||
| 140 | void TranslatorVisitor::IMUL32I(u64) { | ||
| 141 | ThrowNotImplemented(Opcode::IMUL32I); | ||
| 142 | } | ||
| 143 | |||
| 144 | void TranslatorVisitor::JCAL(u64) { | ||
| 145 | ThrowNotImplemented(Opcode::JCAL); | ||
| 146 | } | ||
| 147 | |||
| 148 | void TranslatorVisitor::JMP(u64) { | ||
| 149 | ThrowNotImplemented(Opcode::JMP); | ||
| 150 | } | ||
| 151 | |||
| 152 | void TranslatorVisitor::KIL() { | ||
| 153 | // KIL is a no-op | ||
| 154 | } | ||
| 155 | |||
| 156 | void TranslatorVisitor::LD(u64) { | ||
| 157 | ThrowNotImplemented(Opcode::LD); | ||
| 158 | } | ||
| 159 | |||
| 160 | void TranslatorVisitor::LEPC(u64) { | ||
| 161 | ThrowNotImplemented(Opcode::LEPC); | ||
| 162 | } | ||
| 163 | |||
| 164 | void TranslatorVisitor::LONGJMP(u64) { | ||
| 165 | ThrowNotImplemented(Opcode::LONGJMP); | ||
| 166 | } | ||
| 167 | |||
| 168 | void TranslatorVisitor::NOP(u64) { | ||
| 169 | // NOP is No-Op. | ||
| 170 | } | ||
| 171 | |||
| 172 | void TranslatorVisitor::PBK() { | ||
| 173 | // PBK is a no-op | ||
| 174 | } | ||
| 175 | |||
| 176 | void TranslatorVisitor::PCNT() { | ||
| 177 | // PCNT is a no-op | ||
| 178 | } | ||
| 179 | |||
| 180 | void TranslatorVisitor::PEXIT(u64) { | ||
| 181 | ThrowNotImplemented(Opcode::PEXIT); | ||
| 182 | } | ||
| 183 | |||
| 184 | void TranslatorVisitor::PLONGJMP(u64) { | ||
| 185 | ThrowNotImplemented(Opcode::PLONGJMP); | ||
| 186 | } | ||
| 187 | |||
| 188 | void TranslatorVisitor::PRET(u64) { | ||
| 189 | ThrowNotImplemented(Opcode::PRET); | ||
| 190 | } | ||
| 191 | |||
| 192 | void TranslatorVisitor::PRMT_reg(u64) { | ||
| 193 | ThrowNotImplemented(Opcode::PRMT_reg); | ||
| 194 | } | ||
| 195 | |||
| 196 | void TranslatorVisitor::PRMT_rc(u64) { | ||
| 197 | ThrowNotImplemented(Opcode::PRMT_rc); | ||
| 198 | } | ||
| 199 | |||
| 200 | void TranslatorVisitor::PRMT_cr(u64) { | ||
| 201 | ThrowNotImplemented(Opcode::PRMT_cr); | ||
| 202 | } | ||
| 203 | |||
| 204 | void TranslatorVisitor::PRMT_imm(u64) { | ||
| 205 | ThrowNotImplemented(Opcode::PRMT_imm); | ||
| 206 | } | ||
| 207 | |||
| 208 | void TranslatorVisitor::R2B(u64) { | ||
| 209 | ThrowNotImplemented(Opcode::R2B); | ||
| 210 | } | ||
| 211 | |||
| 212 | void TranslatorVisitor::RAM(u64) { | ||
| 213 | ThrowNotImplemented(Opcode::RAM); | ||
| 214 | } | ||
| 215 | |||
| 216 | void TranslatorVisitor::RET(u64) { | ||
| 217 | ThrowNotImplemented(Opcode::RET); | ||
| 218 | } | ||
| 219 | |||
| 220 | void TranslatorVisitor::RTT(u64) { | ||
| 221 | ThrowNotImplemented(Opcode::RTT); | ||
| 222 | } | ||
| 223 | |||
| 224 | void TranslatorVisitor::SAM(u64) { | ||
| 225 | ThrowNotImplemented(Opcode::SAM); | ||
| 226 | } | ||
| 227 | |||
| 228 | void TranslatorVisitor::SETCRSPTR(u64) { | ||
| 229 | ThrowNotImplemented(Opcode::SETCRSPTR); | ||
| 230 | } | ||
| 231 | |||
| 232 | void TranslatorVisitor::SETLMEMBASE(u64) { | ||
| 233 | ThrowNotImplemented(Opcode::SETLMEMBASE); | ||
| 234 | } | ||
| 235 | |||
| 236 | void TranslatorVisitor::SSY() { | ||
| 237 | // SSY is a no-op | ||
| 238 | } | ||
| 239 | |||
| 240 | void TranslatorVisitor::ST(u64) { | ||
| 241 | ThrowNotImplemented(Opcode::ST); | ||
| 242 | } | ||
| 243 | |||
| 244 | void TranslatorVisitor::STP(u64) { | ||
| 245 | ThrowNotImplemented(Opcode::STP); | ||
| 246 | } | ||
| 247 | |||
| 248 | void TranslatorVisitor::SUATOM_cas(u64) { | ||
| 249 | ThrowNotImplemented(Opcode::SUATOM_cas); | ||
| 250 | } | ||
| 251 | |||
| 252 | void TranslatorVisitor::SYNC(u64) { | ||
| 253 | ThrowNotImplemented(Opcode::SYNC); | ||
| 254 | } | ||
| 255 | |||
| 256 | void TranslatorVisitor::TXA(u64) { | ||
| 257 | ThrowNotImplemented(Opcode::TXA); | ||
| 258 | } | ||
| 259 | |||
| 260 | void TranslatorVisitor::VABSDIFF(u64) { | ||
| 261 | ThrowNotImplemented(Opcode::VABSDIFF); | ||
| 262 | } | ||
| 263 | |||
| 264 | void TranslatorVisitor::VABSDIFF4(u64) { | ||
| 265 | ThrowNotImplemented(Opcode::VABSDIFF4); | ||
| 266 | } | ||
| 267 | |||
| 268 | void TranslatorVisitor::VADD(u64) { | ||
| 269 | ThrowNotImplemented(Opcode::VADD); | ||
| 270 | } | ||
| 271 | |||
| 272 | void TranslatorVisitor::VSET(u64) { | ||
| 273 | ThrowNotImplemented(Opcode::VSET); | ||
| 274 | } | ||
| 275 | void TranslatorVisitor::VSHL(u64) { | ||
| 276 | ThrowNotImplemented(Opcode::VSHL); | ||
| 277 | } | ||
| 278 | |||
| 279 | void TranslatorVisitor::VSHR(u64) { | ||
| 280 | ThrowNotImplemented(Opcode::VSHR); | ||
| 281 | } | ||
| 282 | |||
| 283 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp new file mode 100644 index 000000000..01cfad88d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp | |||
| @@ -0,0 +1,45 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) { /* Shared body of OUT_reg, OUT_cbuf and OUT_imm: optionally emits a vertex and/or ends the primitive on the given stream, then zeroes dest_reg. */ | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> output_reg; // Not needed on host | ||
| 16 | BitField<39, 1, u64> emit; | ||
| 17 | BitField<40, 1, u64> cut; | ||
| 18 | } const out{insn}; | ||
| 19 | /* Only the two low bits of the stream index are kept. */ |||
| 20 | stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11)); | ||
| 21 | /* emit and cut are independent flags; when both are set the vertex is emitted before the primitive is ended. */ |||
| 22 | if (out.emit != 0) { | ||
| 23 | v.ir.EmitVertex(stream_index); | ||
| 24 | } | ||
| 25 | if (out.cut != 0) { | ||
| 26 | v.ir.EndPrimitive(stream_index); | ||
| 27 | } | ||
| 28 | // Host doesn't need the output register, but we can write to it to avoid undefined reads | ||
| 29 | v.X(out.dest_reg, v.ir.Imm32(0)); | ||
| 30 | } | ||
| 31 | } // Anonymous namespace | ||
| 32 | /* The three entry points differ only in where the stream index comes from: GetReg20, GetCbuf or GetImm20. */ |||
| 33 | void TranslatorVisitor::OUT_reg(u64 insn) { | ||
| 34 | OUT(*this, insn, GetReg20(insn)); | ||
| 35 | } | ||
| 36 | |||
| 37 | void TranslatorVisitor::OUT_cbuf(u64 insn) { | ||
| 38 | OUT(*this, insn, GetCbuf(insn)); | ||
| 39 | } | ||
| 40 | |||
| 41 | void TranslatorVisitor::OUT_imm(u64 insn) { | ||
| 42 | OUT(*this, insn, GetImm20(insn)); | ||
| 43 | } | ||
| 44 | |||
| 45 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp new file mode 100644 index 000000000..b4767afb5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Mode : u64 { /* Values of the PIXLD mode field; PIXLD below only handles MyIndex. */ | ||
| 12 | Default, | ||
| 13 | CovMask, | ||
| 14 | Covered, | ||
| 15 | Offset, | ||
| 16 | CentroidOffset, | ||
| 17 | MyIndex, | ||
| 18 | }; | ||
| 19 | } // Anonymous namespace | ||
| 20 | /* PIXLD: with Mode::MyIndex, writes ir.SampleId() to dest_reg; every other mode or unsupported encoding throws NotImplementedException. */ |||
| 21 | void TranslatorVisitor::PIXLD(u64 insn) { | ||
| 22 | union { | ||
| 23 | u64 raw; | ||
| 24 | BitField<31, 3, Mode> mode; | ||
| 25 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 26 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 27 | BitField<20, 8, s64> addr_offset; | ||
| 28 | BitField<45, 3, IR::Pred> dest_pred; | ||
| 29 | } const pixld{insn}; | ||
| 30 | /* Reject encodings this translation does not support: a destination predicate other than PT, an address register other than RZ, or a non-zero address offset. */ |||
| 31 | if (pixld.dest_pred != IR::Pred::PT) { | ||
| 32 | throw NotImplementedException("Destination predicate"); | ||
| 33 | } | ||
| 34 | if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) { | ||
| 35 | throw NotImplementedException("Non-zero source register"); | ||
| 36 | } | ||
| 37 | switch (pixld.mode) { | ||
| 38 | case Mode::MyIndex: | ||
| 39 | X(pixld.dest_reg, ir.SampleId()); | ||
| 40 | break; | ||
| 41 | default: | ||
| 42 | throw NotImplementedException("Mode {}", pixld.mode.Value()); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp new file mode 100644 index 000000000..75d1fa8c1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | void TranslatorVisitor::PSETP(u64 insn) { /* PSETP: combine three source predicates with two boolean operations and write two destination predicates. */ | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 15 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 16 | BitField<12, 3, IR::Pred> pred_a; | ||
| 17 | BitField<15, 1, u64> neg_pred_a; | ||
| 18 | BitField<24, 2, BooleanOp> bop_1; | ||
| 19 | BitField<29, 3, IR::Pred> pred_b; | ||
| 20 | BitField<32, 1, u64> neg_pred_b; | ||
| 21 | BitField<39, 3, IR::Pred> pred_c; | ||
| 22 | BitField<42, 1, u64> neg_pred_c; | ||
| 23 | BitField<45, 2, BooleanOp> bop_2; | ||
| 24 | } const pset{insn}; | ||
| 25 | /* Each source predicate is fetched through GetPred together with its own neg_pred_* flag. */ |||
| 26 | const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; | ||
| 27 | const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; | ||
| 28 | const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; | ||
| 29 | /* result_a = (pred_a bop_1 pred_b) bop_2 pred_c; result_b is the same expression with pred_a passed through LogicalNot first. */ |||
| 30 | const IR::U1 lhs_a{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; | ||
| 31 | const IR::U1 lhs_b{PredicateCombine(ir, ir.LogicalNot(pred_a), pred_b, pset.bop_1)}; | ||
| 32 | const IR::U1 result_a{PredicateCombine(ir, lhs_a, pred_c, pset.bop_2)}; | ||
| 33 | const IR::U1 result_b{PredicateCombine(ir, lhs_b, pred_c, pset.bop_2)}; | ||
| 34 | |||
| 35 | ir.SetPred(pset.dest_pred_a, result_a); | ||
| 36 | ir.SetPred(pset.dest_pred_b, result_b); | ||
| 37 | } | ||
| 38 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp new file mode 100644 index 000000000..b02789874 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | void TranslatorVisitor::PSET(u64 insn) { /* PSET: combine three source predicates with two boolean operations, write the outcome to dest_reg as a 32-bit value and, when cc is set, update the Z, S, O and C flags. */ | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<12, 3, IR::Pred> pred_a; | ||
| 16 | BitField<15, 1, u64> neg_pred_a; | ||
| 17 | BitField<24, 2, BooleanOp> bop_1; | ||
| 18 | BitField<29, 3, IR::Pred> pred_b; | ||
| 19 | BitField<32, 1, u64> neg_pred_b; | ||
| 20 | BitField<39, 3, IR::Pred> pred_c; | ||
| 21 | BitField<42, 1, u64> neg_pred_c; | ||
| 22 | BitField<44, 1, u64> bf; | ||
| 23 | BitField<45, 2, BooleanOp> bop_2; | ||
| 24 | BitField<47, 1, u64> cc; | ||
| 25 | } const pset{insn}; | ||
| 26 | /* Each source predicate is fetched through GetPred together with its own neg_pred_* flag. */ |||
| 27 | const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; | ||
| 28 | const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; | ||
| 29 | const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; | ||
| 30 | /* res_2 = (pred_a bop_1 pred_b) bop_2 pred_c */ |||
| 31 | const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; | ||
| 32 | const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)}; | ||
| 33 | /* With bf set the true value is 0x3f800000 (the bit pattern of 1.0f); otherwise it is -1. The false value is always 0. */ |||
| 34 | const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)}; | ||
| 35 | const IR::U32 zero{ir.Imm32(0)}; | ||
| 36 | |||
| 37 | const IR::U32 result{ir.Select(res_2, true_result, zero)}; | ||
| 38 | |||
| 39 | X(pset.dest_reg, result); | ||
| 40 | if (pset.cc != 0) { /* Z is set when the result equals 0; S is reset in bf mode and otherwise set when the result is non-zero; O and C are always reset. */ | ||
| 41 | const IR::U1 is_zero{ir.IEqual(result, zero)}; | ||
| 42 | SetZFlag(is_zero); | ||
| 43 | if (pset.bf != 0) { | ||
| 44 | ResetSFlag(); | ||
| 45 | } else { | ||
| 46 | SetSFlag(ir.LogicalNot(is_zero)); | ||
| 47 | } | ||
| 48 | ResetOFlag(); | ||
| 49 | ResetCFlag(); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp new file mode 100644 index 000000000..93baa75a9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | /* Shared body of SEL_reg, SEL_cbuf and SEL_imm: writes Select(pred, src_reg value, src) to dest_reg, with the two value operands swapped when neg_pred is set. */ |||
| 12 | void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_reg; | ||
| 17 | BitField<39, 3, IR::Pred> pred; | ||
| 18 | BitField<42, 1, u64> neg_pred; | ||
| 19 | } const sel{insn}; | ||
| 20 | /* The predicate is read without its negation flag; negation is implemented by swapping the Select operands instead. */ |||
| 21 | const IR::U1 pred = v.ir.GetPred(sel.pred); | ||
| 22 | IR::U32 op_a{v.X(sel.src_reg)}; | ||
| 23 | IR::U32 op_b{src}; | ||
| 24 | if (sel.neg_pred != 0) { | ||
| 25 | std::swap(op_a, op_b); | ||
| 26 | } | ||
| 27 | const IR::U32 result{v.ir.Select(pred, op_a, op_b)}; | ||
| 28 | |||
| 29 | v.X(sel.dest_reg, result); | ||
| 30 | } | ||
| 31 | } // Anonymous namespace | ||
| 32 | /* The three entry points differ only in where the second operand comes from: GetReg20, GetCbuf or GetImm20. */ |||
| 33 | void TranslatorVisitor::SEL_reg(u64 insn) { | ||
| 34 | SEL(*this, insn, GetReg20(insn)); | ||
| 35 | } | ||
| 36 | |||
| 37 | void TranslatorVisitor::SEL_cbuf(u64 insn) { | ||
| 38 | SEL(*this, insn, GetCbuf(insn)); | ||
| 39 | } | ||
| 40 | |||
| 41 | void TranslatorVisitor::SEL_imm(u64 insn) { | ||
| 42 | SEL(*this, insn, GetImm20(insn)); | ||
| 43 | } | ||
| 44 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp new file mode 100644 index 000000000..63b588ad4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp | |||
| @@ -0,0 +1,205 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <bit> | ||
| 7 | |||
| 8 | #include "common/bit_field.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 11 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 12 | |||
| 13 | namespace Shader::Maxwell { | ||
| 14 | namespace { | ||
| 15 | enum class Type : u64 { /* Surface type field decoded by SUATOM and SURED; GetType maps each value to a TextureType. */ | ||
| 16 | _1D, | ||
| 17 | BUFFER_1D, | ||
| 18 | ARRAY_1D, | ||
| 19 | _2D, | ||
| 20 | ARRAY_2D, | ||
| 21 | _3D, | ||
| 22 | }; | ||
| 23 | |||
| 24 | enum class Size : u64 { /* Operand size field; ImageAtomOp only accepts U32, S32 and SD32 (see IsSizeInt32) and throws for the rest. */ | ||
| 25 | U32, | ||
| 26 | S32, | ||
| 27 | U64, | ||
| 28 | S64, | ||
| 29 | F32FTZRN, | ||
| 30 | F16x2FTZRN, | ||
| 31 | SD32, | ||
| 32 | SD64, | ||
| 33 | }; | ||
| 34 | |||
| 35 | enum class AtomicOp : u64 { /* Atomic operation selector decoded by SUATOM and SURED and dispatched in ApplyAtomicOp. */ | ||
| 36 | ADD, | ||
| 37 | MIN, | ||
| 38 | MAX, | ||
| 39 | INC, | ||
| 40 | DEC, | ||
| 41 | AND, | ||
| 42 | OR, | ||
| 43 | XOR, | ||
| 44 | EXCH, | ||
| 45 | }; | ||
| 46 | |||
| 47 | enum class Clamp : u64 { /* Clamp mode field; ImageAtomOp only supports IGN and throws NotImplementedException for the other values. */ | ||
| 48 | IGN, | ||
| 49 | Default, | ||
| 50 | TRAP, | ||
| 51 | }; | ||
| 52 | |||
| 53 | TextureType GetType(Type type) { /* Maps the instruction's Type field to the matching TextureType; the trailing throw is only reached for values outside the listed enumerators. */ | ||
| 54 | switch (type) { | ||
| 55 | case Type::_1D: | ||
| 56 | return TextureType::Color1D; | ||
| 57 | case Type::BUFFER_1D: | ||
| 58 | return TextureType::Buffer; | ||
| 59 | case Type::ARRAY_1D: | ||
| 60 | return TextureType::ColorArray1D; | ||
| 61 | case Type::_2D: | ||
| 62 | return TextureType::Color2D; | ||
| 63 | case Type::ARRAY_2D: | ||
| 64 | return TextureType::ColorArray2D; | ||
| 65 | case Type::_3D: | ||
| 66 | return TextureType::Color3D; | ||
| 67 | } | ||
| 68 | throw NotImplementedException("Invalid type {}", type); | ||
| 69 | } | ||
| 70 | |||
| 71 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { /* Builds the coordinate value from consecutive registers starting at reg: one for _1D and BUFFER_1D, two for _2D, three for _3D. ARRAY_1D and ARRAY_2D are not handled here and reach the throw below. */ | ||
| 72 | switch (type) { | ||
| 73 | case Type::_1D: | ||
| 74 | case Type::BUFFER_1D: | ||
| 75 | return v.X(reg); | ||
| 76 | case Type::_2D: | ||
| 77 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); | ||
| 78 | case Type::_3D: | ||
| 79 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); | ||
| 80 | default: | ||
| 81 | break; | ||
| 82 | } | ||
| 83 | throw NotImplementedException("Invalid type {}", type); | ||
| 84 | } | ||
| 85 | |||
| 86 | IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords, | ||
| 87 | const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op, | ||
| 88 | bool is_signed) { /* Dispatches op to the matching ir.ImageAtomic* emitter and returns its value; is_signed is forwarded only to MIN and MAX. Any other op value throws NotImplementedException. */ | ||
| 89 | switch (op) { | ||
| 90 | case AtomicOp::ADD: | ||
| 91 | return ir.ImageAtomicIAdd(handle, coords, op_b, info); | ||
| 92 | case AtomicOp::MIN: | ||
| 93 | return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info); | ||
| 94 | case AtomicOp::MAX: | ||
| 95 | return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info); | ||
| 96 | case AtomicOp::INC: | ||
| 97 | return ir.ImageAtomicInc(handle, coords, op_b, info); | ||
| 98 | case AtomicOp::DEC: | ||
| 99 | return ir.ImageAtomicDec(handle, coords, op_b, info); | ||
| 100 | case AtomicOp::AND: | ||
| 101 | return ir.ImageAtomicAnd(handle, coords, op_b, info); | ||
| 102 | case AtomicOp::OR: | ||
| 103 | return ir.ImageAtomicOr(handle, coords, op_b, info); | ||
| 104 | case AtomicOp::XOR: | ||
| 105 | return ir.ImageAtomicXor(handle, coords, op_b, info); | ||
| 106 | case AtomicOp::EXCH: | ||
| 107 | return ir.ImageAtomicExchange(handle, coords, op_b, info); | ||
| 108 | default: | ||
| 109 | throw NotImplementedException("Atomic Operation {}", op); | ||
| 110 | } | ||
| 111 | } | ||
| 112 | |||
| 113 | ImageFormat Format(Size size) { /* Image format used for the atomic: U32, S32 and SD32 all map to R32_UINT; every other size throws NotImplementedException. */ | ||
| 114 | switch (size) { | ||
| 115 | case Size::U32: | ||
| 116 | case Size::S32: | ||
| 117 | case Size::SD32: | ||
| 118 | return ImageFormat::R32_UINT; | ||
| 119 | default: | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | throw NotImplementedException("Invalid size {}", size); | ||
| 123 | } | ||
| 124 | |||
| 125 | bool IsSizeInt32(Size size) { /* True for U32, S32 and SD32, the same sizes Format() accepts; false for everything else. */ | ||
| 126 | switch (size) { | ||
| 127 | case Size::U32: | ||
| 128 | case Size::S32: | ||
| 129 | case Size::SD32: | ||
| 130 | return true; | ||
| 131 | default: | ||
| 132 | return false; | ||
| 133 | } | ||
| 134 | } | ||
| 135 | |||
| 136 | void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg, | ||
| 137 | IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type, | ||
| 138 | u64 bound_offset, bool is_bindless, bool write_result) { /* Shared implementation of SUATOM and SURED: rejects unsupported clamp and size values, builds the image handle and coordinates, emits the atomic and, when write_result is set, stores the returned value in dest_reg. */ | ||
| 139 | if (clamp != Clamp::IGN) { | ||
| 140 | throw NotImplementedException("Clamp {}", clamp); | ||
| 141 | } | ||
| 142 | if (!IsSizeInt32(size)) { | ||
| 143 | throw NotImplementedException("Size {}", size); | ||
| 144 | } | ||
| 145 | const bool is_signed{size == Size::S32}; /* only S32 selects the signed MIN and MAX variants; U32 and SD32 use the unsigned ones */ | ||
| 146 | const ImageFormat format{Format(size)}; | ||
| 147 | const TextureType tex_type{GetType(type)}; | ||
| 148 | const IR::Value coords{MakeCoords(v, coord_reg, type)}; | ||
| 149 | /* Bindless: the handle is read from bindless_reg. Otherwise the handle is the immediate bound_offset * 4. */ |||
| 150 | const IR::U32 handle{is_bindless != 0 ? v.X(bindless_reg) | ||
| 151 | : v.ir.Imm32(static_cast<u32>(bound_offset * 4))}; | ||
| 152 | IR::TextureInstInfo info{}; | ||
| 153 | info.type.Assign(tex_type); | ||
| 154 | info.image_format.Assign(format); | ||
| 155 | |||
| 156 | // TODO: float/64-bit operand | ||
| 157 | const IR::Value op_b{v.X(operand_reg)}; | ||
| 158 | const IR::Value color{ApplyAtomicOp(v.ir, handle, coords, op_b, info, op, is_signed)}; | ||
| 159 | /* SURED passes write_result = false, so the value returned by the atomic is dropped for reductions. */ |||
| 160 | if (write_result) { | ||
| 161 | v.X(dest_reg, IR::U32{color}); | ||
| 162 | } | ||
| 163 | } | ||
| 164 | } // Anonymous namespace | ||
| 165 | |||
| 166 | void TranslatorVisitor::SUATOM(u64 insn) { /* SUATOM: decode the instruction fields and run ImageAtomOp with write_result = true, so the value returned by the atomic is stored in dest_reg. */ | ||
| 167 | union { | ||
| 168 | u64 raw; | ||
| 169 | BitField<54, 1, u64> is_bindless; | ||
| 170 | BitField<29, 4, AtomicOp> op; | ||
| 171 | BitField<33, 3, Type> type; | ||
| 172 | BitField<51, 3, Size> size; | ||
| 173 | BitField<49, 2, Clamp> clamp; | ||
| 174 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 175 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 176 | BitField<20, 8, IR::Reg> operand_reg; | ||
| 177 | BitField<36, 13, u64> bound_offset; // !is_bindless | ||
| 178 | BitField<39, 8, IR::Reg> bindless_reg; // is_bindless | ||
| 179 | } const suatom{insn}; | ||
| 180 | /* bound_offset and bindless_reg are both passed; ImageAtomOp picks one of them based on is_bindless. */ |||
| 181 | ImageAtomOp(*this, suatom.dest_reg, suatom.operand_reg, suatom.coord_reg, suatom.bindless_reg, | ||
| 182 | suatom.op, suatom.clamp, suatom.size, suatom.type, suatom.bound_offset, | ||
| 183 | suatom.is_bindless != 0, true); | ||
| 184 | } | ||
| 185 | |||
| 186 | void TranslatorVisitor::SURED(u64 insn) { /* SURED: same path as SUATOM through ImageAtomOp, but with write_result = false and RZ as destination, so the value returned by the atomic is discarded. */ | ||
| 187 | // TODO: confirm offsets (NOTE(review): if BitField is <position, bits>, op at <21, 3> and size at <20, 3> share bits 21-22 - verify) | ||
| 188 | union { | ||
| 189 | u64 raw; | ||
| 190 | BitField<51, 1, u64> is_bound; | ||
| 191 | BitField<21, 3, AtomicOp> op; | ||
| 192 | BitField<33, 3, Type> type; | ||
| 193 | BitField<20, 3, Size> size; | ||
| 194 | BitField<49, 2, Clamp> clamp; | ||
| 195 | BitField<0, 8, IR::Reg> operand_reg; | ||
| 196 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 197 | BitField<36, 13, u64> bound_offset; // is_bound | ||
| 198 | BitField<39, 8, IR::Reg> bindless_reg; // !is_bound | ||
| 199 | } const sured{insn}; | ||
| 200 | ImageAtomOp(*this, IR::Reg::RZ, sured.operand_reg, sured.coord_reg, sured.bindless_reg, | ||
| 201 | sured.op, sured.clamp, sured.size, sured.type, sured.bound_offset, | ||
| 202 | sured.is_bound == 0, false); /* is_bound == 0 is what ImageAtomOp receives as is_bindless */ | ||
| 203 | } | ||
| 204 | |||
| 205 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp new file mode 100644 index 000000000..681220a8d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp | |||
| @@ -0,0 +1,281 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <bit> | ||
| 7 | |||
| 8 | #include "common/bit_field.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 11 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 12 | |||
| 13 | namespace Shader::Maxwell { | ||
| 14 | namespace { | ||
| 15 | enum class Type : u64 { /* Surface type values; GetType maps each one to a TextureType and MakeCoords decides how many coordinate registers it reads. */ | ||
| 16 | _1D, | ||
| 17 | BUFFER_1D, | ||
| 18 | ARRAY_1D, | ||
| 19 | _2D, | ||
| 20 | ARRAY_2D, | ||
| 21 | _3D, | ||
| 22 | }; | ||
| 23 | |||
| 24 | constexpr unsigned R = 1 << 0; /* One bit per colour component: R, G, B and A are bits 0 to 3. */ | ||
| 25 | constexpr unsigned G = 1 << 1; | ||
| 26 | constexpr unsigned B = 1 << 2; | ||
| 27 | constexpr unsigned A = 1 << 3; | ||
| 28 | /* Sixteen-entry table indexed by the swizzle value (see SwizzleMask). With the bit assignment above every entry equals its own index; SwizzleMask rejects index 0. */ |||
| 29 | constexpr std::array MASK{ | ||
| 30 | 0U, // | ||
| 31 | R, // | ||
| 32 | G, // | ||
| 33 | R | G, // | ||
| 34 | B, // | ||
| 35 | R | B, // | ||
| 36 | G | B, // | ||
| 37 | R | G | B, // | ||
| 38 | A, // | ||
| 39 | R | A, // | ||
| 40 | G | A, // | ||
| 41 | R | G | A, // | ||
| 42 | B | A, // | ||
| 43 | R | B | A, // | ||
| 44 | G | B | A, // | ||
| 45 | R | G | B | A, // | ||
| 46 | }; | ||
| 47 | |||
| 48 | enum class Size : u64 { /* Access size values; Format() gives the image format and SizeInRegs() the register count for each one. */ | ||
| 49 | U8, | ||
| 50 | S8, | ||
| 51 | U16, | ||
| 52 | S16, | ||
| 53 | B32, | ||
| 54 | B64, | ||
| 55 | B128, | ||
| 56 | }; | ||
| 57 | |||
| 58 | enum class Clamp : u64 { /* Clamp mode field values. */ | ||
| 59 | IGN, | ||
| 60 | Default, | ||
| 61 | TRAP, | ||
| 62 | }; | ||
| 63 | |||
| 64 | // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators | ||
| 65 | enum class LoadCache : u64 { /* Cache mode values for loads; NOTE(review): names presumably follow the PTX cache operators at the link above - confirm. */ | ||
| 66 | CA, // Cache at all levels, likely to be accessed again | ||
| 67 | CG, // Cache at global level (L2 and below, not L1) | ||
| 68 | CI, // ??? | ||
| 69 | CV, // Don't cache and fetch again (volatile) | ||
| 70 | }; | ||
| 71 | |||
| 72 | enum class StoreCache : u64 { /* Cache mode values for stores, the counterpart of LoadCache. */ | ||
| 73 | WB, // Cache write-back all coherent levels | ||
| 74 | CG, // Cache at global level (L2 and below, not L1) | ||
| 75 | CS, // Cache streaming, likely to be accessed once | ||
| 76 | WT, // Cache write-through (to system memory, volatile?) | ||
| 77 | }; | ||
| 78 | |||
| 79 | ImageFormat Format(Size size) { /* Image format for an access size: U8, S8, U16 and S16 map to the single-channel format of the same width and signedness; B32, B64 and B128 map to R32_UINT, R32G32_UINT and R32G32B32A32_UINT. */ | ||
| 80 | switch (size) { | ||
| 81 | case Size::U8: | ||
| 82 | return ImageFormat::R8_UINT; | ||
| 83 | case Size::S8: | ||
| 84 | return ImageFormat::R8_SINT; | ||
| 85 | case Size::U16: | ||
| 86 | return ImageFormat::R16_UINT; | ||
| 87 | case Size::S16: | ||
| 88 | return ImageFormat::R16_SINT; | ||
| 89 | case Size::B32: | ||
| 90 | return ImageFormat::R32_UINT; | ||
| 91 | case Size::B64: | ||
| 92 | return ImageFormat::R32G32_UINT; | ||
| 93 | case Size::B128: | ||
| 94 | return ImageFormat::R32G32B32A32_UINT; | ||
| 95 | } | ||
| 96 | throw NotImplementedException("Invalid size {}", size); | ||
| 97 | } | ||
| 98 | |||
| 99 | int SizeInRegs(Size size) { /* Register count for an access size: 1 for everything up to B32, 2 for B64, 4 for B128; values outside the enumerators throw. */ | ||
| 100 | switch (size) { | ||
| 101 | case Size::U8: | ||
| 102 | case Size::S8: | ||
| 103 | case Size::U16: | ||
| 104 | case Size::S16: | ||
| 105 | case Size::B32: | ||
| 106 | return 1; | ||
| 107 | case Size::B64: | ||
| 108 | return 2; | ||
| 109 | case Size::B128: | ||
| 110 | return 4; | ||
| 111 | } | ||
| 112 | throw NotImplementedException("Invalid size {}", size); | ||
| 113 | } | ||
| 114 | |||
| 115 | TextureType GetType(Type type) { | ||
| 116 | switch (type) { | ||
| 117 | case Type::_1D: | ||
| 118 | return TextureType::Color1D; | ||
| 119 | case Type::BUFFER_1D: | ||
| 120 | return TextureType::Buffer; | ||
| 121 | case Type::ARRAY_1D: | ||
| 122 | return TextureType::ColorArray1D; | ||
| 123 | case Type::_2D: | ||
| 124 | return TextureType::Color2D; | ||
| 125 | case Type::ARRAY_2D: | ||
| 126 | return TextureType::ColorArray2D; | ||
| 127 | case Type::_3D: | ||
| 128 | return TextureType::Color3D; | ||
| 129 | } | ||
| 130 | throw NotImplementedException("Invalid type {}", type); | ||
| 131 | } | ||
| 132 | |||
| 133 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { | ||
| 134 | const auto array{[&](int index) { | ||
| 135 | return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16)); | ||
| 136 | }}; | ||
| 137 | switch (type) { | ||
| 138 | case Type::_1D: | ||
| 139 | case Type::BUFFER_1D: | ||
| 140 | return v.X(reg); | ||
| 141 | case Type::ARRAY_1D: | ||
| 142 | return v.ir.CompositeConstruct(v.X(reg), array(1)); | ||
| 143 | case Type::_2D: | ||
| 144 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); | ||
| 145 | case Type::ARRAY_2D: | ||
| 146 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2)); | ||
| 147 | case Type::_3D: | ||
| 148 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); | ||
| 149 | } | ||
| 150 | throw NotImplementedException("Invalid type {}", type); | ||
| 151 | } | ||
| 152 | |||
| 153 | unsigned SwizzleMask(u64 swizzle) { | ||
| 154 | if (swizzle == 0 || swizzle >= MASK.size()) { | ||
| 155 | throw NotImplementedException("Invalid swizzle {}", swizzle); | ||
| 156 | } | ||
| 157 | return MASK[swizzle]; | ||
| 158 | } | ||
| 159 | |||
| 160 | IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) { | ||
| 161 | std::array<IR::U32, 4> colors; | ||
| 162 | for (int i = 0; i < num_regs; ++i) { | ||
| 163 | colors[static_cast<size_t>(i)] = ir.GetReg(reg + i); | ||
| 164 | } | ||
| 165 | for (int i = num_regs; i < 4; ++i) { | ||
| 166 | colors[static_cast<size_t>(i)] = ir.Imm32(0); | ||
| 167 | } | ||
| 168 | return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]); | ||
| 169 | } | ||
| 170 | } // Anonymous namespace | ||
| 171 | |||
| 172 | void TranslatorVisitor::SULD(u64 insn) { | ||
| 173 | union { | ||
| 174 | u64 raw; | ||
| 175 | BitField<51, 1, u64> is_bound; | ||
| 176 | BitField<52, 1, u64> d; | ||
| 177 | BitField<23, 1, u64> ba; | ||
| 178 | BitField<33, 3, Type> type; | ||
| 179 | BitField<24, 2, LoadCache> cache; | ||
| 180 | BitField<20, 3, Size> size; // .D | ||
| 181 | BitField<20, 4, u64> swizzle; // .P | ||
| 182 | BitField<49, 2, Clamp> clamp; | ||
| 183 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 184 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 185 | BitField<36, 13, u64> bound_offset; // is_bound | ||
| 186 | BitField<39, 8, IR::Reg> bindless_reg; // !is_bound | ||
| 187 | } const suld{insn}; | ||
| 188 | |||
| 189 | if (suld.clamp != Clamp::IGN) { | ||
| 190 | throw NotImplementedException("Clamp {}", suld.clamp.Value()); | ||
| 191 | } | ||
| 192 | if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) { | ||
| 193 | throw NotImplementedException("Cache {}", suld.cache.Value()); | ||
| 194 | } | ||
| 195 | const bool is_typed{suld.d != 0}; | ||
| 196 | if (is_typed && suld.ba != 0) { | ||
| 197 | throw NotImplementedException("BA"); | ||
| 198 | } | ||
| 199 | |||
| 200 | const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless}; | ||
| 201 | const TextureType type{GetType(suld.type)}; | ||
| 202 | const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)}; | ||
| 203 | const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast<u32>(suld.bound_offset * 4)) | ||
| 204 | : X(suld.bindless_reg)}; | ||
| 205 | IR::TextureInstInfo info{}; | ||
| 206 | info.type.Assign(type); | ||
| 207 | info.image_format.Assign(format); | ||
| 208 | |||
| 209 | const IR::Value result{ir.ImageRead(handle, coords, info)}; | ||
| 210 | IR::Reg dest_reg{suld.dest_reg}; | ||
| 211 | if (is_typed) { | ||
| 212 | const int num_regs{SizeInRegs(suld.size)}; | ||
| 213 | for (int i = 0; i < num_regs; ++i) { | ||
| 214 | X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); | ||
| 215 | } | ||
| 216 | } else { | ||
| 217 | const unsigned mask{SwizzleMask(suld.swizzle)}; | ||
| 218 | const int bits{std::popcount(mask)}; | ||
| 219 | if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : static_cast<size_t>(bits))) { | ||
| 220 | throw NotImplementedException("Unaligned destination register"); | ||
| 221 | } | ||
| 222 | for (unsigned component = 0; component < 4; ++component) { | ||
| 223 | if (((mask >> component) & 1) == 0) { | ||
| 224 | continue; | ||
| 225 | } | ||
| 226 | X(dest_reg, IR::U32{ir.CompositeExtract(result, component)}); | ||
| 227 | ++dest_reg; | ||
| 228 | } | ||
| 229 | } | ||
| 230 | } | ||
| 231 | |||
| 232 | void TranslatorVisitor::SUST(u64 insn) { | ||
| 233 | union { | ||
| 234 | u64 raw; | ||
| 235 | BitField<51, 1, u64> is_bound; | ||
| 236 | BitField<52, 1, u64> d; | ||
| 237 | BitField<23, 1, u64> ba; | ||
| 238 | BitField<33, 3, Type> type; | ||
| 239 | BitField<24, 2, StoreCache> cache; | ||
| 240 | BitField<20, 3, Size> size; // .D | ||
| 241 | BitField<20, 4, u64> swizzle; // .P | ||
| 242 | BitField<49, 2, Clamp> clamp; | ||
| 243 | BitField<0, 8, IR::Reg> data_reg; | ||
| 244 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 245 | BitField<36, 13, u64> bound_offset; // is_bound | ||
| 246 | BitField<39, 8, IR::Reg> bindless_reg; // !is_bound | ||
| 247 | } const sust{insn}; | ||
| 248 | |||
| 249 | if (sust.clamp != Clamp::IGN) { | ||
| 250 | throw NotImplementedException("Clamp {}", sust.clamp.Value()); | ||
| 251 | } | ||
| 252 | if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) { | ||
| 253 | throw NotImplementedException("Cache {}", sust.cache.Value()); | ||
| 254 | } | ||
| 255 | const bool is_typed{sust.d != 0}; | ||
| 256 | if (is_typed && sust.ba != 0) { | ||
| 257 | throw NotImplementedException("BA"); | ||
| 258 | } | ||
| 259 | const ImageFormat format{is_typed ? Format(sust.size) : ImageFormat::Typeless}; | ||
| 260 | const TextureType type{GetType(sust.type)}; | ||
| 261 | const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)}; | ||
| 262 | const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast<u32>(sust.bound_offset * 4)) | ||
| 263 | : X(sust.bindless_reg)}; | ||
| 264 | IR::TextureInstInfo info{}; | ||
| 265 | info.type.Assign(type); | ||
| 266 | info.image_format.Assign(format); | ||
| 267 | |||
| 268 | IR::Value color; | ||
| 269 | if (is_typed) { | ||
| 270 | color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size)); | ||
| 271 | } else { | ||
| 272 | const unsigned mask{SwizzleMask(sust.swizzle)}; | ||
| 273 | if (mask != 0xf) { | ||
| 274 | throw NotImplementedException("Non-full mask"); | ||
| 275 | } | ||
| 276 | color = MakeColor(ir, sust.data_reg, 4); | ||
| 277 | } | ||
| 278 | ir.ImageWrite(handle, coords, color, info); | ||
| 279 | } | ||
| 280 | |||
| 281 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp new file mode 100644 index 000000000..0046b5edd --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp | |||
| @@ -0,0 +1,236 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Blod : u64 { | ||
| 15 | None, | ||
| 16 | LZ, | ||
| 17 | LB, | ||
| 18 | LL, | ||
| 19 | INVALIDBLOD4, | ||
| 20 | INVALIDBLOD5, | ||
| 21 | LBA, | ||
| 22 | LLA, | ||
| 23 | }; | ||
| 24 | |||
| 25 | enum class TextureType : u64 { | ||
| 26 | _1D, | ||
| 27 | ARRAY_1D, | ||
| 28 | _2D, | ||
| 29 | ARRAY_2D, | ||
| 30 | _3D, | ||
| 31 | ARRAY_3D, | ||
| 32 | CUBE, | ||
| 33 | ARRAY_CUBE, | ||
| 34 | }; | ||
| 35 | |||
| 36 | Shader::TextureType GetType(TextureType type) { | ||
| 37 | switch (type) { | ||
| 38 | case TextureType::_1D: | ||
| 39 | return Shader::TextureType::Color1D; | ||
| 40 | case TextureType::ARRAY_1D: | ||
| 41 | return Shader::TextureType::ColorArray1D; | ||
| 42 | case TextureType::_2D: | ||
| 43 | return Shader::TextureType::Color2D; | ||
| 44 | case TextureType::ARRAY_2D: | ||
| 45 | return Shader::TextureType::ColorArray2D; | ||
| 46 | case TextureType::_3D: | ||
| 47 | return Shader::TextureType::Color3D; | ||
| 48 | case TextureType::ARRAY_3D: | ||
| 49 | throw NotImplementedException("3D array texture type"); | ||
| 50 | case TextureType::CUBE: | ||
| 51 | return Shader::TextureType::ColorCube; | ||
| 52 | case TextureType::ARRAY_CUBE: | ||
| 53 | return Shader::TextureType::ColorArrayCube; | ||
| 54 | } | ||
| 55 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 56 | } | ||
| 57 | |||
| 58 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||
| 59 | const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; | ||
| 60 | switch (type) { | ||
| 61 | case TextureType::_1D: | ||
| 62 | return v.F(reg); | ||
| 63 | case TextureType::ARRAY_1D: | ||
| 64 | return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); | ||
| 65 | case TextureType::_2D: | ||
| 66 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); | ||
| 67 | case TextureType::ARRAY_2D: | ||
| 68 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); | ||
| 69 | case TextureType::_3D: | ||
| 70 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 71 | case TextureType::ARRAY_3D: | ||
| 72 | throw NotImplementedException("3D array texture type"); | ||
| 73 | case TextureType::CUBE: | ||
| 74 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 75 | case TextureType::ARRAY_CUBE: | ||
| 76 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); | ||
| 77 | } | ||
| 78 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 79 | } | ||
| 80 | |||
| 81 | IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { | ||
| 82 | switch (blod) { | ||
| 83 | case Blod::None: | ||
| 84 | return v.ir.Imm32(0.0f); | ||
| 85 | case Blod::LZ: | ||
| 86 | return v.ir.Imm32(0.0f); | ||
| 87 | case Blod::LB: | ||
| 88 | case Blod::LL: | ||
| 89 | case Blod::LBA: | ||
| 90 | case Blod::LLA: | ||
| 91 | return v.F(reg++); | ||
| 92 | case Blod::INVALIDBLOD4: | ||
| 93 | case Blod::INVALIDBLOD5: | ||
| 94 | break; | ||
| 95 | } | ||
| 96 | throw NotImplementedException("Invalid blod {}", blod); | ||
| 97 | } | ||
| 98 | |||
| 99 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { | ||
| 100 | const IR::U32 value{v.X(reg++)}; | ||
| 101 | switch (type) { | ||
| 102 | case TextureType::_1D: | ||
| 103 | case TextureType::ARRAY_1D: | ||
| 104 | return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); | ||
| 105 | case TextureType::_2D: | ||
| 106 | case TextureType::ARRAY_2D: | ||
| 107 | return v.ir.CompositeConstruct( | ||
| 108 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 109 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); | ||
| 110 | case TextureType::_3D: | ||
| 111 | case TextureType::ARRAY_3D: | ||
| 112 | return v.ir.CompositeConstruct( | ||
| 113 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 114 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), | ||
| 115 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); | ||
| 116 | case TextureType::CUBE: | ||
| 117 | case TextureType::ARRAY_CUBE: | ||
| 118 | throw NotImplementedException("Illegal offset on CUBE sample"); | ||
| 119 | } | ||
| 120 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 121 | } | ||
| 122 | |||
| 123 | bool HasExplicitLod(Blod blod) { | ||
| 124 | switch (blod) { | ||
| 125 | case Blod::LL: | ||
| 126 | case Blod::LLA: | ||
| 127 | case Blod::LZ: | ||
| 128 | return true; | ||
| 129 | default: | ||
| 130 | return false; | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, | ||
| 135 | std::optional<u32> cbuf_offset) { | ||
| 136 | union { | ||
| 137 | u64 raw; | ||
| 138 | BitField<35, 1, u64> ndv; | ||
| 139 | BitField<49, 1, u64> nodep; | ||
| 140 | BitField<50, 1, u64> dc; | ||
| 141 | BitField<51, 3, IR::Pred> sparse_pred; | ||
| 142 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 143 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 144 | BitField<20, 8, IR::Reg> meta_reg; | ||
| 145 | BitField<28, 3, TextureType> type; | ||
| 146 | BitField<31, 4, u64> mask; | ||
| 147 | } const tex{insn}; | ||
| 148 | |||
| 149 | if (lc) { | ||
| 150 | throw NotImplementedException("LC"); | ||
| 151 | } | ||
| 152 | const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)}; | ||
| 153 | |||
| 154 | IR::Reg meta_reg{tex.meta_reg}; | ||
| 155 | IR::Value handle; | ||
| 156 | IR::Value offset; | ||
| 157 | IR::F32 dref; | ||
| 158 | IR::F32 lod_clamp; | ||
| 159 | if (cbuf_offset) { | ||
| 160 | handle = v.ir.Imm32(*cbuf_offset); | ||
| 161 | } else { | ||
| 162 | handle = v.X(meta_reg++); | ||
| 163 | } | ||
| 164 | const IR::F32 lod{MakeLod(v, meta_reg, blod)}; | ||
| 165 | if (aoffi) { | ||
| 166 | offset = MakeOffset(v, meta_reg, tex.type); | ||
| 167 | } | ||
| 168 | if (tex.dc != 0) { | ||
| 169 | dref = v.F(meta_reg++); | ||
| 170 | } | ||
| 171 | IR::TextureInstInfo info{}; | ||
| 172 | info.type.Assign(GetType(tex.type)); | ||
| 173 | info.is_depth.Assign(tex.dc != 0 ? 1 : 0); | ||
| 174 | info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); | ||
| 175 | info.has_lod_clamp.Assign(lc ? 1 : 0); | ||
| 176 | |||
| 177 | const IR::Value sample{[&]() -> IR::Value { | ||
| 178 | if (tex.dc == 0) { | ||
| 179 | if (HasExplicitLod(blod)) { | ||
| 180 | return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info); | ||
| 181 | } else { | ||
| 182 | return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); | ||
| 183 | } | ||
| 184 | } | ||
| 185 | if (HasExplicitLod(blod)) { | ||
| 186 | return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info); | ||
| 187 | } else { | ||
| 188 | return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, | ||
| 189 | info); | ||
| 190 | } | ||
| 191 | }()}; | ||
| 192 | |||
| 193 | IR::Reg dest_reg{tex.dest_reg}; | ||
| 194 | for (int element = 0; element < 4; ++element) { | ||
| 195 | if (((tex.mask >> element) & 1) == 0) { | ||
| 196 | continue; | ||
| 197 | } | ||
| 198 | IR::F32 value; | ||
| 199 | if (tex.dc != 0) { | ||
| 200 | value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f); | ||
| 201 | } else { | ||
| 202 | value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))}; | ||
| 203 | } | ||
| 204 | v.F(dest_reg, value); | ||
| 205 | ++dest_reg; | ||
| 206 | } | ||
| 207 | if (tex.sparse_pred != IR::Pred::PT) { | ||
| 208 | v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); | ||
| 209 | } | ||
| 210 | } | ||
| 211 | } // Anonymous namespace | ||
| 212 | |||
| 213 | void TranslatorVisitor::TEX(u64 insn) { | ||
| 214 | union { | ||
| 215 | u64 raw; | ||
| 216 | BitField<54, 1, u64> aoffi; | ||
| 217 | BitField<55, 3, Blod> blod; | ||
| 218 | BitField<58, 1, u64> lc; | ||
| 219 | BitField<36, 13, u64> cbuf_offset; | ||
| 220 | } const tex{insn}; | ||
| 221 | |||
| 222 | Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4)); | ||
| 223 | } | ||
| 224 | |||
| 225 | void TranslatorVisitor::TEX_b(u64 insn) { | ||
| 226 | union { | ||
| 227 | u64 raw; | ||
| 228 | BitField<36, 1, u64> aoffi; | ||
| 229 | BitField<37, 3, Blod> blod; | ||
| 230 | BitField<40, 1, u64> lc; | ||
| 231 | } const tex{insn}; | ||
| 232 | |||
| 233 | Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); | ||
| 234 | } | ||
| 235 | |||
| 236 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp new file mode 100644 index 000000000..154e7f1a1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp | |||
| @@ -0,0 +1,266 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <utility> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Precision : u64 { | ||
| 15 | F16, | ||
| 16 | F32, | ||
| 17 | }; | ||
| 18 | |||
| 19 | union Encoding { | ||
| 20 | u64 raw; | ||
| 21 | BitField<59, 1, Precision> precision; | ||
| 22 | BitField<53, 4, u64> encoding; | ||
| 23 | BitField<49, 1, u64> nodep; | ||
| 24 | BitField<28, 8, IR::Reg> dest_reg_b; | ||
| 25 | BitField<0, 8, IR::Reg> dest_reg_a; | ||
| 26 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 27 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 28 | BitField<36, 13, u64> cbuf_offset; | ||
| 29 | BitField<50, 3, u64> swizzle; | ||
| 30 | }; | ||
| 31 | |||
// One bit per color component
constexpr unsigned R{1U << 0};
constexpr unsigned G{1U << 1};
constexpr unsigned B{1U << 2};
constexpr unsigned A{1U << 3};

// Component masks indexed by the swizzle field; Swizzle() uses this table when
// the second destination register is RZ.
constexpr std::array<unsigned, 8> RG_LUT{
    R,     // 0
    G,     // 1
    B,     // 2
    A,     // 3
    R | G, // 4
    R | A, // 5
    G | A, // 6
    B | A, // 7
};

// Component masks indexed by the swizzle field; Swizzle() uses this table when
// a second destination register is present.
constexpr std::array<unsigned, 5> RGBA_LUT{
    R | G | B,     // 0
    R | G | A,     // 1
    R | B | A,     // 2
    G | B | A,     // 3
    R | G | B | A, // 4
};
| 55 | |||
| 56 | void CheckAlignment(IR::Reg reg, size_t alignment) { | ||
| 57 | if (!IR::IsAligned(reg, alignment)) { | ||
| 58 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | template <typename... Args> | ||
| 63 | IR::Value Composite(TranslatorVisitor& v, Args... regs) { | ||
| 64 | return v.ir.CompositeConstruct(v.F(regs)...); | ||
| 65 | } | ||
| 66 | |||
| 67 | IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { | ||
| 68 | return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); | ||
| 69 | } | ||
| 70 | |||
| 71 | IR::Value Sample(TranslatorVisitor& v, u64 insn) { | ||
| 72 | const Encoding texs{insn}; | ||
| 73 | const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))}; | ||
| 74 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 75 | const IR::Reg reg_a{texs.src_reg_a}; | ||
| 76 | const IR::Reg reg_b{texs.src_reg_b}; | ||
| 77 | IR::TextureInstInfo info{}; | ||
| 78 | if (texs.precision == Precision::F16) { | ||
| 79 | info.relaxed_precision.Assign(1); | ||
| 80 | } | ||
| 81 | switch (texs.encoding) { | ||
| 82 | case 0: // 1D.LZ | ||
| 83 | info.type.Assign(TextureType::Color1D); | ||
| 84 | return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info); | ||
| 85 | case 1: // 2D | ||
| 86 | info.type.Assign(TextureType::Color2D); | ||
| 87 | return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info); | ||
| 88 | case 2: // 2D.LZ | ||
| 89 | info.type.Assign(TextureType::Color2D); | ||
| 90 | return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, info); | ||
| 91 | case 3: // 2D.LL | ||
| 92 | CheckAlignment(reg_a, 2); | ||
| 93 | info.type.Assign(TextureType::Color2D); | ||
| 94 | return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {}, | ||
| 95 | info); | ||
| 96 | case 4: // 2D.DC | ||
| 97 | CheckAlignment(reg_a, 2); | ||
| 98 | info.type.Assign(TextureType::Color2D); | ||
| 99 | info.is_depth.Assign(1); | ||
| 100 | return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), | ||
| 101 | {}, {}, {}, info); | ||
| 102 | case 5: // 2D.LL.DC | ||
| 103 | CheckAlignment(reg_a, 2); | ||
| 104 | CheckAlignment(reg_b, 2); | ||
| 105 | info.type.Assign(TextureType::Color2D); | ||
| 106 | info.is_depth.Assign(1); | ||
| 107 | return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), | ||
| 108 | v.F(reg_b + 1), v.F(reg_b), {}, info); | ||
| 109 | case 6: // 2D.LZ.DC | ||
| 110 | CheckAlignment(reg_a, 2); | ||
| 111 | info.type.Assign(TextureType::Color2D); | ||
| 112 | info.is_depth.Assign(1); | ||
| 113 | return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), | ||
| 114 | zero, {}, info); | ||
| 115 | case 7: // ARRAY_2D | ||
| 116 | CheckAlignment(reg_a, 2); | ||
| 117 | info.type.Assign(TextureType::ColorArray2D); | ||
| 118 | return v.ir.ImageSampleImplicitLod( | ||
| 119 | handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), | ||
| 120 | {}, {}, {}, info); | ||
| 121 | case 8: // ARRAY_2D.LZ | ||
| 122 | CheckAlignment(reg_a, 2); | ||
| 123 | info.type.Assign(TextureType::ColorArray2D); | ||
| 124 | return v.ir.ImageSampleExplicitLod( | ||
| 125 | handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), | ||
| 126 | zero, {}, info); | ||
| 127 | case 9: // ARRAY_2D.LZ.DC | ||
| 128 | CheckAlignment(reg_a, 2); | ||
| 129 | CheckAlignment(reg_b, 2); | ||
| 130 | info.type.Assign(TextureType::ColorArray2D); | ||
| 131 | info.is_depth.Assign(1); | ||
| 132 | return v.ir.ImageSampleDrefExplicitLod( | ||
| 133 | handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), | ||
| 134 | v.F(reg_b + 1), zero, {}, info); | ||
| 135 | case 10: // 3D | ||
| 136 | CheckAlignment(reg_a, 2); | ||
| 137 | info.type.Assign(TextureType::Color3D); | ||
| 138 | return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {}, | ||
| 139 | {}, info); | ||
| 140 | case 11: // 3D.LZ | ||
| 141 | CheckAlignment(reg_a, 2); | ||
| 142 | info.type.Assign(TextureType::Color3D); | ||
| 143 | return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {}, | ||
| 144 | info); | ||
| 145 | case 12: // CUBE | ||
| 146 | CheckAlignment(reg_a, 2); | ||
| 147 | info.type.Assign(TextureType::ColorCube); | ||
| 148 | return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {}, | ||
| 149 | {}, info); | ||
| 150 | case 13: // CUBE.LL | ||
| 151 | CheckAlignment(reg_a, 2); | ||
| 152 | CheckAlignment(reg_b, 2); | ||
| 153 | info.type.Assign(TextureType::ColorCube); | ||
| 154 | return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), | ||
| 155 | v.F(reg_b + 1), {}, info); | ||
| 156 | default: | ||
| 157 | throw NotImplementedException("Illegal encoding {}", texs.encoding.Value()); | ||
| 158 | } | ||
| 159 | } | ||
| 160 | |||
| 161 | unsigned Swizzle(u64 insn) { | ||
| 162 | const Encoding texs{insn}; | ||
| 163 | const size_t encoding{texs.swizzle}; | ||
| 164 | if (texs.dest_reg_b == IR::Reg::RZ) { | ||
| 165 | if (encoding >= RG_LUT.size()) { | ||
| 166 | throw NotImplementedException("Illegal RG encoding {}", encoding); | ||
| 167 | } | ||
| 168 | return RG_LUT[encoding]; | ||
| 169 | } else { | ||
| 170 | if (encoding >= RGBA_LUT.size()) { | ||
| 171 | throw NotImplementedException("Illegal RGBA encoding {}", encoding); | ||
| 172 | } | ||
| 173 | return RGBA_LUT[encoding]; | ||
| 174 | } | ||
| 175 | } | ||
| 176 | |||
| 177 | IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { | ||
| 178 | const bool is_shadow{sample.Type() == IR::Type::F32}; | ||
| 179 | if (is_shadow) { | ||
| 180 | const bool is_alpha{component == 3}; | ||
| 181 | return is_alpha ? v.ir.Imm32(1.0f) : IR::F32{sample}; | ||
| 182 | } else { | ||
| 183 | return IR::F32{v.ir.CompositeExtract(sample, component)}; | ||
| 184 | } | ||
| 185 | } | ||
| 186 | |||
| 187 | IR::Reg RegStoreComponent32(u64 insn, unsigned index) { | ||
| 188 | const Encoding texs{insn}; | ||
| 189 | switch (index) { | ||
| 190 | case 0: | ||
| 191 | return texs.dest_reg_a; | ||
| 192 | case 1: | ||
| 193 | CheckAlignment(texs.dest_reg_a, 2); | ||
| 194 | return texs.dest_reg_a + 1; | ||
| 195 | case 2: | ||
| 196 | return texs.dest_reg_b; | ||
| 197 | case 3: | ||
| 198 | CheckAlignment(texs.dest_reg_b, 2); | ||
| 199 | return texs.dest_reg_b + 1; | ||
| 200 | } | ||
| 201 | throw LogicError("Invalid store index {}", index); | ||
| 202 | } | ||
| 203 | |||
| 204 | void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 205 | const unsigned swizzle{Swizzle(insn)}; | ||
| 206 | unsigned store_index{0}; | ||
| 207 | for (unsigned component = 0; component < 4; ++component) { | ||
| 208 | if (((swizzle >> component) & 1) == 0) { | ||
| 209 | continue; | ||
| 210 | } | ||
| 211 | const IR::Reg dest{RegStoreComponent32(insn, store_index)}; | ||
| 212 | v.F(dest, Extract(v, sample, component)); | ||
| 213 | ++store_index; | ||
| 214 | } | ||
| 215 | } | ||
| 216 | |||
| 217 | IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { | ||
| 218 | return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); | ||
| 219 | } | ||
| 220 | |||
| 221 | void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 222 | const unsigned swizzle{Swizzle(insn)}; | ||
| 223 | unsigned store_index{0}; | ||
| 224 | std::array<IR::F32, 4> swizzled; | ||
| 225 | for (unsigned component = 0; component < 4; ++component) { | ||
| 226 | if (((swizzle >> component) & 1) == 0) { | ||
| 227 | continue; | ||
| 228 | } | ||
| 229 | swizzled[store_index] = Extract(v, sample, component); | ||
| 230 | ++store_index; | ||
| 231 | } | ||
| 232 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 233 | const Encoding texs{insn}; | ||
| 234 | switch (store_index) { | ||
| 235 | case 1: | ||
| 236 | v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero)); | ||
| 237 | break; | ||
| 238 | case 2: | ||
| 239 | case 3: | ||
| 240 | case 4: | ||
| 241 | v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); | ||
| 242 | switch (store_index) { | ||
| 243 | case 2: | ||
| 244 | break; | ||
| 245 | case 3: | ||
| 246 | v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero)); | ||
| 247 | break; | ||
| 248 | case 4: | ||
| 249 | v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); | ||
| 250 | break; | ||
| 251 | } | ||
| 252 | break; | ||
| 253 | } | ||
| 254 | } | ||
| 255 | } // Anonymous namespace | ||
| 256 | |||
| 257 | void TranslatorVisitor::TEXS(u64 insn) { | ||
| 258 | const IR::Value sample{Sample(*this, insn)}; | ||
| 259 | if (Encoding{insn}.precision == Precision::F32) { | ||
| 260 | Store32(*this, insn, sample); | ||
| 261 | } else { | ||
| 262 | Store16(*this, insn, sample); | ||
| 263 | } | ||
| 264 | } | ||
| 265 | |||
| 266 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp new file mode 100644 index 000000000..218cbc1a8 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp | |||
| @@ -0,0 +1,208 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | enum class TextureType : u64 { | ||
| 16 | _1D, | ||
| 17 | ARRAY_1D, | ||
| 18 | _2D, | ||
| 19 | ARRAY_2D, | ||
| 20 | _3D, | ||
| 21 | ARRAY_3D, | ||
| 22 | CUBE, | ||
| 23 | ARRAY_CUBE, | ||
| 24 | }; | ||
| 25 | |||
| 26 | enum class OffsetType : u64 { | ||
| 27 | None = 0, | ||
| 28 | AOFFI, | ||
| 29 | PTP, | ||
| 30 | Invalid, | ||
| 31 | }; | ||
| 32 | |||
| 33 | enum class ComponentType : u64 { | ||
| 34 | R = 0, | ||
| 35 | G = 1, | ||
| 36 | B = 2, | ||
| 37 | A = 3, | ||
| 38 | }; | ||
| 39 | |||
| 40 | Shader::TextureType GetType(TextureType type) { | ||
| 41 | switch (type) { | ||
| 42 | case TextureType::_1D: | ||
| 43 | return Shader::TextureType::Color1D; | ||
| 44 | case TextureType::ARRAY_1D: | ||
| 45 | return Shader::TextureType::ColorArray1D; | ||
| 46 | case TextureType::_2D: | ||
| 47 | return Shader::TextureType::Color2D; | ||
| 48 | case TextureType::ARRAY_2D: | ||
| 49 | return Shader::TextureType::ColorArray2D; | ||
| 50 | case TextureType::_3D: | ||
| 51 | return Shader::TextureType::Color3D; | ||
| 52 | case TextureType::ARRAY_3D: | ||
| 53 | throw NotImplementedException("3D array texture type"); | ||
| 54 | case TextureType::CUBE: | ||
| 55 | return Shader::TextureType::ColorCube; | ||
| 56 | case TextureType::ARRAY_CUBE: | ||
| 57 | return Shader::TextureType::ColorArrayCube; | ||
| 58 | } | ||
| 59 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 60 | } | ||
| 61 | |||
| 62 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||
| 63 | const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; | ||
| 64 | switch (type) { | ||
| 65 | case TextureType::_1D: | ||
| 66 | return v.F(reg); | ||
| 67 | case TextureType::ARRAY_1D: | ||
| 68 | return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); | ||
| 69 | case TextureType::_2D: | ||
| 70 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); | ||
| 71 | case TextureType::ARRAY_2D: | ||
| 72 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); | ||
| 73 | case TextureType::_3D: | ||
| 74 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 75 | case TextureType::ARRAY_3D: | ||
| 76 | throw NotImplementedException("3D array texture type"); | ||
| 77 | case TextureType::CUBE: | ||
| 78 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 79 | case TextureType::ARRAY_CUBE: | ||
| 80 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); | ||
| 81 | } | ||
| 82 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 83 | } | ||
| 84 | |||
| 85 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { | ||
| 86 | const IR::U32 value{v.X(reg++)}; | ||
| 87 | switch (type) { | ||
| 88 | case TextureType::_1D: | ||
| 89 | case TextureType::ARRAY_1D: | ||
| 90 | return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true); | ||
| 91 | case TextureType::_2D: | ||
| 92 | case TextureType::ARRAY_2D: | ||
| 93 | return v.ir.CompositeConstruct( | ||
| 94 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), | ||
| 95 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); | ||
| 96 | case TextureType::_3D: | ||
| 97 | case TextureType::ARRAY_3D: | ||
| 98 | return v.ir.CompositeConstruct( | ||
| 99 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), | ||
| 100 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), | ||
| 101 | v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true)); | ||
| 102 | case TextureType::CUBE: | ||
| 103 | case TextureType::ARRAY_CUBE: | ||
| 104 | throw NotImplementedException("Illegal offset on CUBE sample"); | ||
| 105 | } | ||
| 106 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 107 | } | ||
| 108 | |||
| 109 | std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { | ||
| 110 | const IR::U32 value1{v.X(reg++)}; | ||
| 111 | const IR::U32 value2{v.X(reg++)}; | ||
| 112 | const IR::U32 bitsize{v.ir.Imm32(6)}; | ||
| 113 | const auto make_vector{[&v, &bitsize](const IR::U32& value) { | ||
| 114 | return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, true), | ||
| 115 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true), | ||
| 116 | v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true), | ||
| 117 | v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true)); | ||
| 118 | }}; | ||
| 119 | return {make_vector(value1), make_vector(value2)}; | ||
| 120 | } | ||
| 121 | |||
| 122 | void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, | ||
| 123 | bool is_bindless) { | ||
| 124 | union { | ||
| 125 | u64 raw; | ||
| 126 | BitField<35, 1, u64> ndv; | ||
| 127 | BitField<49, 1, u64> nodep; | ||
| 128 | BitField<50, 1, u64> dc; | ||
| 129 | BitField<51, 3, IR::Pred> sparse_pred; | ||
| 130 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 131 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 132 | BitField<20, 8, IR::Reg> meta_reg; | ||
| 133 | BitField<28, 3, TextureType> type; | ||
| 134 | BitField<31, 4, u64> mask; | ||
| 135 | BitField<36, 13, u64> cbuf_offset; | ||
| 136 | } const tld4{insn}; | ||
| 137 | |||
| 138 | const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)}; | ||
| 139 | |||
| 140 | IR::Reg meta_reg{tld4.meta_reg}; | ||
| 141 | IR::Value handle; | ||
| 142 | IR::Value offset; | ||
| 143 | IR::Value offset2; | ||
| 144 | IR::F32 dref; | ||
| 145 | if (!is_bindless) { | ||
| 146 | handle = v.ir.Imm32(static_cast<u32>(tld4.cbuf_offset.Value() * 4)); | ||
| 147 | } else { | ||
| 148 | handle = v.X(meta_reg++); | ||
| 149 | } | ||
| 150 | switch (offset_type) { | ||
| 151 | case OffsetType::None: | ||
| 152 | break; | ||
| 153 | case OffsetType::AOFFI: | ||
| 154 | offset = MakeOffset(v, meta_reg, tld4.type); | ||
| 155 | break; | ||
| 156 | case OffsetType::PTP: | ||
| 157 | std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg); | ||
| 158 | break; | ||
| 159 | default: | ||
| 160 | throw NotImplementedException("Invalid offset type {}", offset_type); | ||
| 161 | } | ||
| 162 | if (tld4.dc != 0) { | ||
| 163 | dref = v.F(meta_reg++); | ||
| 164 | } | ||
| 165 | IR::TextureInstInfo info{}; | ||
| 166 | info.type.Assign(GetType(tld4.type)); | ||
| 167 | info.is_depth.Assign(tld4.dc != 0 ? 1 : 0); | ||
| 168 | info.gather_component.Assign(static_cast<u32>(component_type)); | ||
| 169 | const IR::Value sample{[&] { | ||
| 170 | if (tld4.dc == 0) { | ||
| 171 | return v.ir.ImageGather(handle, coords, offset, offset2, info); | ||
| 172 | } | ||
| 173 | return v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info); | ||
| 174 | }()}; | ||
| 175 | |||
| 176 | IR::Reg dest_reg{tld4.dest_reg}; | ||
| 177 | for (size_t element = 0; element < 4; ++element) { | ||
| 178 | if (((tld4.mask >> element) & 1) == 0) { | ||
| 179 | continue; | ||
| 180 | } | ||
| 181 | v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); | ||
| 182 | ++dest_reg; | ||
| 183 | } | ||
| 184 | if (tld4.sparse_pred != IR::Pred::PT) { | ||
| 185 | v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); | ||
| 186 | } | ||
| 187 | } | ||
| 188 | } // Anonymous namespace | ||
| 189 | |||
| 190 | void TranslatorVisitor::TLD4(u64 insn) { | ||
| 191 | union { | ||
| 192 | u64 raw; | ||
| 193 | BitField<56, 2, ComponentType> component; | ||
| 194 | BitField<54, 2, OffsetType> offset; | ||
| 195 | } const tld4{insn}; | ||
| 196 | Impl(*this, insn, tld4.component, tld4.offset, false); | ||
| 197 | } | ||
| 198 | |||
| 199 | void TranslatorVisitor::TLD4_b(u64 insn) { | ||
| 200 | union { | ||
| 201 | u64 raw; | ||
| 202 | BitField<38, 2, ComponentType> component; | ||
| 203 | BitField<36, 2, OffsetType> offset; | ||
| 204 | } const tld4{insn}; | ||
| 205 | Impl(*this, insn, tld4.component, tld4.offset, true); | ||
| 206 | } | ||
| 207 | |||
| 208 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp new file mode 100644 index 000000000..34efa2d50 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp | |||
| @@ -0,0 +1,134 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <utility> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Precision : u64 { | ||
| 15 | F32, | ||
| 16 | F16, | ||
| 17 | }; | ||
| 18 | |||
| 19 | enum class ComponentType : u64 { | ||
| 20 | R = 0, | ||
| 21 | G = 1, | ||
| 22 | B = 2, | ||
| 23 | A = 3, | ||
| 24 | }; | ||
| 25 | |||
| 26 | union Encoding { | ||
| 27 | u64 raw; | ||
| 28 | BitField<55, 1, Precision> precision; | ||
| 29 | BitField<52, 2, ComponentType> component_type; | ||
| 30 | BitField<51, 1, u64> aoffi; | ||
| 31 | BitField<50, 1, u64> dc; | ||
| 32 | BitField<49, 1, u64> nodep; | ||
| 33 | BitField<28, 8, IR::Reg> dest_reg_b; | ||
| 34 | BitField<0, 8, IR::Reg> dest_reg_a; | ||
| 35 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 36 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 37 | BitField<36, 13, u64> cbuf_offset; | ||
| 38 | }; | ||
| 39 | |||
| 40 | void CheckAlignment(IR::Reg reg, size_t alignment) { | ||
| 41 | if (!IR::IsAligned(reg, alignment)) { | ||
| 42 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { | ||
| 47 | const IR::U32 value{v.X(reg)}; | ||
| 48 | return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), | ||
| 49 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); | ||
| 50 | } | ||
| 51 | |||
| 52 | IR::Value Sample(TranslatorVisitor& v, u64 insn) { | ||
| 53 | const Encoding tld4s{insn}; | ||
| 54 | const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tld4s.cbuf_offset * 4))}; | ||
| 55 | const IR::Reg reg_a{tld4s.src_reg_a}; | ||
| 56 | const IR::Reg reg_b{tld4s.src_reg_b}; | ||
| 57 | IR::TextureInstInfo info{}; | ||
| 58 | if (tld4s.precision == Precision::F16) { | ||
| 59 | info.relaxed_precision.Assign(1); | ||
| 60 | } | ||
| 61 | info.gather_component.Assign(static_cast<u32>(tld4s.component_type.Value())); | ||
| 62 | info.type.Assign(Shader::TextureType::Color2D); | ||
| 63 | info.is_depth.Assign(tld4s.dc != 0 ? 1 : 0); | ||
| 64 | IR::Value coords; | ||
| 65 | if (tld4s.aoffi != 0) { | ||
| 66 | CheckAlignment(reg_a, 2); | ||
| 67 | coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); | ||
| 68 | IR::Value offset = MakeOffset(v, reg_b); | ||
| 69 | if (tld4s.dc != 0) { | ||
| 70 | CheckAlignment(reg_b, 2); | ||
| 71 | IR::F32 dref = v.F(reg_b + 1); | ||
| 72 | return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info); | ||
| 73 | } | ||
| 74 | return v.ir.ImageGather(handle, coords, offset, {}, info); | ||
| 75 | } | ||
| 76 | if (tld4s.dc != 0) { | ||
| 77 | CheckAlignment(reg_a, 2); | ||
| 78 | coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); | ||
| 79 | IR::F32 dref = v.F(reg_b); | ||
| 80 | return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info); | ||
| 81 | } | ||
| 82 | coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b)); | ||
| 83 | return v.ir.ImageGather(handle, coords, {}, {}, info); | ||
| 84 | } | ||
| 85 | |||
| 86 | IR::Reg RegStoreComponent32(u64 insn, size_t index) { | ||
| 87 | const Encoding tlds4{insn}; | ||
| 88 | switch (index) { | ||
| 89 | case 0: | ||
| 90 | return tlds4.dest_reg_a; | ||
| 91 | case 1: | ||
| 92 | CheckAlignment(tlds4.dest_reg_a, 2); | ||
| 93 | return tlds4.dest_reg_a + 1; | ||
| 94 | case 2: | ||
| 95 | return tlds4.dest_reg_b; | ||
| 96 | case 3: | ||
| 97 | CheckAlignment(tlds4.dest_reg_b, 2); | ||
| 98 | return tlds4.dest_reg_b + 1; | ||
| 99 | } | ||
| 100 | throw LogicError("Invalid store index {}", index); | ||
| 101 | } | ||
| 102 | |||
| 103 | void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 104 | for (size_t component = 0; component < 4; ++component) { | ||
| 105 | const IR::Reg dest{RegStoreComponent32(insn, component)}; | ||
| 106 | v.F(dest, IR::F32{v.ir.CompositeExtract(sample, component)}); | ||
| 107 | } | ||
| 108 | } | ||
| 109 | |||
| 110 | IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { | ||
| 111 | return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); | ||
| 112 | } | ||
| 113 | |||
| 114 | void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 115 | std::array<IR::F32, 4> swizzled; | ||
| 116 | for (size_t component = 0; component < 4; ++component) { | ||
| 117 | swizzled[component] = IR::F32{v.ir.CompositeExtract(sample, component)}; | ||
| 118 | } | ||
| 119 | const Encoding tld4s{insn}; | ||
| 120 | v.X(tld4s.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); | ||
| 121 | v.X(tld4s.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); | ||
| 122 | } | ||
| 123 | } // Anonymous namespace | ||
| 124 | |||
| 125 | void TranslatorVisitor::TLD4S(u64 insn) { | ||
| 126 | const IR::Value sample{Sample(*this, insn)}; | ||
| 127 | if (Encoding{insn}.precision == Precision::F32) { | ||
| 128 | Store32(*this, insn, sample); | ||
| 129 | } else { | ||
| 130 | Store16(*this, insn, sample); | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp new file mode 100644 index 000000000..c3fe3ffda --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp | |||
| @@ -0,0 +1,182 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | enum class TextureType : u64 { | ||
| 16 | _1D, | ||
| 17 | ARRAY_1D, | ||
| 18 | _2D, | ||
| 19 | ARRAY_2D, | ||
| 20 | _3D, | ||
| 21 | ARRAY_3D, | ||
| 22 | CUBE, | ||
| 23 | ARRAY_CUBE, | ||
| 24 | }; | ||
| 25 | |||
| 26 | Shader::TextureType GetType(TextureType type) { | ||
| 27 | switch (type) { | ||
| 28 | case TextureType::_1D: | ||
| 29 | return Shader::TextureType::Color1D; | ||
| 30 | case TextureType::ARRAY_1D: | ||
| 31 | return Shader::TextureType::ColorArray1D; | ||
| 32 | case TextureType::_2D: | ||
| 33 | return Shader::TextureType::Color2D; | ||
| 34 | case TextureType::ARRAY_2D: | ||
| 35 | return Shader::TextureType::ColorArray2D; | ||
| 36 | case TextureType::_3D: | ||
| 37 | return Shader::TextureType::Color3D; | ||
| 38 | case TextureType::ARRAY_3D: | ||
| 39 | throw NotImplementedException("3D array texture type"); | ||
| 40 | case TextureType::CUBE: | ||
| 41 | return Shader::TextureType::ColorCube; | ||
| 42 | case TextureType::ARRAY_CUBE: | ||
| 43 | return Shader::TextureType::ColorArrayCube; | ||
| 44 | } | ||
| 45 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 46 | } | ||
| 47 | |||
| 48 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) { | ||
| 49 | const IR::U32 value{v.X(reg)}; | ||
| 50 | const u32 base{has_lod_clamp ? 12U : 16U}; | ||
| 51 | return v.ir.CompositeConstruct( | ||
| 52 | v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true), | ||
| 53 | v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true)); | ||
| 54 | } | ||
| 55 | |||
| 56 | void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | ||
| 57 | union { | ||
| 58 | u64 raw; | ||
| 59 | BitField<49, 1, u64> nodep; | ||
| 60 | BitField<35, 1, u64> aoffi; | ||
| 61 | BitField<50, 1, u64> lc; | ||
| 62 | BitField<51, 3, IR::Pred> sparse_pred; | ||
| 63 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 64 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 65 | BitField<20, 8, IR::Reg> derivate_reg; | ||
| 66 | BitField<28, 3, TextureType> type; | ||
| 67 | BitField<31, 4, u64> mask; | ||
| 68 | BitField<36, 13, u64> cbuf_offset; | ||
| 69 | } const txd{insn}; | ||
| 70 | |||
| 71 | const bool has_lod_clamp = txd.lc != 0; | ||
| 72 | if (has_lod_clamp) { | ||
| 73 | throw NotImplementedException("TXD.LC - CLAMP is not implemented"); | ||
| 74 | } | ||
| 75 | |||
| 76 | IR::Value coords; | ||
| 77 | u32 num_derivates{}; | ||
| 78 | IR::Reg base_reg{txd.coord_reg}; | ||
| 79 | IR::Reg last_reg; | ||
| 80 | IR::Value handle; | ||
| 81 | if (is_bindless) { | ||
| 82 | handle = v.X(base_reg++); | ||
| 83 | } else { | ||
| 84 | handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4)); | ||
| 85 | } | ||
| 86 | |||
| 87 | const auto read_array{[&]() -> IR::F32 { | ||
| 88 | const IR::U32 base{v.ir.Imm32(0)}; | ||
| 89 | const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 12 : 16)}; | ||
| 90 | const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)}; | ||
| 91 | return v.ir.ConvertUToF(32, 16, array_index); | ||
| 92 | }}; | ||
| 93 | switch (txd.type) { | ||
| 94 | case TextureType::_1D: { | ||
| 95 | coords = v.F(base_reg); | ||
| 96 | num_derivates = 1; | ||
| 97 | last_reg = base_reg + 1; | ||
| 98 | break; | ||
| 99 | } | ||
| 100 | case TextureType::ARRAY_1D: { | ||
| 101 | last_reg = base_reg + 1; | ||
| 102 | coords = v.ir.CompositeConstruct(v.F(base_reg), read_array()); | ||
| 103 | num_derivates = 1; | ||
| 104 | break; | ||
| 105 | } | ||
| 106 | case TextureType::_2D: { | ||
| 107 | last_reg = base_reg + 2; | ||
| 108 | coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1)); | ||
| 109 | num_derivates = 2; | ||
| 110 | break; | ||
| 111 | } | ||
| 112 | case TextureType::ARRAY_2D: { | ||
| 113 | last_reg = base_reg + 2; | ||
| 114 | coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array()); | ||
| 115 | num_derivates = 2; | ||
| 116 | break; | ||
| 117 | } | ||
| 118 | default: | ||
| 119 | throw NotImplementedException("Invalid texture type"); | ||
| 120 | } | ||
| 121 | |||
| 122 | const IR::Reg derivate_reg{txd.derivate_reg}; | ||
| 123 | IR::Value derivates; | ||
| 124 | switch (num_derivates) { | ||
| 125 | case 1: { | ||
| 126 | derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1)); | ||
| 127 | break; | ||
| 128 | } | ||
| 129 | case 2: { | ||
| 130 | derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1), | ||
| 131 | v.F(derivate_reg + 2), v.F(derivate_reg + 3)); | ||
| 132 | break; | ||
| 133 | } | ||
| 134 | default: | ||
| 135 | throw NotImplementedException("Invalid texture type"); | ||
| 136 | } | ||
| 137 | |||
| 138 | IR::Value offset; | ||
| 139 | if (txd.aoffi != 0) { | ||
| 140 | offset = MakeOffset(v, last_reg, has_lod_clamp); | ||
| 141 | } | ||
| 142 | |||
| 143 | IR::F32 lod_clamp; | ||
| 144 | if (has_lod_clamp) { | ||
| 145 | // Lod Clamp is a Fixed Point 4.8, we need to transform it to float. | ||
| 146 | // to convert a fixed point, float(value) / float(1 << fixed_point) | ||
| 147 | // in this case the fixed_point is 8. | ||
| 148 | const IR::F32 conv4_8fixp_f{v.ir.Imm32(static_cast<f32>(1U << 8))}; | ||
| 149 | const IR::F32 fixp_lc{v.ir.ConvertUToF( | ||
| 150 | 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))}; | ||
| 151 | lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f); | ||
| 152 | } | ||
| 153 | |||
| 154 | IR::TextureInstInfo info{}; | ||
| 155 | info.type.Assign(GetType(txd.type)); | ||
| 156 | info.num_derivates.Assign(num_derivates); | ||
| 157 | info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0); | ||
| 158 | const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)}; | ||
| 159 | |||
| 160 | IR::Reg dest_reg{txd.dest_reg}; | ||
| 161 | for (size_t element = 0; element < 4; ++element) { | ||
| 162 | if (((txd.mask >> element) & 1) == 0) { | ||
| 163 | continue; | ||
| 164 | } | ||
| 165 | v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); | ||
| 166 | ++dest_reg; | ||
| 167 | } | ||
| 168 | if (txd.sparse_pred != IR::Pred::PT) { | ||
| 169 | v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); | ||
| 170 | } | ||
| 171 | } | ||
| 172 | } // Anonymous namespace | ||
| 173 | |||
| 174 | void TranslatorVisitor::TXD(u64 insn) { | ||
| 175 | Impl(*this, insn, false); | ||
| 176 | } | ||
| 177 | |||
| 178 | void TranslatorVisitor::TXD_b(u64 insn) { | ||
| 179 | Impl(*this, insn, true); | ||
| 180 | } | ||
| 181 | |||
| 182 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp new file mode 100644 index 000000000..983058303 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp | |||
| @@ -0,0 +1,165 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | enum class TextureType : u64 { | ||
| 16 | _1D, | ||
| 17 | ARRAY_1D, | ||
| 18 | _2D, | ||
| 19 | ARRAY_2D, | ||
| 20 | _3D, | ||
| 21 | ARRAY_3D, | ||
| 22 | CUBE, | ||
| 23 | ARRAY_CUBE, | ||
| 24 | }; | ||
| 25 | |||
| 26 | Shader::TextureType GetType(TextureType type) { | ||
| 27 | switch (type) { | ||
| 28 | case TextureType::_1D: | ||
| 29 | return Shader::TextureType::Color1D; | ||
| 30 | case TextureType::ARRAY_1D: | ||
| 31 | return Shader::TextureType::ColorArray1D; | ||
| 32 | case TextureType::_2D: | ||
| 33 | return Shader::TextureType::Color2D; | ||
| 34 | case TextureType::ARRAY_2D: | ||
| 35 | return Shader::TextureType::ColorArray2D; | ||
| 36 | case TextureType::_3D: | ||
| 37 | return Shader::TextureType::Color3D; | ||
| 38 | case TextureType::ARRAY_3D: | ||
| 39 | throw NotImplementedException("3D array texture type"); | ||
| 40 | case TextureType::CUBE: | ||
| 41 | return Shader::TextureType::ColorCube; | ||
| 42 | case TextureType::ARRAY_CUBE: | ||
| 43 | return Shader::TextureType::ColorArrayCube; | ||
| 44 | } | ||
| 45 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 46 | } | ||
| 47 | |||
| 48 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||
| 49 | const auto read_array{ | ||
| 50 | [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }}; | ||
| 51 | switch (type) { | ||
| 52 | case TextureType::_1D: | ||
| 53 | return v.X(reg); | ||
| 54 | case TextureType::ARRAY_1D: | ||
| 55 | return v.ir.CompositeConstruct(v.X(reg + 1), read_array()); | ||
| 56 | case TextureType::_2D: | ||
| 57 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); | ||
| 58 | case TextureType::ARRAY_2D: | ||
| 59 | return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array()); | ||
| 60 | case TextureType::_3D: | ||
| 61 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); | ||
| 62 | case TextureType::ARRAY_3D: | ||
| 63 | throw NotImplementedException("3D array texture type"); | ||
| 64 | case TextureType::CUBE: | ||
| 65 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); | ||
| 66 | case TextureType::ARRAY_CUBE: | ||
| 67 | return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), read_array()); | ||
| 68 | } | ||
| 69 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 70 | } | ||
| 71 | |||
| 72 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { | ||
| 73 | const IR::U32 value{v.X(reg++)}; | ||
| 74 | switch (type) { | ||
| 75 | case TextureType::_1D: | ||
| 76 | case TextureType::ARRAY_1D: | ||
| 77 | return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); | ||
| 78 | case TextureType::_2D: | ||
| 79 | case TextureType::ARRAY_2D: | ||
| 80 | return v.ir.CompositeConstruct( | ||
| 81 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 82 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); | ||
| 83 | case TextureType::_3D: | ||
| 84 | case TextureType::ARRAY_3D: | ||
| 85 | return v.ir.CompositeConstruct( | ||
| 86 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 87 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), | ||
| 88 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); | ||
| 89 | case TextureType::CUBE: | ||
| 90 | case TextureType::ARRAY_CUBE: | ||
| 91 | throw NotImplementedException("Illegal offset on CUBE sample"); | ||
| 92 | } | ||
| 93 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 94 | } | ||
| 95 | |||
| 96 | void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | ||
| 97 | union { | ||
| 98 | u64 raw; | ||
| 99 | BitField<49, 1, u64> nodep; | ||
| 100 | BitField<55, 1, u64> lod; | ||
| 101 | BitField<50, 1, u64> multisample; | ||
| 102 | BitField<35, 1, u64> aoffi; | ||
| 103 | BitField<54, 1, u64> clamp; | ||
| 104 | BitField<51, 3, IR::Pred> sparse_pred; | ||
| 105 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 106 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 107 | BitField<20, 8, IR::Reg> meta_reg; | ||
| 108 | BitField<28, 3, TextureType> type; | ||
| 109 | BitField<31, 4, u64> mask; | ||
| 110 | BitField<36, 13, u64> cbuf_offset; | ||
| 111 | } const tld{insn}; | ||
| 112 | |||
| 113 | const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)}; | ||
| 114 | |||
| 115 | IR::Reg meta_reg{tld.meta_reg}; | ||
| 116 | IR::Value handle; | ||
| 117 | IR::Value offset; | ||
| 118 | IR::U32 lod; | ||
| 119 | IR::U32 multisample; | ||
| 120 | if (is_bindless) { | ||
| 121 | handle = v.X(meta_reg++); | ||
| 122 | } else { | ||
| 123 | handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4)); | ||
| 124 | } | ||
| 125 | if (tld.lod != 0) { | ||
| 126 | lod = v.X(meta_reg++); | ||
| 127 | } else { | ||
| 128 | lod = v.ir.Imm32(0U); | ||
| 129 | } | ||
| 130 | if (tld.aoffi != 0) { | ||
| 131 | offset = MakeOffset(v, meta_reg, tld.type); | ||
| 132 | } | ||
| 133 | if (tld.multisample != 0) { | ||
| 134 | multisample = v.X(meta_reg++); | ||
| 135 | } | ||
| 136 | if (tld.clamp != 0) { | ||
| 137 | throw NotImplementedException("TLD.CL - CLAMP is not implmented"); | ||
| 138 | } | ||
| 139 | IR::TextureInstInfo info{}; | ||
| 140 | info.type.Assign(GetType(tld.type)); | ||
| 141 | const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)}; | ||
| 142 | |||
| 143 | IR::Reg dest_reg{tld.dest_reg}; | ||
| 144 | for (size_t element = 0; element < 4; ++element) { | ||
| 145 | if (((tld.mask >> element) & 1) == 0) { | ||
| 146 | continue; | ||
| 147 | } | ||
| 148 | v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); | ||
| 149 | ++dest_reg; | ||
| 150 | } | ||
| 151 | if (tld.sparse_pred != IR::Pred::PT) { | ||
| 152 | v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); | ||
| 153 | } | ||
| 154 | } | ||
| 155 | } // Anonymous namespace | ||
| 156 | |||
| 157 | void TranslatorVisitor::TLD(u64 insn) { | ||
| 158 | Impl(*this, insn, false); | ||
| 159 | } | ||
| 160 | |||
| 161 | void TranslatorVisitor::TLD_b(u64 insn) { | ||
| 162 | Impl(*this, insn, true); | ||
| 163 | } | ||
| 164 | |||
| 165 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp new file mode 100644 index 000000000..5dd7e31b2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp | |||
| @@ -0,0 +1,242 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Precision : u64 { | ||
| 15 | F16, | ||
| 16 | F32, | ||
| 17 | }; | ||
| 18 | |||
// Component mask bits: bit N set means component N of the fetched texel is stored.
constexpr unsigned R{1U << 0};
constexpr unsigned G{1U << 1};
constexpr unsigned B{1U << 2};
constexpr unsigned A{1U << 3};

// Masks with one or two components, indexed by the swizzle field; Swizzle() selects this table
// when dest_reg_b is RZ.
constexpr std::array<unsigned, 8> RG_LUT{
    R,     G,     B,     A, //
    R | G, R | A, G | A, B | A,
};

// Masks with three or four components, indexed by the swizzle field; Swizzle() selects this table
// when dest_reg_b is a real register.
constexpr std::array<unsigned, 5> RGBA_LUT{
    R | G | B, R | G | A, R | B | A, G | B | A, //
    R | G | B | A,
};
| 42 | |||
| 43 | union Encoding { | ||
| 44 | u64 raw; | ||
| 45 | BitField<59, 1, Precision> precision; | ||
| 46 | BitField<54, 1, u64> aoffi; | ||
| 47 | BitField<53, 1, u64> lod; | ||
| 48 | BitField<55, 1, u64> ms; | ||
| 49 | BitField<49, 1, u64> nodep; | ||
| 50 | BitField<28, 8, IR::Reg> dest_reg_b; | ||
| 51 | BitField<0, 8, IR::Reg> dest_reg_a; | ||
| 52 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 53 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 54 | BitField<36, 13, u64> cbuf_offset; | ||
| 55 | BitField<50, 3, u64> swizzle; | ||
| 56 | BitField<53, 4, u64> encoding; | ||
| 57 | }; | ||
| 58 | |||
| 59 | void CheckAlignment(IR::Reg reg, size_t alignment) { | ||
| 60 | if (!IR::IsAligned(reg, alignment)) { | ||
| 61 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { | ||
| 66 | const IR::U32 value{v.X(reg)}; | ||
| 67 | return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 68 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); | ||
| 69 | } | ||
| 70 | |||
| 71 | IR::Value Sample(TranslatorVisitor& v, u64 insn) { | ||
| 72 | const Encoding tlds{insn}; | ||
| 73 | const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tlds.cbuf_offset * 4))}; | ||
| 74 | const IR::Reg reg_a{tlds.src_reg_a}; | ||
| 75 | const IR::Reg reg_b{tlds.src_reg_b}; | ||
| 76 | IR::Value coords; | ||
| 77 | IR::U32 lod{v.ir.Imm32(0U)}; | ||
| 78 | IR::Value offsets; | ||
| 79 | IR::U32 multisample; | ||
| 80 | Shader::TextureType texture_type{}; | ||
| 81 | switch (tlds.encoding) { | ||
| 82 | case 0: | ||
| 83 | texture_type = Shader::TextureType::Color1D; | ||
| 84 | coords = v.X(reg_a); | ||
| 85 | break; | ||
| 86 | case 1: | ||
| 87 | texture_type = Shader::TextureType::Color1D; | ||
| 88 | coords = v.X(reg_a); | ||
| 89 | lod = v.X(reg_b); | ||
| 90 | break; | ||
| 91 | case 2: | ||
| 92 | texture_type = Shader::TextureType::Color2D; | ||
| 93 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b)); | ||
| 94 | break; | ||
| 95 | case 4: | ||
| 96 | CheckAlignment(reg_a, 2); | ||
| 97 | texture_type = Shader::TextureType::Color2D; | ||
| 98 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); | ||
| 99 | offsets = MakeOffset(v, reg_b); | ||
| 100 | break; | ||
| 101 | case 5: | ||
| 102 | CheckAlignment(reg_a, 2); | ||
| 103 | texture_type = Shader::TextureType::Color2D; | ||
| 104 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); | ||
| 105 | lod = v.X(reg_b); | ||
| 106 | break; | ||
| 107 | case 6: | ||
| 108 | CheckAlignment(reg_a, 2); | ||
| 109 | texture_type = Shader::TextureType::Color2D; | ||
| 110 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); | ||
| 111 | multisample = v.X(reg_b); | ||
| 112 | break; | ||
| 113 | case 7: | ||
| 114 | CheckAlignment(reg_a, 2); | ||
| 115 | texture_type = Shader::TextureType::Color3D; | ||
| 116 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b)); | ||
| 117 | break; | ||
| 118 | case 8: { | ||
| 119 | CheckAlignment(reg_b, 2); | ||
| 120 | const IR::U32 array{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))}; | ||
| 121 | texture_type = Shader::TextureType::ColorArray2D; | ||
| 122 | coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array); | ||
| 123 | break; | ||
| 124 | } | ||
| 125 | case 12: | ||
| 126 | CheckAlignment(reg_a, 2); | ||
| 127 | CheckAlignment(reg_b, 2); | ||
| 128 | texture_type = Shader::TextureType::Color2D; | ||
| 129 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); | ||
| 130 | lod = v.X(reg_b); | ||
| 131 | offsets = MakeOffset(v, reg_b + 1); | ||
| 132 | break; | ||
| 133 | default: | ||
| 134 | throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value()); | ||
| 135 | } | ||
| 136 | IR::TextureInstInfo info{}; | ||
| 137 | if (tlds.precision == Precision::F16) { | ||
| 138 | info.relaxed_precision.Assign(1); | ||
| 139 | } | ||
| 140 | info.type.Assign(texture_type); | ||
| 141 | return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info); | ||
| 142 | } | ||
| 143 | |||
| 144 | unsigned Swizzle(u64 insn) { | ||
| 145 | const Encoding tlds{insn}; | ||
| 146 | const size_t encoding{tlds.swizzle}; | ||
| 147 | if (tlds.dest_reg_b == IR::Reg::RZ) { | ||
| 148 | if (encoding >= RG_LUT.size()) { | ||
| 149 | throw NotImplementedException("Illegal RG encoding {}", encoding); | ||
| 150 | } | ||
| 151 | return RG_LUT[encoding]; | ||
| 152 | } else { | ||
| 153 | if (encoding >= RGBA_LUT.size()) { | ||
| 154 | throw NotImplementedException("Illegal RGBA encoding {}", encoding); | ||
| 155 | } | ||
| 156 | return RGBA_LUT[encoding]; | ||
| 157 | } | ||
| 158 | } | ||
| 159 | |||
| 160 | IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { | ||
| 161 | return IR::F32{v.ir.CompositeExtract(sample, component)}; | ||
| 162 | } | ||
| 163 | |||
| 164 | IR::Reg RegStoreComponent32(u64 insn, unsigned index) { | ||
| 165 | const Encoding tlds{insn}; | ||
| 166 | switch (index) { | ||
| 167 | case 0: | ||
| 168 | return tlds.dest_reg_a; | ||
| 169 | case 1: | ||
| 170 | CheckAlignment(tlds.dest_reg_a, 2); | ||
| 171 | return tlds.dest_reg_a + 1; | ||
| 172 | case 2: | ||
| 173 | return tlds.dest_reg_b; | ||
| 174 | case 3: | ||
| 175 | CheckAlignment(tlds.dest_reg_b, 2); | ||
| 176 | return tlds.dest_reg_b + 1; | ||
| 177 | } | ||
| 178 | throw LogicError("Invalid store index {}", index); | ||
| 179 | } | ||
| 180 | |||
| 181 | void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 182 | const unsigned swizzle{Swizzle(insn)}; | ||
| 183 | unsigned store_index{0}; | ||
| 184 | for (unsigned component = 0; component < 4; ++component) { | ||
| 185 | if (((swizzle >> component) & 1) == 0) { | ||
| 186 | continue; | ||
| 187 | } | ||
| 188 | const IR::Reg dest{RegStoreComponent32(insn, store_index)}; | ||
| 189 | v.F(dest, Extract(v, sample, component)); | ||
| 190 | ++store_index; | ||
| 191 | } | ||
| 192 | } | ||
| 193 | |||
| 194 | IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { | ||
| 195 | return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); | ||
| 196 | } | ||
| 197 | |||
| 198 | void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 199 | const unsigned swizzle{Swizzle(insn)}; | ||
| 200 | unsigned store_index{0}; | ||
| 201 | std::array<IR::F32, 4> swizzled; | ||
| 202 | for (unsigned component = 0; component < 4; ++component) { | ||
| 203 | if (((swizzle >> component) & 1) == 0) { | ||
| 204 | continue; | ||
| 205 | } | ||
| 206 | swizzled[store_index] = Extract(v, sample, component); | ||
| 207 | ++store_index; | ||
| 208 | } | ||
| 209 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 210 | const Encoding tlds{insn}; | ||
| 211 | switch (store_index) { | ||
| 212 | case 1: | ||
| 213 | v.X(tlds.dest_reg_a, Pack(v, swizzled[0], zero)); | ||
| 214 | break; | ||
| 215 | case 2: | ||
| 216 | case 3: | ||
| 217 | case 4: | ||
| 218 | v.X(tlds.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); | ||
| 219 | switch (store_index) { | ||
| 220 | case 2: | ||
| 221 | break; | ||
| 222 | case 3: | ||
| 223 | v.X(tlds.dest_reg_b, Pack(v, swizzled[2], zero)); | ||
| 224 | break; | ||
| 225 | case 4: | ||
| 226 | v.X(tlds.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); | ||
| 227 | break; | ||
| 228 | } | ||
| 229 | break; | ||
| 230 | } | ||
| 231 | } | ||
| 232 | } // Anonymous namespace | ||
| 233 | |||
| 234 | void TranslatorVisitor::TLDS(u64 insn) { | ||
| 235 | const IR::Value sample{Sample(*this, insn)}; | ||
| 236 | if (Encoding{insn}.precision == Precision::F32) { | ||
| 237 | Store32(*this, insn, sample); | ||
| 238 | } else { | ||
| 239 | Store16(*this, insn, sample); | ||
| 240 | } | ||
| 241 | } | ||
| 242 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp new file mode 100644 index 000000000..aea3c0e62 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp | |||
| @@ -0,0 +1,131 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | enum class TextureType : u64 { | ||
| 16 | _1D, | ||
| 17 | ARRAY_1D, | ||
| 18 | _2D, | ||
| 19 | ARRAY_2D, | ||
| 20 | _3D, | ||
| 21 | ARRAY_3D, | ||
| 22 | CUBE, | ||
| 23 | ARRAY_CUBE, | ||
| 24 | }; | ||
| 25 | |||
| 26 | Shader::TextureType GetType(TextureType type) { | ||
| 27 | switch (type) { | ||
| 28 | case TextureType::_1D: | ||
| 29 | return Shader::TextureType::Color1D; | ||
| 30 | case TextureType::ARRAY_1D: | ||
| 31 | return Shader::TextureType::ColorArray1D; | ||
| 32 | case TextureType::_2D: | ||
| 33 | return Shader::TextureType::Color2D; | ||
| 34 | case TextureType::ARRAY_2D: | ||
| 35 | return Shader::TextureType::ColorArray2D; | ||
| 36 | case TextureType::_3D: | ||
| 37 | return Shader::TextureType::Color3D; | ||
| 38 | case TextureType::ARRAY_3D: | ||
| 39 | throw NotImplementedException("3D array texture type"); | ||
| 40 | case TextureType::CUBE: | ||
| 41 | return Shader::TextureType::ColorCube; | ||
| 42 | case TextureType::ARRAY_CUBE: | ||
| 43 | return Shader::TextureType::ColorArrayCube; | ||
| 44 | } | ||
| 45 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 46 | } | ||
| 47 | |||
| 48 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||
| 49 | // The ISA reads an array component here, but this is not needed on high level shading languages | ||
| 50 | // We are dropping this information. | ||
| 51 | switch (type) { | ||
| 52 | case TextureType::_1D: | ||
| 53 | return v.F(reg); | ||
| 54 | case TextureType::ARRAY_1D: | ||
| 55 | return v.F(reg + 1); | ||
| 56 | case TextureType::_2D: | ||
| 57 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); | ||
| 58 | case TextureType::ARRAY_2D: | ||
| 59 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2)); | ||
| 60 | case TextureType::_3D: | ||
| 61 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 62 | case TextureType::ARRAY_3D: | ||
| 63 | throw NotImplementedException("3D array texture type"); | ||
| 64 | case TextureType::CUBE: | ||
| 65 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 66 | case TextureType::ARRAY_CUBE: | ||
| 67 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); | ||
| 68 | } | ||
| 69 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 70 | } | ||
| 71 | |||
| 72 | void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | ||
| 73 | union { | ||
| 74 | u64 raw; | ||
| 75 | BitField<49, 1, u64> nodep; | ||
| 76 | BitField<35, 1, u64> ndv; | ||
| 77 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 78 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 79 | BitField<20, 8, IR::Reg> meta_reg; | ||
| 80 | BitField<28, 3, TextureType> type; | ||
| 81 | BitField<31, 4, u64> mask; | ||
| 82 | BitField<36, 13, u64> cbuf_offset; | ||
| 83 | } const tmml{insn}; | ||
| 84 | |||
| 85 | if ((tmml.mask & 0b1100) != 0) { | ||
| 86 | throw NotImplementedException("TMML BA results are not implmented"); | ||
| 87 | } | ||
| 88 | const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)}; | ||
| 89 | |||
| 90 | IR::U32 handle; | ||
| 91 | IR::Reg meta_reg{tmml.meta_reg}; | ||
| 92 | if (is_bindless) { | ||
| 93 | handle = v.X(meta_reg++); | ||
| 94 | } else { | ||
| 95 | handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4)); | ||
| 96 | } | ||
| 97 | IR::TextureInstInfo info{}; | ||
| 98 | info.type.Assign(GetType(tmml.type)); | ||
| 99 | const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)}; | ||
| 100 | |||
| 101 | IR::Reg dest_reg{tmml.dest_reg}; | ||
| 102 | for (size_t element = 0; element < 4; ++element) { | ||
| 103 | if (((tmml.mask >> element) & 1) == 0) { | ||
| 104 | continue; | ||
| 105 | } | ||
| 106 | IR::F32 value{v.ir.CompositeExtract(sample, element)}; | ||
| 107 | if (element < 2) { | ||
| 108 | IR::U32 casted_value; | ||
| 109 | if (element == 0) { | ||
| 110 | casted_value = v.ir.ConvertFToU(32, value); | ||
| 111 | } else { | ||
| 112 | casted_value = v.ir.ConvertFToS(16, value); | ||
| 113 | } | ||
| 114 | v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8))); | ||
| 115 | } else { | ||
| 116 | v.F(dest_reg, value); | ||
| 117 | } | ||
| 118 | ++dest_reg; | ||
| 119 | } | ||
| 120 | } | ||
| 121 | } // Anonymous namespace | ||
| 122 | |||
| 123 | void TranslatorVisitor::TMML(u64 insn) { | ||
| 124 | Impl(*this, insn, false); | ||
| 125 | } | ||
| 126 | |||
| 127 | void TranslatorVisitor::TMML_b(u64 insn) { | ||
| 128 | Impl(*this, insn, true); | ||
| 129 | } | ||
| 130 | |||
| 131 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp new file mode 100644 index 000000000..0459e5473 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Mode : u64 { | ||
| 15 | Dimension = 1, | ||
| 16 | TextureType = 2, | ||
| 17 | SamplePos = 5, | ||
| 18 | }; | ||
| 19 | |||
| 20 | IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) { | ||
| 21 | switch (mode) { | ||
| 22 | case Mode::Dimension: { | ||
| 23 | const IR::U32 lod{v.X(src_reg)}; | ||
| 24 | return v.ir.ImageQueryDimension(handle, lod); | ||
| 25 | } | ||
| 26 | case Mode::TextureType: | ||
| 27 | case Mode::SamplePos: | ||
| 28 | default: | ||
| 29 | throw NotImplementedException("Mode {}", mode); | ||
| 30 | } | ||
| 31 | } | ||
| 32 | |||
| 33 | void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) { | ||
| 34 | union { | ||
| 35 | u64 raw; | ||
| 36 | BitField<49, 1, u64> nodep; | ||
| 37 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 38 | BitField<8, 8, IR::Reg> src_reg; | ||
| 39 | BitField<22, 3, Mode> mode; | ||
| 40 | BitField<31, 4, u64> mask; | ||
| 41 | } const txq{insn}; | ||
| 42 | |||
| 43 | IR::Reg src_reg{txq.src_reg}; | ||
| 44 | IR::U32 handle; | ||
| 45 | if (cbuf_offset) { | ||
| 46 | handle = v.ir.Imm32(*cbuf_offset); | ||
| 47 | } else { | ||
| 48 | handle = v.X(src_reg); | ||
| 49 | ++src_reg; | ||
| 50 | } | ||
| 51 | const IR::Value query{Query(v, handle, txq.mode, src_reg)}; | ||
| 52 | IR::Reg dest_reg{txq.dest_reg}; | ||
| 53 | for (int element = 0; element < 4; ++element) { | ||
| 54 | if (((txq.mask >> element) & 1) == 0) { | ||
| 55 | continue; | ||
| 56 | } | ||
| 57 | v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))}); | ||
| 58 | ++dest_reg; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | } // Anonymous namespace | ||
| 62 | |||
| 63 | void TranslatorVisitor::TXQ(u64 insn) { | ||
| 64 | union { | ||
| 65 | u64 raw; | ||
| 66 | BitField<36, 13, u64> cbuf_offset; | ||
| 67 | } const txq{insn}; | ||
| 68 | |||
| 69 | Impl(*this, insn, static_cast<u32>(txq.cbuf_offset * 4)); | ||
| 70 | } | ||
| 71 | |||
| 72 | void TranslatorVisitor::TXQ_b(u64 insn) { | ||
| 73 | Impl(*this, insn, std::nullopt); | ||
| 74 | } | ||
| 75 | |||
| 76 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp new file mode 100644 index 000000000..e1f4174cf --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/exception.h" | ||
| 6 | #include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" | ||
| 7 | |||
| 8 | namespace Shader::Maxwell { | ||
| 9 | |||
| 10 | IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width, | ||
| 11 | u32 selector, bool is_signed) { | ||
| 12 | switch (width) { | ||
| 13 | case VideoWidth::Byte: | ||
| 14 | case VideoWidth::Unknown: | ||
| 15 | return ir.BitFieldExtract(value, ir.Imm32(selector * 8), ir.Imm32(8), is_signed); | ||
| 16 | case VideoWidth::Short: | ||
| 17 | return ir.BitFieldExtract(value, ir.Imm32(selector * 16), ir.Imm32(16), is_signed); | ||
| 18 | case VideoWidth::Word: | ||
| 19 | return value; | ||
| 20 | default: | ||
| 21 | throw NotImplementedException("Unknown VideoWidth {}", width); | ||
| 22 | } | ||
| 23 | } | ||
| 24 | |||
| 25 | VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) { | ||
| 26 | // immediates must be 16-bit format. | ||
| 27 | return is_immediate ? VideoWidth::Short : width; | ||
| 28 | } | ||
| 29 | |||
| 30 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h new file mode 100644 index 000000000..40c0b907c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | enum class VideoWidth : u64 { | ||
| 12 | Byte, | ||
| 13 | Unknown, | ||
| 14 | Short, | ||
| 15 | Word, | ||
| 16 | }; | ||
| 17 | |||
| 18 | [[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, | ||
| 19 | VideoWidth width, u32 selector, bool is_signed); | ||
| 20 | |||
| 21 | [[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate); | ||
| 22 | |||
| 23 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp new file mode 100644 index 000000000..78869601f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp | |||
| @@ -0,0 +1,92 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | enum class VideoMinMaxOps : u64 { | ||
| 13 | MRG_16H, | ||
| 14 | MRG_16L, | ||
| 15 | MRG_8B0, | ||
| 16 | MRG_8B2, | ||
| 17 | ACC, | ||
| 18 | MIN, | ||
| 19 | MAX, | ||
| 20 | }; | ||
| 21 | |||
| 22 | [[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs, | ||
| 23 | VideoMinMaxOps op, bool is_signed) { | ||
| 24 | switch (op) { | ||
| 25 | case VideoMinMaxOps::MIN: | ||
| 26 | return ir.IMin(lhs, rhs, is_signed); | ||
| 27 | case VideoMinMaxOps::MAX: | ||
| 28 | return ir.IMax(lhs, rhs, is_signed); | ||
| 29 | default: | ||
| 30 | throw NotImplementedException("VMNMX op {}", op); | ||
| 31 | } | ||
| 32 | } | ||
| 33 | } // Anonymous namespace | ||
| 34 | |||
| 35 | void TranslatorVisitor::VMNMX(u64 insn) { | ||
| 36 | union { | ||
| 37 | u64 raw; | ||
| 38 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 39 | BitField<20, 16, u64> src_b_imm; | ||
| 40 | BitField<28, 2, u64> src_b_selector; | ||
| 41 | BitField<29, 2, VideoWidth> src_b_width; | ||
| 42 | BitField<36, 2, u64> src_a_selector; | ||
| 43 | BitField<37, 2, VideoWidth> src_a_width; | ||
| 44 | BitField<47, 1, u64> cc; | ||
| 45 | BitField<48, 1, u64> src_a_sign; | ||
| 46 | BitField<49, 1, u64> src_b_sign; | ||
| 47 | BitField<50, 1, u64> is_src_b_reg; | ||
| 48 | BitField<51, 3, VideoMinMaxOps> op; | ||
| 49 | BitField<54, 1, u64> dest_sign; | ||
| 50 | BitField<55, 1, u64> sat; | ||
| 51 | BitField<56, 1, u64> mx; | ||
| 52 | } const vmnmx{insn}; | ||
| 53 | |||
| 54 | if (vmnmx.cc != 0) { | ||
| 55 | throw NotImplementedException("VMNMX CC"); | ||
| 56 | } | ||
| 57 | if (vmnmx.sat != 0) { | ||
| 58 | throw NotImplementedException("VMNMX SAT"); | ||
| 59 | } | ||
| 60 | // Selectors were shown to default to 2 in unit tests | ||
| 61 | if (vmnmx.src_a_selector != 2) { | ||
| 62 | throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value()); | ||
| 63 | } | ||
| 64 | if (vmnmx.src_b_selector != 2) { | ||
| 65 | throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value()); | ||
| 66 | } | ||
| 67 | if (vmnmx.src_a_width != VideoWidth::Word) { | ||
| 68 | throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value()); | ||
| 69 | } | ||
| 70 | |||
| 71 | const bool is_b_imm{vmnmx.is_src_b_reg == 0}; | ||
| 72 | const IR::U32 src_a{GetReg8(insn)}; | ||
| 73 | const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmnmx.src_b_imm)) : GetReg20(insn)}; | ||
| 74 | const IR::U32 src_c{GetReg39(insn)}; | ||
| 75 | |||
| 76 | const VideoWidth a_width{vmnmx.src_a_width}; | ||
| 77 | const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)}; | ||
| 78 | |||
| 79 | const bool src_a_signed{vmnmx.src_a_sign != 0}; | ||
| 80 | const bool src_b_signed{vmnmx.src_b_sign != 0}; | ||
| 81 | const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)}; | ||
| 82 | const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)}; | ||
| 83 | |||
| 84 | // First operation's sign is only dependent on operand b's sign | ||
| 85 | const bool op_1_signed{src_b_signed}; | ||
| 86 | |||
| 87 | const IR::U32 lhs{vmnmx.mx != 0 ? ir.IMax(op_a, op_b, op_1_signed) | ||
| 88 | : ir.IMin(op_a, op_b, op_1_signed)}; | ||
| 89 | X(vmnmx.dest_reg, ApplyVideoMinMaxOp(ir, lhs, src_c, vmnmx.op, vmnmx.dest_sign != 0)); | ||
| 90 | } | ||
| 91 | |||
| 92 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp new file mode 100644 index 000000000..cc2e6d6e6 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp | |||
| @@ -0,0 +1,64 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | void TranslatorVisitor::VMAD(u64 insn) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<20, 16, u64> src_b_imm; | ||
| 16 | BitField<28, 2, u64> src_b_selector; | ||
| 17 | BitField<29, 2, VideoWidth> src_b_width; | ||
| 18 | BitField<36, 2, u64> src_a_selector; | ||
| 19 | BitField<37, 2, VideoWidth> src_a_width; | ||
| 20 | BitField<47, 1, u64> cc; | ||
| 21 | BitField<48, 1, u64> src_a_sign; | ||
| 22 | BitField<49, 1, u64> src_b_sign; | ||
| 23 | BitField<50, 1, u64> is_src_b_reg; | ||
| 24 | BitField<51, 2, u64> scale; | ||
| 25 | BitField<53, 1, u64> src_c_neg; | ||
| 26 | BitField<54, 1, u64> src_a_neg; | ||
| 27 | BitField<55, 1, u64> sat; | ||
| 28 | } const vmad{insn}; | ||
| 29 | |||
| 30 | if (vmad.cc != 0) { | ||
| 31 | throw NotImplementedException("VMAD CC"); | ||
| 32 | } | ||
| 33 | if (vmad.sat != 0) { | ||
| 34 | throw NotImplementedException("VMAD SAT"); | ||
| 35 | } | ||
| 36 | if (vmad.scale != 0) { | ||
| 37 | throw NotImplementedException("VMAD SCALE"); | ||
| 38 | } | ||
| 39 | if (vmad.src_a_neg != 0 && vmad.src_c_neg != 0) { | ||
| 40 | throw NotImplementedException("VMAD PO"); | ||
| 41 | } | ||
| 42 | if (vmad.src_a_neg != 0 || vmad.src_c_neg != 0) { | ||
| 43 | throw NotImplementedException("VMAD NEG"); | ||
| 44 | } | ||
| 45 | const bool is_b_imm{vmad.is_src_b_reg == 0}; | ||
| 46 | const IR::U32 src_a{GetReg8(insn)}; | ||
| 47 | const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmad.src_b_imm)) : GetReg20(insn)}; | ||
| 48 | const IR::U32 src_c{GetReg39(insn)}; | ||
| 49 | |||
| 50 | const u32 a_selector{static_cast<u32>(vmad.src_a_selector)}; | ||
| 51 | // Immediate values can't have a selector | ||
| 52 | const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmad.src_b_selector)}; | ||
| 53 | const VideoWidth a_width{vmad.src_a_width}; | ||
| 54 | const VideoWidth b_width{GetVideoSourceWidth(vmad.src_b_width, is_b_imm)}; | ||
| 55 | |||
| 56 | const bool src_a_signed{vmad.src_a_sign != 0}; | ||
| 57 | const bool src_b_signed{vmad.src_b_sign != 0}; | ||
| 58 | const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)}; | ||
| 59 | const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)}; | ||
| 60 | |||
| 61 | X(vmad.dest_reg, ir.IAdd(ir.IMul(op_a, op_b), src_c)); | ||
| 62 | } | ||
| 63 | |||
| 64 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp new file mode 100644 index 000000000..1b66abc33 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp | |||
| @@ -0,0 +1,92 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | enum class VsetpCompareOp : u64 { | ||
| 14 | False = 0, | ||
| 15 | LessThan, | ||
| 16 | Equal, | ||
| 17 | LessThanEqual, | ||
| 18 | GreaterThan = 16, | ||
| 19 | NotEqual, | ||
| 20 | GreaterThanEqual, | ||
| 21 | True, | ||
| 22 | }; | ||
| 23 | |||
| 24 | CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) { | ||
| 25 | switch (op) { | ||
| 26 | case VsetpCompareOp::False: | ||
| 27 | return CompareOp::False; | ||
| 28 | case VsetpCompareOp::LessThan: | ||
| 29 | return CompareOp::LessThan; | ||
| 30 | case VsetpCompareOp::Equal: | ||
| 31 | return CompareOp::Equal; | ||
| 32 | case VsetpCompareOp::LessThanEqual: | ||
| 33 | return CompareOp::LessThanEqual; | ||
| 34 | case VsetpCompareOp::GreaterThan: | ||
| 35 | return CompareOp::GreaterThan; | ||
| 36 | case VsetpCompareOp::NotEqual: | ||
| 37 | return CompareOp::NotEqual; | ||
| 38 | case VsetpCompareOp::GreaterThanEqual: | ||
| 39 | return CompareOp::GreaterThanEqual; | ||
| 40 | case VsetpCompareOp::True: | ||
| 41 | return CompareOp::True; | ||
| 42 | default: | ||
| 43 | throw NotImplementedException("Invalid compare op {}", op); | ||
| 44 | } | ||
| 45 | } | ||
| 46 | } // Anonymous namespace | ||
| 47 | |||
| 48 | void TranslatorVisitor::VSETP(u64 insn) { | ||
| 49 | union { | ||
| 50 | u64 raw; | ||
| 51 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 52 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 53 | BitField<20, 16, u64> src_b_imm; | ||
| 54 | BitField<28, 2, u64> src_b_selector; | ||
| 55 | BitField<29, 2, VideoWidth> src_b_width; | ||
| 56 | BitField<36, 2, u64> src_a_selector; | ||
| 57 | BitField<37, 2, VideoWidth> src_a_width; | ||
| 58 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 59 | BitField<42, 1, u64> neg_bop_pred; | ||
| 60 | BitField<43, 5, VsetpCompareOp> compare_op; | ||
| 61 | BitField<45, 2, BooleanOp> bop; | ||
| 62 | BitField<48, 1, u64> src_a_sign; | ||
| 63 | BitField<49, 1, u64> src_b_sign; | ||
| 64 | BitField<50, 1, u64> is_src_b_reg; | ||
| 65 | } const vsetp{insn}; | ||
| 66 | |||
| 67 | const bool is_b_imm{vsetp.is_src_b_reg == 0}; | ||
| 68 | const IR::U32 src_a{GetReg8(insn)}; | ||
| 69 | const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)}; | ||
| 70 | |||
| 71 | const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)}; | ||
| 72 | const u32 b_selector{static_cast<u32>(vsetp.src_b_selector)}; | ||
| 73 | const VideoWidth a_width{vsetp.src_a_width}; | ||
| 74 | const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)}; | ||
| 75 | |||
| 76 | const bool src_a_signed{vsetp.src_a_sign != 0}; | ||
| 77 | const bool src_b_signed{vsetp.src_b_sign != 0}; | ||
| 78 | const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)}; | ||
| 79 | const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)}; | ||
| 80 | |||
| 81 | // Compare operation's sign is only dependent on operand b's sign | ||
| 82 | const bool compare_signed{src_b_signed}; | ||
| 83 | const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)}; | ||
| 84 | const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)}; | ||
| 85 | const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)}; | ||
| 86 | const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)}; | ||
| 87 | const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)}; | ||
| 88 | ir.SetPred(vsetp.dest_pred_a, result_a); | ||
| 89 | ir.SetPred(vsetp.dest_pred_b, result_b); | ||
| 90 | } | ||
| 91 | |||
| 92 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp new file mode 100644 index 000000000..7ce370f09 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp | |||
| @@ -0,0 +1,54 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class VoteOp : u64 { | ||
| 12 | ALL, | ||
| 13 | ANY, | ||
| 14 | EQ, | ||
| 15 | }; | ||
| 16 | |||
| 17 | [[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) { | ||
| 18 | switch (vote_op) { | ||
| 19 | case VoteOp::ALL: | ||
| 20 | return ir.VoteAll(pred); | ||
| 21 | case VoteOp::ANY: | ||
| 22 | return ir.VoteAny(pred); | ||
| 23 | case VoteOp::EQ: | ||
| 24 | return ir.VoteEqual(pred); | ||
| 25 | default: | ||
| 26 | throw NotImplementedException("Invalid VOTE op {}", vote_op); | ||
| 27 | } | ||
| 28 | } | ||
| 29 | |||
| 30 | void Vote(TranslatorVisitor& v, u64 insn) { | ||
| 31 | union { | ||
| 32 | u64 insn; | ||
| 33 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 34 | BitField<39, 3, IR::Pred> pred_a; | ||
| 35 | BitField<42, 1, u64> neg_pred_a; | ||
| 36 | BitField<45, 3, IR::Pred> pred_b; | ||
| 37 | BitField<48, 2, VoteOp> vote_op; | ||
| 38 | } const vote{insn}; | ||
| 39 | |||
| 40 | const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)}; | ||
| 41 | v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op)); | ||
| 42 | v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred)); | ||
| 43 | } | ||
| 44 | } // Anonymous namespace | ||
| 45 | |||
| 46 | void TranslatorVisitor::VOTE(u64 insn) { | ||
| 47 | Vote(*this, insn); | ||
| 48 | } | ||
| 49 | |||
| 50 | void TranslatorVisitor::VOTE_vtg(u64) { | ||
| 51 | LOG_WARNING(Shader, "(STUBBED) called"); | ||
| 52 | } | ||
| 53 | |||
| 54 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp new file mode 100644 index 000000000..550fed55c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp | |||
| @@ -0,0 +1,69 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | enum class ShuffleMode : u64 { | ||
| 14 | IDX, | ||
| 15 | UP, | ||
| 16 | DOWN, | ||
| 17 | BFLY, | ||
| 18 | }; | ||
| 19 | |||
| 20 | [[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value, | ||
| 21 | const IR::U32& index, const IR::U32& mask, | ||
| 22 | ShuffleMode shfl_op) { | ||
| 23 | const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))}; | ||
| 24 | const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))}; | ||
| 25 | switch (shfl_op) { | ||
| 26 | case ShuffleMode::IDX: | ||
| 27 | return ir.ShuffleIndex(value, index, clamp, seg_mask); | ||
| 28 | case ShuffleMode::UP: | ||
| 29 | return ir.ShuffleUp(value, index, clamp, seg_mask); | ||
| 30 | case ShuffleMode::DOWN: | ||
| 31 | return ir.ShuffleDown(value, index, clamp, seg_mask); | ||
| 32 | case ShuffleMode::BFLY: | ||
| 33 | return ir.ShuffleButterfly(value, index, clamp, seg_mask); | ||
| 34 | default: | ||
| 35 | throw NotImplementedException("Invalid SHFL op {}", shfl_op); | ||
| 36 | } | ||
| 37 | } | ||
| 38 | |||
| 39 | void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) { | ||
| 40 | union { | ||
| 41 | u64 insn; | ||
| 42 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 43 | BitField<8, 8, IR::Reg> src_reg; | ||
| 44 | BitField<30, 2, ShuffleMode> mode; | ||
| 45 | BitField<48, 3, IR::Pred> pred; | ||
| 46 | } const shfl{insn}; | ||
| 47 | |||
| 48 | const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)}; | ||
| 49 | v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result)); | ||
| 50 | v.X(shfl.dest_reg, result); | ||
| 51 | } | ||
| 52 | } // Anonymous namespace | ||
| 53 | |||
| 54 | void TranslatorVisitor::SHFL(u64 insn) { | ||
| 55 | union { | ||
| 56 | u64 insn; | ||
| 57 | BitField<20, 5, u64> src_a_imm; | ||
| 58 | BitField<28, 1, u64> src_a_flag; | ||
| 59 | BitField<29, 1, u64> src_b_flag; | ||
| 60 | BitField<34, 13, u64> src_b_imm; | ||
| 61 | } const flags{insn}; | ||
| 62 | const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm)) | ||
| 63 | : GetReg20(insn)}; | ||
| 64 | const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_b_imm)) | ||
| 65 | : GetReg39(insn)}; | ||
| 66 | Shuffle(*this, insn, src_a, src_b); | ||
| 67 | } | ||
| 68 | |||
| 69 | } // namespace Shader::Maxwell | ||