diff options
Diffstat (limited to 'src/shader_recompiler/frontend/maxwell')
3 files changed, 332 insertions, 12 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp new file mode 100644 index 000000000..7a32c5eb3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp | |||
| @@ -0,0 +1,222 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class AtomOp : u64 { | ||
| 12 | ADD, | ||
| 13 | MIN, | ||
| 14 | MAX, | ||
| 15 | INC, | ||
| 16 | DEC, | ||
| 17 | AND, | ||
| 18 | OR, | ||
| 19 | XOR, | ||
| 20 | EXCH, | ||
| 21 | SAFEADD, | ||
| 22 | }; | ||
| 23 | |||
| 24 | enum class AtomSize : u64 { | ||
| 25 | U32, | ||
| 26 | S32, | ||
| 27 | U64, | ||
| 28 | F32, | ||
| 29 | F16x2, | ||
| 30 | S64, | ||
| 31 | }; | ||
| 32 | |||
| 33 | IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b, | ||
| 34 | AtomOp op, bool is_signed) { | ||
| 35 | switch (op) { | ||
| 36 | case AtomOp::ADD: | ||
| 37 | return ir.GlobalAtomicIAdd(offset, op_b); | ||
| 38 | case AtomOp::MIN: | ||
| 39 | return ir.GlobalAtomicIMin(offset, op_b, is_signed); | ||
| 40 | case AtomOp::MAX: | ||
| 41 | return ir.GlobalAtomicIMax(offset, op_b, is_signed); | ||
| 42 | case AtomOp::INC: | ||
| 43 | return ir.GlobalAtomicInc(offset, op_b); | ||
| 44 | case AtomOp::DEC: | ||
| 45 | return ir.GlobalAtomicDec(offset, op_b); | ||
| 46 | case AtomOp::AND: | ||
| 47 | return ir.GlobalAtomicAnd(offset, op_b); | ||
| 48 | case AtomOp::OR: | ||
| 49 | return ir.GlobalAtomicOr(offset, op_b); | ||
| 50 | case AtomOp::XOR: | ||
| 51 | return ir.GlobalAtomicXor(offset, op_b); | ||
| 52 | case AtomOp::EXCH: | ||
| 53 | return ir.GlobalAtomicExchange(offset, op_b); | ||
| 54 | default: | ||
| 55 | throw NotImplementedException("Integer Atom Operation {}", op); | ||
| 56 | } | ||
| 57 | } | ||
| 58 | |||
| 59 | IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op, | ||
| 60 | AtomSize size) { | ||
| 61 | static constexpr IR::FpControl f16_control{ | ||
| 62 | .no_contraction{false}, | ||
| 63 | .rounding{IR::FpRounding::RN}, | ||
| 64 | .fmz_mode{IR::FmzMode::DontCare}, | ||
| 65 | }; | ||
| 66 | static constexpr IR::FpControl f32_control{ | ||
| 67 | .no_contraction{false}, | ||
| 68 | .rounding{IR::FpRounding::RN}, | ||
| 69 | .fmz_mode{IR::FmzMode::FTZ}, | ||
| 70 | }; | ||
| 71 | switch (op) { | ||
| 72 | case AtomOp::ADD: | ||
| 73 | return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control) | ||
| 74 | : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control); | ||
| 75 | case AtomOp::MIN: | ||
| 76 | return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control); | ||
| 77 | case AtomOp::MAX: | ||
| 78 | return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control); | ||
| 79 | default: | ||
| 80 | throw NotImplementedException("FP Atom Operation {}", op); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 84 | IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) { | ||
| 85 | union { | ||
| 86 | u64 raw; | ||
| 87 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 88 | BitField<28, 20, s64> addr_offset; | ||
| 89 | BitField<28, 20, u64> rz_addr_offset; | ||
| 90 | BitField<48, 1, u64> e; | ||
| 91 | } const mem{insn}; | ||
| 92 | |||
| 93 | const IR::U64 address{[&]() -> IR::U64 { | ||
| 94 | if (mem.e == 0) { | ||
| 95 | return v.ir.UConvert(64, v.X(mem.addr_reg)); | ||
| 96 | } | ||
| 97 | return v.L(mem.addr_reg); | ||
| 98 | }()}; | ||
| 99 | const u64 addr_offset{[&]() -> u64 { | ||
| 100 | if (mem.addr_reg == IR::Reg::RZ) { | ||
| 101 | // When RZ is used, the address is an absolute address | ||
| 102 | return static_cast<u64>(mem.rz_addr_offset.Value()); | ||
| 103 | } else { | ||
| 104 | return static_cast<u64>(mem.addr_offset.Value()); | ||
| 105 | } | ||
| 106 | }()}; | ||
| 107 | return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); | ||
| 108 | } | ||
| 109 | |||
| 110 | bool AtomOpNotApplicable(AtomSize size, AtomOp op) { | ||
| 111 | // TODO: SAFEADD | ||
| 112 | switch (size) { | ||
| 113 | case AtomSize::S32: | ||
| 114 | case AtomSize::U64: | ||
| 115 | return (op == AtomOp::INC || op == AtomOp::DEC); | ||
| 116 | case AtomSize::S64: | ||
| 117 | return !(op == AtomOp::MIN || op == AtomOp::MAX); | ||
| 118 | case AtomSize::F32: | ||
| 119 | return op != AtomOp::ADD; | ||
| 120 | case AtomSize::F16x2: | ||
| 121 | return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX); | ||
| 122 | default: | ||
| 123 | return false; | ||
| 124 | } | ||
| 125 | } | ||
| 126 | |||
| 127 | IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) { | ||
| 128 | switch (size) { | ||
| 129 | case AtomSize::U32: | ||
| 130 | case AtomSize::S32: | ||
| 131 | case AtomSize::F32: | ||
| 132 | case AtomSize::F16x2: | ||
| 133 | return ir.LoadGlobal32(offset); | ||
| 134 | case AtomSize::U64: | ||
| 135 | case AtomSize::S64: | ||
| 136 | return ir.PackUint2x32(ir.LoadGlobal64(offset)); | ||
| 137 | default: | ||
| 138 | throw NotImplementedException("Atom Size {}", size); | ||
| 139 | } | ||
| 140 | } | ||
| 141 | |||
| 142 | void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) { | ||
| 143 | switch (size) { | ||
| 144 | case AtomSize::U32: | ||
| 145 | case AtomSize::S32: | ||
| 146 | case AtomSize::F16x2: | ||
| 147 | return v.X(dest_reg, IR::U32{result}); | ||
| 148 | case AtomSize::U64: | ||
| 149 | case AtomSize::S64: | ||
| 150 | return v.L(dest_reg, IR::U64{result}); | ||
| 151 | case AtomSize::F32: | ||
| 152 | return v.F(dest_reg, IR::F32{result}); | ||
| 153 | default: | ||
| 154 | break; | ||
| 155 | } | ||
| 156 | } | ||
| 157 | } // Anonymous namespace | ||
| 158 | |||
| 159 | void TranslatorVisitor::ATOM(u64 insn) { | ||
| 160 | union { | ||
| 161 | u64 raw; | ||
| 162 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 163 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 164 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 165 | BitField<49, 3, AtomSize> size; | ||
| 166 | BitField<52, 4, AtomOp> op; | ||
| 167 | } const atom{insn}; | ||
| 168 | |||
| 169 | const bool size_64{atom.size == AtomSize::U64 || atom.size == AtomSize::S64}; | ||
| 170 | const bool is_signed{atom.size == AtomSize::S32 || atom.size == AtomSize::S64}; | ||
| 171 | const bool is_integer{atom.size != AtomSize::F32 && atom.size != AtomSize::F16x2}; | ||
| 172 | const IR::U64 offset{AtomOffset(*this, insn)}; | ||
| 173 | IR::Value result; | ||
| 174 | |||
| 175 | if (AtomOpNotApplicable(atom.size, atom.op)) { | ||
| 176 | result = LoadGlobal(ir, offset, atom.size); | ||
| 177 | } else if (!is_integer) { | ||
| 178 | if (atom.size == AtomSize::F32) { | ||
| 179 | result = ApplyFpAtomOp(ir, offset, F(atom.src_reg_b), atom.op, atom.size); | ||
| 180 | } else { | ||
| 181 | const IR::Value src_b{ir.UnpackFloat2x16(X(atom.src_reg_b))}; | ||
| 182 | result = ApplyFpAtomOp(ir, offset, src_b, atom.op, atom.size); | ||
| 183 | } | ||
| 184 | } else if (size_64) { | ||
| 185 | result = ApplyIntegerAtomOp(ir, offset, L(atom.src_reg_b), atom.op, is_signed); | ||
| 186 | } else { | ||
| 187 | result = ApplyIntegerAtomOp(ir, offset, X(atom.src_reg_b), atom.op, is_signed); | ||
| 188 | } | ||
| 189 | StoreResult(*this, atom.dest_reg, result, atom.size); | ||
| 190 | } | ||
| 191 | |||
| 192 | void TranslatorVisitor::RED(u64 insn) { | ||
| 193 | union { | ||
| 194 | u64 raw; | ||
| 195 | BitField<0, 8, IR::Reg> src_reg_b; | ||
| 196 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 197 | BitField<20, 3, AtomSize> size; | ||
| 198 | BitField<23, 3, AtomOp> op; | ||
| 199 | } const red{insn}; | ||
| 200 | |||
| 201 | if (AtomOpNotApplicable(red.size, red.op)) { | ||
| 202 | return; | ||
| 203 | } | ||
| 204 | const bool size_64{red.size == AtomSize::U64 || red.size == AtomSize::S64}; | ||
| 205 | const bool is_signed{red.size == AtomSize::S32 || red.size == AtomSize::S64}; | ||
| 206 | const bool is_integer{red.size != AtomSize::F32 && red.size != AtomSize::F16x2}; | ||
| 207 | const IR::U64 offset{AtomOffset(*this, insn)}; | ||
| 208 | if (!is_integer) { | ||
| 209 | if (red.size == AtomSize::F32) { | ||
| 210 | ApplyFpAtomOp(ir, offset, F(red.src_reg_b), red.op, red.size); | ||
| 211 | } else { | ||
| 212 | const IR::Value src_b{ir.UnpackFloat2x16(X(red.src_reg_b))}; | ||
| 213 | ApplyFpAtomOp(ir, offset, src_b, red.op, red.size); | ||
| 214 | } | ||
| 215 | } else if (size_64) { | ||
| 216 | ApplyIntegerAtomOp(ir, offset, L(red.src_reg_b), red.op, is_signed); | ||
| 217 | } else { | ||
| 218 | ApplyIntegerAtomOp(ir, offset, X(red.src_reg_b), red.op, is_signed); | ||
| 219 | } | ||
| 220 | } | ||
| 221 | |||
| 222 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp new file mode 100644 index 000000000..8b974621e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class AtomOp : u64 { | ||
| 12 | ADD, | ||
| 13 | MIN, | ||
| 14 | MAX, | ||
| 15 | INC, | ||
| 16 | DEC, | ||
| 17 | AND, | ||
| 18 | OR, | ||
| 19 | XOR, | ||
| 20 | EXCH, | ||
| 21 | }; | ||
| 22 | |||
| 23 | enum class AtomsSize : u64 { | ||
| 24 | U32, | ||
| 25 | S32, | ||
| 26 | U64, | ||
| 27 | }; | ||
| 28 | |||
| 29 | IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op, | ||
| 30 | bool is_signed) { | ||
| 31 | switch (op) { | ||
| 32 | case AtomOp::ADD: | ||
| 33 | return ir.SharedAtomicIAdd(offset, op_b); | ||
| 34 | case AtomOp::MIN: | ||
| 35 | return ir.SharedAtomicIMin(offset, op_b, is_signed); | ||
| 36 | case AtomOp::MAX: | ||
| 37 | return ir.SharedAtomicIMax(offset, op_b, is_signed); | ||
| 38 | case AtomOp::INC: | ||
| 39 | return ir.SharedAtomicInc(offset, op_b); | ||
| 40 | case AtomOp::DEC: | ||
| 41 | return ir.SharedAtomicDec(offset, op_b); | ||
| 42 | case AtomOp::AND: | ||
| 43 | return ir.SharedAtomicAnd(offset, op_b); | ||
| 44 | case AtomOp::OR: | ||
| 45 | return ir.SharedAtomicOr(offset, op_b); | ||
| 46 | case AtomOp::XOR: | ||
| 47 | return ir.SharedAtomicXor(offset, op_b); | ||
| 48 | case AtomOp::EXCH: | ||
| 49 | return ir.SharedAtomicExchange(offset, op_b); | ||
| 50 | default: | ||
| 51 | throw NotImplementedException("Integer Atoms Operation {}", op); | ||
| 52 | } | ||
| 53 | } | ||
| 54 | |||
| 55 | IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) { | ||
| 56 | union { | ||
| 57 | u64 raw; | ||
| 58 | BitField<8, 8, IR::Reg> offset_reg; | ||
| 59 | BitField<30, 22, u64> absolute_offset; | ||
| 60 | BitField<30, 22, s64> relative_offset; | ||
| 61 | } const encoding{insn}; | ||
| 62 | |||
| 63 | if (encoding.offset_reg == IR::Reg::RZ) { | ||
| 64 | return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2)); | ||
| 65 | } else { | ||
| 66 | const s32 relative{static_cast<s32>(encoding.relative_offset << 2)}; | ||
| 67 | return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); | ||
| 68 | } | ||
| 69 | } | ||
| 70 | |||
| 71 | void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) { | ||
| 72 | switch (size) { | ||
| 73 | case AtomsSize::U32: | ||
| 74 | case AtomsSize::S32: | ||
| 75 | return v.X(dest_reg, IR::U32{result}); | ||
| 76 | case AtomsSize::U64: | ||
| 77 | return v.L(dest_reg, IR::U64{result}); | ||
| 78 | default: | ||
| 79 | break; | ||
| 80 | } | ||
| 81 | } | ||
| 82 | } // Anonymous namespace | ||
| 83 | |||
| 84 | void TranslatorVisitor::ATOMS(u64 insn) { | ||
| 85 | union { | ||
| 86 | u64 raw; | ||
| 87 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 88 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 89 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 90 | BitField<28, 2, AtomsSize> size; | ||
| 91 | BitField<52, 4, AtomOp> op; | ||
| 92 | } const atoms{insn}; | ||
| 93 | |||
| 94 | const bool size_64{atoms.size == AtomsSize::U64}; | ||
| 95 | if (size_64 && atoms.op != AtomOp::EXCH) { | ||
| 96 | throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value()); | ||
| 97 | } | ||
| 98 | const bool is_signed{atoms.size == AtomsSize::S32}; | ||
| 99 | const IR::U32 offset{AtomsOffset(*this, insn)}; | ||
| 100 | |||
| 101 | IR::Value result; | ||
| 102 | if (size_64) { | ||
| 103 | result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed); | ||
| 104 | } else { | ||
| 105 | result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed); | ||
| 106 | } | ||
| 107 | StoreResult(*this, atoms.dest_reg, result, atoms.size); | ||
| 108 | } | ||
| 109 | |||
| 110 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 327941223..aebe3072a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -17,18 +17,10 @@ void TranslatorVisitor::ATOM_cas(u64) { | |||
| 17 | ThrowNotImplemented(Opcode::ATOM_cas); | 17 | ThrowNotImplemented(Opcode::ATOM_cas); |
| 18 | } | 18 | } |
| 19 | 19 | ||
| 20 | void TranslatorVisitor::ATOM(u64) { | ||
| 21 | ThrowNotImplemented(Opcode::ATOM); | ||
| 22 | } | ||
| 23 | |||
| 24 | void TranslatorVisitor::ATOMS_cas(u64) { | 20 | void TranslatorVisitor::ATOMS_cas(u64) { |
| 25 | ThrowNotImplemented(Opcode::ATOMS_cas); | 21 | ThrowNotImplemented(Opcode::ATOMS_cas); |
| 26 | } | 22 | } |
| 27 | 23 | ||
| 28 | void TranslatorVisitor::ATOMS(u64) { | ||
| 29 | ThrowNotImplemented(Opcode::ATOMS); | ||
| 30 | } | ||
| 31 | |||
| 32 | void TranslatorVisitor::B2R(u64) { | 24 | void TranslatorVisitor::B2R(u64) { |
| 33 | ThrowNotImplemented(Opcode::B2R); | 25 | ThrowNotImplemented(Opcode::B2R); |
| 34 | } | 26 | } |
| @@ -241,10 +233,6 @@ void TranslatorVisitor::RAM(u64) { | |||
| 241 | ThrowNotImplemented(Opcode::RAM); | 233 | ThrowNotImplemented(Opcode::RAM); |
| 242 | } | 234 | } |
| 243 | 235 | ||
| 244 | void TranslatorVisitor::RED(u64) { | ||
| 245 | ThrowNotImplemented(Opcode::RED); | ||
| 246 | } | ||
| 247 | |||
| 248 | void TranslatorVisitor::RET(u64) { | 236 | void TranslatorVisitor::RET(u64) { |
| 249 | ThrowNotImplemented(Opcode::RET); | 237 | ThrowNotImplemented(Opcode::RET); |
| 250 | } | 238 | } |