diff options
Diffstat (limited to 'src/shader_recompiler/frontend')
| -rw-r--r-- | src/shader_recompiler/frontend/ir/ir_emitter.cpp | 133 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/ir/ir_emitter.h | 21 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/ir/microinstruction.cpp | 73 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/ir/microinstruction.h | 22 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/ir/modifiers.h | 10 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/ir/opcodes.cpp | 2 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/ir/opcodes.inc | 569 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/ir/reg.h | 11 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/ir/value.h | 1 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/maxwell/maxwell.inc | 4 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/maxwell/program.cpp | 1 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | 8 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp | 232 |
13 files changed, 772 insertions, 315 deletions
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index f38b46bac..ae3354c66 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -7,11 +7,24 @@ | |||
| 7 | #include "shader_recompiler/frontend/ir/value.h" | 7 | #include "shader_recompiler/frontend/ir/value.h" |
| 8 | 8 | ||
| 9 | namespace Shader::IR { | 9 | namespace Shader::IR { |
| 10 | 10 | namespace { | |
| 11 | [[noreturn]] static void ThrowInvalidType(Type type) { | 11 | [[noreturn]] void ThrowInvalidType(Type type) { |
| 12 | throw InvalidArgument("Invalid type {}", type); | 12 | throw InvalidArgument("Invalid type {}", type); |
| 13 | } | 13 | } |
| 14 | 14 | ||
| 15 | Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) { | ||
| 16 | if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) { | ||
| 17 | return ir.CompositeConstruct(bias_lod, lod_clamp); | ||
| 18 | } else if (!bias_lod.IsEmpty()) { | ||
| 19 | return bias_lod; | ||
| 20 | } else if (!lod_clamp.IsEmpty()) { | ||
| 21 | return lod_clamp; | ||
| 22 | } else { | ||
| 23 | return Value{}; | ||
| 24 | } | ||
| 25 | } | ||
| 26 | } // Anonymous namespace | ||
| 27 | |||
| 15 | U1 IREmitter::Imm1(bool value) const { | 28 | U1 IREmitter::Imm1(bool value) const { |
| 16 | return U1{Value{value}}; | 29 | return U1{Value{value}}; |
| 17 | } | 30 | } |
| @@ -261,6 +274,10 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) { | |||
| 261 | return Inst<U1>(Opcode::GetOverflowFromOp, op); | 274 | return Inst<U1>(Opcode::GetOverflowFromOp, op); |
| 262 | } | 275 | } |
| 263 | 276 | ||
| 277 | U1 IREmitter::GetSparseFromOp(const Value& op) { | ||
| 278 | return Inst<U1>(Opcode::GetSparseFromOp, op); | ||
| 279 | } | ||
| 280 | |||
| 264 | F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { | 281 | F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { |
| 265 | if (a.Type() != a.Type()) { | 282 | if (a.Type() != a.Type()) { |
| 266 | throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | 283 | throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); |
| @@ -1035,6 +1052,82 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v | |||
| 1035 | } | 1052 | } |
| 1036 | } | 1053 | } |
| 1037 | 1054 | ||
| 1055 | F16F32F64 IREmitter::ConvertSToF(size_t bitsize, const U32U64& value) { | ||
| 1056 | switch (bitsize) { | ||
| 1057 | case 16: | ||
| 1058 | switch (value.Type()) { | ||
| 1059 | case Type::U32: | ||
| 1060 | return Inst<F16>(Opcode::ConvertF16S32, value); | ||
| 1061 | case Type::U64: | ||
| 1062 | return Inst<F16>(Opcode::ConvertF16S64, value); | ||
| 1063 | default: | ||
| 1064 | ThrowInvalidType(value.Type()); | ||
| 1065 | } | ||
| 1066 | case 32: | ||
| 1067 | switch (value.Type()) { | ||
| 1068 | case Type::U32: | ||
| 1069 | return Inst<F32>(Opcode::ConvertF32S32, value); | ||
| 1070 | case Type::U64: | ||
| 1071 | return Inst<F32>(Opcode::ConvertF32S64, value); | ||
| 1072 | default: | ||
| 1073 | ThrowInvalidType(value.Type()); | ||
| 1074 | } | ||
| 1075 | case 64: | ||
| 1076 | switch (value.Type()) { | ||
| 1077 | case Type::U32: | ||
| 1078 | return Inst<F16>(Opcode::ConvertF64S32, value); | ||
| 1079 | case Type::U64: | ||
| 1080 | return Inst<F16>(Opcode::ConvertF64S64, value); | ||
| 1081 | default: | ||
| 1082 | ThrowInvalidType(value.Type()); | ||
| 1083 | } | ||
| 1084 | default: | ||
| 1085 | throw InvalidArgument("Invalid destination bitsize {}", bitsize); | ||
| 1086 | } | ||
| 1087 | } | ||
| 1088 | |||
| 1089 | F16F32F64 IREmitter::ConvertUToF(size_t bitsize, const U32U64& value) { | ||
| 1090 | switch (bitsize) { | ||
| 1091 | case 16: | ||
| 1092 | switch (value.Type()) { | ||
| 1093 | case Type::U32: | ||
| 1094 | return Inst<F16>(Opcode::ConvertF16U32, value); | ||
| 1095 | case Type::U64: | ||
| 1096 | return Inst<F16>(Opcode::ConvertF16U64, value); | ||
| 1097 | default: | ||
| 1098 | ThrowInvalidType(value.Type()); | ||
| 1099 | } | ||
| 1100 | case 32: | ||
| 1101 | switch (value.Type()) { | ||
| 1102 | case Type::U32: | ||
| 1103 | return Inst<F32>(Opcode::ConvertF32U32, value); | ||
| 1104 | case Type::U64: | ||
| 1105 | return Inst<F32>(Opcode::ConvertF32U64, value); | ||
| 1106 | default: | ||
| 1107 | ThrowInvalidType(value.Type()); | ||
| 1108 | } | ||
| 1109 | case 64: | ||
| 1110 | switch (value.Type()) { | ||
| 1111 | case Type::U32: | ||
| 1112 | return Inst<F16>(Opcode::ConvertF64U32, value); | ||
| 1113 | case Type::U64: | ||
| 1114 | return Inst<F16>(Opcode::ConvertF64U64, value); | ||
| 1115 | default: | ||
| 1116 | ThrowInvalidType(value.Type()); | ||
| 1117 | } | ||
| 1118 | default: | ||
| 1119 | throw InvalidArgument("Invalid destination bitsize {}", bitsize); | ||
| 1120 | } | ||
| 1121 | } | ||
| 1122 | |||
| 1123 | F16F32F64 IREmitter::ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value) { | ||
| 1124 | if (is_signed) { | ||
| 1125 | return ConvertSToF(bitsize, value); | ||
| 1126 | } else { | ||
| 1127 | return ConvertUToF(bitsize, value); | ||
| 1128 | } | ||
| 1129 | } | ||
| 1130 | |||
| 1038 | U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { | 1131 | U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { |
| 1039 | switch (result_bitsize) { | 1132 | switch (result_bitsize) { |
| 1040 | case 32: | 1133 | case 32: |
| @@ -1107,4 +1200,40 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { | |||
| 1107 | throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); | 1200 | throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); |
| 1108 | } | 1201 | } |
| 1109 | 1202 | ||
| 1203 | Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, | ||
| 1204 | const Value& offset, const F32& lod_clamp, | ||
| 1205 | TextureInstInfo info) { | ||
| 1206 | const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; | ||
| 1207 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleImplicitLod | ||
| 1208 | : Opcode::BindlessImageSampleImplicitLod}; | ||
| 1209 | return Inst(op, Flags{info}, handle, coords, bias_lc, offset); | ||
| 1210 | } | ||
| 1211 | |||
| 1212 | Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod, | ||
| 1213 | const Value& offset, const F32& lod_clamp, | ||
| 1214 | TextureInstInfo info) { | ||
| 1215 | const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)}; | ||
| 1216 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleExplicitLod | ||
| 1217 | : Opcode::BindlessImageSampleExplicitLod}; | ||
| 1218 | return Inst(op, Flags{info}, handle, coords, lod_lc, offset); | ||
| 1219 | } | ||
| 1220 | |||
| 1221 | F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref, | ||
| 1222 | const F32& bias, const Value& offset, | ||
| 1223 | const F32& lod_clamp, TextureInstInfo info) { | ||
| 1224 | const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; | ||
| 1225 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefImplicitLod | ||
| 1226 | : Opcode::BindlessImageSampleDrefImplicitLod}; | ||
| 1227 | return Inst<F32>(op, Flags{info}, handle, coords, dref, bias_lc, offset); | ||
| 1228 | } | ||
| 1229 | |||
| 1230 | F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, | ||
| 1231 | const F32& lod, const Value& offset, const F32& lod_clamp, | ||
| 1232 | TextureInstInfo info) { | ||
| 1233 | const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)}; | ||
| 1234 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod | ||
| 1235 | : Opcode::BindlessImageSampleDrefExplicitLod}; | ||
| 1236 | return Inst<F32>(op, Flags{info}, handle, coords, dref, lod_lc, offset); | ||
| 1237 | } | ||
| 1238 | |||
| 1110 | } // namespace Shader::IR | 1239 | } // namespace Shader::IR |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 6e29bf0e2..cb2a7710a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -91,6 +91,7 @@ public: | |||
| 91 | [[nodiscard]] U1 GetSignFromOp(const Value& op); | 91 | [[nodiscard]] U1 GetSignFromOp(const Value& op); |
| 92 | [[nodiscard]] U1 GetCarryFromOp(const Value& op); | 92 | [[nodiscard]] U1 GetCarryFromOp(const Value& op); |
| 93 | [[nodiscard]] U1 GetOverflowFromOp(const Value& op); | 93 | [[nodiscard]] U1 GetOverflowFromOp(const Value& op); |
| 94 | [[nodiscard]] U1 GetSparseFromOp(const Value& op); | ||
| 94 | 95 | ||
| 95 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); | 96 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); |
| 96 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); | 97 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); |
| @@ -159,7 +160,7 @@ public: | |||
| 159 | [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, | 160 | [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, |
| 160 | const U32& count); | 161 | const U32& count); |
| 161 | [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, | 162 | [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, |
| 162 | bool is_signed); | 163 | bool is_signed = false); |
| 163 | [[nodiscard]] U32 BitReverse(const U32& value); | 164 | [[nodiscard]] U32 BitReverse(const U32& value); |
| 164 | [[nodiscard]] U32 BitCount(const U32& value); | 165 | [[nodiscard]] U32 BitCount(const U32& value); |
| 165 | [[nodiscard]] U32 BitwiseNot(const U32& value); | 166 | [[nodiscard]] U32 BitwiseNot(const U32& value); |
| @@ -186,10 +187,28 @@ public: | |||
| 186 | [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); | 187 | [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); |
| 187 | [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); | 188 | [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); |
| 188 | [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); | 189 | [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); |
| 190 | [[nodiscard]] F16F32F64 ConvertSToF(size_t bitsize, const U32U64& value); | ||
| 191 | [[nodiscard]] F16F32F64 ConvertUToF(size_t bitsize, const U32U64& value); | ||
| 192 | [[nodiscard]] F16F32F64 ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value); | ||
| 189 | 193 | ||
| 190 | [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); | 194 | [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); |
| 191 | [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); | 195 | [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); |
| 192 | 196 | ||
| 197 | [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords, | ||
| 198 | const F32& bias, const Value& offset, | ||
| 199 | const F32& lod_clamp, TextureInstInfo info); | ||
| 200 | [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords, | ||
| 201 | const F32& lod, const Value& offset, | ||
| 202 | const F32& lod_clamp, TextureInstInfo info); | ||
| 203 | [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, | ||
| 204 | const F32& dref, const F32& bias, | ||
| 205 | const Value& offset, const F32& lod_clamp, | ||
| 206 | TextureInstInfo info); | ||
| 207 | [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, | ||
| 208 | const F32& dref, const F32& lod, | ||
| 209 | const Value& offset, const F32& lod_clamp, | ||
| 210 | TextureInstInfo info); | ||
| 211 | |||
| 193 | private: | 212 | private: |
| 194 | IR::Block::iterator insertion_point; | 213 | IR::Block::iterator insertion_point; |
| 195 | 214 | ||
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index d6a9be87d..88e186f21 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp | |||
| @@ -10,26 +10,27 @@ | |||
| 10 | #include "shader_recompiler/frontend/ir/type.h" | 10 | #include "shader_recompiler/frontend/ir/type.h" |
| 11 | 11 | ||
| 12 | namespace Shader::IR { | 12 | namespace Shader::IR { |
| 13 | 13 | namespace { | |
| 14 | static void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { | 14 | void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { |
| 15 | if (inst && inst->Opcode() != opcode) { | 15 | if (inst && inst->Opcode() != opcode) { |
| 16 | throw LogicError("Invalid pseudo-instruction"); | 16 | throw LogicError("Invalid pseudo-instruction"); |
| 17 | } | 17 | } |
| 18 | } | 18 | } |
| 19 | 19 | ||
| 20 | static void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { | 20 | void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { |
| 21 | if (dest_inst) { | 21 | if (dest_inst) { |
| 22 | throw LogicError("Only one of each type of pseudo-op allowed"); | 22 | throw LogicError("Only one of each type of pseudo-op allowed"); |
| 23 | } | 23 | } |
| 24 | dest_inst = pseudo_inst; | 24 | dest_inst = pseudo_inst; |
| 25 | } | 25 | } |
| 26 | 26 | ||
| 27 | static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { | 27 | void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { |
| 28 | if (inst->Opcode() != expected_opcode) { | 28 | if (inst->Opcode() != expected_opcode) { |
| 29 | throw LogicError("Undoing use of invalid pseudo-op"); | 29 | throw LogicError("Undoing use of invalid pseudo-op"); |
| 30 | } | 30 | } |
| 31 | inst = nullptr; | 31 | inst = nullptr; |
| 32 | } | 32 | } |
| 33 | } // Anonymous namespace | ||
| 33 | 34 | ||
| 34 | Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { | 35 | Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { |
| 35 | if (op == Opcode::Phi) { | 36 | if (op == Opcode::Phi) { |
| @@ -82,6 +83,7 @@ bool Inst::IsPseudoInstruction() const noexcept { | |||
| 82 | case Opcode::GetSignFromOp: | 83 | case Opcode::GetSignFromOp: |
| 83 | case Opcode::GetCarryFromOp: | 84 | case Opcode::GetCarryFromOp: |
| 84 | case Opcode::GetOverflowFromOp: | 85 | case Opcode::GetOverflowFromOp: |
| 86 | case Opcode::GetSparseFromOp: | ||
| 85 | return true; | 87 | return true; |
| 86 | default: | 88 | default: |
| 87 | return false; | 89 | return false; |
| @@ -96,25 +98,26 @@ bool Inst::AreAllArgsImmediates() const { | |||
| 96 | [](const IR::Value& value) { return value.IsImmediate(); }); | 98 | [](const IR::Value& value) { return value.IsImmediate(); }); |
| 97 | } | 99 | } |
| 98 | 100 | ||
| 99 | bool Inst::HasAssociatedPseudoOperation() const noexcept { | ||
| 100 | return zero_inst || sign_inst || carry_inst || overflow_inst; | ||
| 101 | } | ||
| 102 | |||
| 103 | Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { | 101 | Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { |
| 104 | // This is faster than doing a search through the block. | 102 | if (!associated_insts) { |
| 103 | return nullptr; | ||
| 104 | } | ||
| 105 | switch (opcode) { | 105 | switch (opcode) { |
| 106 | case Opcode::GetZeroFromOp: | 106 | case Opcode::GetZeroFromOp: |
| 107 | CheckPseudoInstruction(zero_inst, Opcode::GetZeroFromOp); | 107 | CheckPseudoInstruction(associated_insts->zero_inst, Opcode::GetZeroFromOp); |
| 108 | return zero_inst; | 108 | return associated_insts->zero_inst; |
| 109 | case Opcode::GetSignFromOp: | 109 | case Opcode::GetSignFromOp: |
| 110 | CheckPseudoInstruction(sign_inst, Opcode::GetSignFromOp); | 110 | CheckPseudoInstruction(associated_insts->sign_inst, Opcode::GetSignFromOp); |
| 111 | return sign_inst; | 111 | return associated_insts->sign_inst; |
| 112 | case Opcode::GetCarryFromOp: | 112 | case Opcode::GetCarryFromOp: |
| 113 | CheckPseudoInstruction(carry_inst, Opcode::GetCarryFromOp); | 113 | CheckPseudoInstruction(associated_insts->carry_inst, Opcode::GetCarryFromOp); |
| 114 | return carry_inst; | 114 | return associated_insts->carry_inst; |
| 115 | case Opcode::GetOverflowFromOp: | 115 | case Opcode::GetOverflowFromOp: |
| 116 | CheckPseudoInstruction(overflow_inst, Opcode::GetOverflowFromOp); | 116 | CheckPseudoInstruction(associated_insts->overflow_inst, Opcode::GetOverflowFromOp); |
| 117 | return overflow_inst; | 117 | return associated_insts->overflow_inst; |
| 118 | case Opcode::GetSparseFromOp: | ||
| 119 | CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp); | ||
| 120 | return associated_insts->sparse_inst; | ||
| 118 | default: | 121 | default: |
| 119 | throw InvalidArgument("{} is not a pseudo-instruction", opcode); | 122 | throw InvalidArgument("{} is not a pseudo-instruction", opcode); |
| 120 | } | 123 | } |
| @@ -220,22 +223,37 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) { | |||
| 220 | op = opcode; | 223 | op = opcode; |
| 221 | } | 224 | } |
| 222 | 225 | ||
| 226 | void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) { | ||
| 227 | if (!associated_insts) { | ||
| 228 | associated_insts = std::make_unique<AssociatedInsts>(); | ||
| 229 | } | ||
| 230 | } | ||
| 231 | |||
| 223 | void Inst::Use(const Value& value) { | 232 | void Inst::Use(const Value& value) { |
| 224 | Inst* const inst{value.Inst()}; | 233 | Inst* const inst{value.Inst()}; |
| 225 | ++inst->use_count; | 234 | ++inst->use_count; |
| 226 | 235 | ||
| 236 | std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts}; | ||
| 227 | switch (op) { | 237 | switch (op) { |
| 228 | case Opcode::GetZeroFromOp: | 238 | case Opcode::GetZeroFromOp: |
| 229 | SetPseudoInstruction(inst->zero_inst, this); | 239 | AllocAssociatedInsts(assoc_inst); |
| 240 | SetPseudoInstruction(assoc_inst->zero_inst, this); | ||
| 230 | break; | 241 | break; |
| 231 | case Opcode::GetSignFromOp: | 242 | case Opcode::GetSignFromOp: |
| 232 | SetPseudoInstruction(inst->sign_inst, this); | 243 | AllocAssociatedInsts(assoc_inst); |
| 244 | SetPseudoInstruction(assoc_inst->sign_inst, this); | ||
| 233 | break; | 245 | break; |
| 234 | case Opcode::GetCarryFromOp: | 246 | case Opcode::GetCarryFromOp: |
| 235 | SetPseudoInstruction(inst->carry_inst, this); | 247 | AllocAssociatedInsts(assoc_inst); |
| 248 | SetPseudoInstruction(assoc_inst->carry_inst, this); | ||
| 236 | break; | 249 | break; |
| 237 | case Opcode::GetOverflowFromOp: | 250 | case Opcode::GetOverflowFromOp: |
| 238 | SetPseudoInstruction(inst->overflow_inst, this); | 251 | AllocAssociatedInsts(assoc_inst); |
| 252 | SetPseudoInstruction(assoc_inst->overflow_inst, this); | ||
| 253 | break; | ||
| 254 | case Opcode::GetSparseFromOp: | ||
| 255 | AllocAssociatedInsts(assoc_inst); | ||
| 256 | SetPseudoInstruction(assoc_inst->sparse_inst, this); | ||
| 239 | break; | 257 | break; |
| 240 | default: | 258 | default: |
| 241 | break; | 259 | break; |
| @@ -246,18 +264,23 @@ void Inst::UndoUse(const Value& value) { | |||
| 246 | Inst* const inst{value.Inst()}; | 264 | Inst* const inst{value.Inst()}; |
| 247 | --inst->use_count; | 265 | --inst->use_count; |
| 248 | 266 | ||
| 267 | std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts}; | ||
| 249 | switch (op) { | 268 | switch (op) { |
| 250 | case Opcode::GetZeroFromOp: | 269 | case Opcode::GetZeroFromOp: |
| 251 | RemovePseudoInstruction(inst->zero_inst, Opcode::GetZeroFromOp); | 270 | AllocAssociatedInsts(assoc_inst); |
| 271 | RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp); | ||
| 252 | break; | 272 | break; |
| 253 | case Opcode::GetSignFromOp: | 273 | case Opcode::GetSignFromOp: |
| 254 | RemovePseudoInstruction(inst->sign_inst, Opcode::GetSignFromOp); | 274 | AllocAssociatedInsts(assoc_inst); |
| 275 | RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp); | ||
| 255 | break; | 276 | break; |
| 256 | case Opcode::GetCarryFromOp: | 277 | case Opcode::GetCarryFromOp: |
| 257 | RemovePseudoInstruction(inst->carry_inst, Opcode::GetCarryFromOp); | 278 | AllocAssociatedInsts(assoc_inst); |
| 279 | RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp); | ||
| 258 | break; | 280 | break; |
| 259 | case Opcode::GetOverflowFromOp: | 281 | case Opcode::GetOverflowFromOp: |
| 260 | RemovePseudoInstruction(inst->overflow_inst, Opcode::GetOverflowFromOp); | 282 | AllocAssociatedInsts(assoc_inst); |
| 283 | RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); | ||
| 261 | break; | 284 | break; |
| 262 | default: | 285 | default: |
| 263 | break; | 286 | break; |
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 321393dd7..d5336c438 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h | |||
| @@ -22,7 +22,7 @@ namespace Shader::IR { | |||
| 22 | 22 | ||
| 23 | class Block; | 23 | class Block; |
| 24 | 24 | ||
| 25 | constexpr size_t MAX_ARG_COUNT = 4; | 25 | struct AssociatedInsts; |
| 26 | 26 | ||
| 27 | class Inst : public boost::intrusive::list_base_hook<> { | 27 | class Inst : public boost::intrusive::list_base_hook<> { |
| 28 | public: | 28 | public: |
| @@ -50,6 +50,11 @@ public: | |||
| 50 | return op; | 50 | return op; |
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | /// Determines if there is a pseudo-operation associated with this instruction. | ||
| 54 | [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept { | ||
| 55 | return associated_insts != nullptr; | ||
| 56 | } | ||
| 57 | |||
| 53 | /// Determines whether or not this instruction may have side effects. | 58 | /// Determines whether or not this instruction may have side effects. |
| 54 | [[nodiscard]] bool MayHaveSideEffects() const noexcept; | 59 | [[nodiscard]] bool MayHaveSideEffects() const noexcept; |
| 55 | 60 | ||
| @@ -60,8 +65,6 @@ public: | |||
| 60 | /// Determines if all arguments of this instruction are immediates. | 65 | /// Determines if all arguments of this instruction are immediates. |
| 61 | [[nodiscard]] bool AreAllArgsImmediates() const; | 66 | [[nodiscard]] bool AreAllArgsImmediates() const; |
| 62 | 67 | ||
| 63 | /// Determines if there is a pseudo-operation associated with this instruction. | ||
| 64 | [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept; | ||
| 65 | /// Gets a pseudo-operation associated with this instruction | 68 | /// Gets a pseudo-operation associated with this instruction |
| 66 | [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); | 69 | [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); |
| 67 | 70 | ||
| @@ -122,14 +125,21 @@ private: | |||
| 122 | u32 definition{}; | 125 | u32 definition{}; |
| 123 | union { | 126 | union { |
| 124 | NonTriviallyDummy dummy{}; | 127 | NonTriviallyDummy dummy{}; |
| 125 | std::array<Value, MAX_ARG_COUNT> args; | ||
| 126 | std::vector<std::pair<Block*, Value>> phi_args; | 128 | std::vector<std::pair<Block*, Value>> phi_args; |
| 129 | std::array<Value, 5> args; | ||
| 130 | }; | ||
| 131 | std::unique_ptr<AssociatedInsts> associated_insts; | ||
| 132 | }; | ||
| 133 | static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); | ||
| 134 | |||
| 135 | struct AssociatedInsts { | ||
| 136 | union { | ||
| 137 | Inst* sparse_inst; | ||
| 138 | Inst* zero_inst{}; | ||
| 127 | }; | 139 | }; |
| 128 | Inst* zero_inst{}; | ||
| 129 | Inst* sign_inst{}; | 140 | Inst* sign_inst{}; |
| 130 | Inst* carry_inst{}; | 141 | Inst* carry_inst{}; |
| 131 | Inst* overflow_inst{}; | 142 | Inst* overflow_inst{}; |
| 132 | }; | 143 | }; |
| 133 | static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased its size"); | ||
| 134 | 144 | ||
| 135 | } // namespace Shader::IR | 145 | } // namespace Shader::IR |
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 44652eae7..ad07700ae 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h | |||
| @@ -4,7 +4,9 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "common/bit_field.h" | ||
| 7 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | #include "shader_recompiler/shader_info.h" | ||
| 8 | 10 | ||
| 9 | namespace Shader::IR { | 11 | namespace Shader::IR { |
| 10 | 12 | ||
| @@ -30,4 +32,12 @@ struct FpControl { | |||
| 30 | }; | 32 | }; |
| 31 | static_assert(sizeof(FpControl) <= sizeof(u32)); | 33 | static_assert(sizeof(FpControl) <= sizeof(u32)); |
| 32 | 34 | ||
| 35 | union TextureInstInfo { | ||
| 36 | u32 raw; | ||
| 37 | BitField<0, 8, TextureType> type; | ||
| 38 | BitField<8, 1, u32> has_bias; | ||
| 39 | BitField<16, 1, u32> has_lod_clamp; | ||
| 40 | }; | ||
| 41 | static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); | ||
| 42 | |||
| 33 | } // namespace Shader::IR | 43 | } // namespace Shader::IR |
diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 1f188411a..8492a13d5 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp | |||
| @@ -14,7 +14,7 @@ namespace { | |||
| 14 | struct OpcodeMeta { | 14 | struct OpcodeMeta { |
| 15 | std::string_view name; | 15 | std::string_view name; |
| 16 | Type type; | 16 | Type type; |
| 17 | std::array<Type, 4> arg_types; | 17 | std::array<Type, 5> arg_types; |
| 18 | }; | 18 | }; |
| 19 | 19 | ||
| 20 | using enum Type; | 20 | using enum Type; |
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index c4e72c84d..aa011fab1 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc | |||
| @@ -2,301 +2,330 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | // opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... | 5 | // opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, arg5 type, ... |
| 6 | OPCODE(Phi, Opaque, ) | 6 | OPCODE(Phi, Opaque, ) |
| 7 | OPCODE(Identity, Opaque, Opaque, ) | 7 | OPCODE(Identity, Opaque, Opaque, ) |
| 8 | OPCODE(Void, Void, ) | 8 | OPCODE(Void, Void, ) |
| 9 | 9 | ||
| 10 | // Control flow | 10 | // Control flow |
| 11 | OPCODE(Branch, Void, Label, ) | 11 | OPCODE(Branch, Void, Label, ) |
| 12 | OPCODE(BranchConditional, Void, U1, Label, Label, ) | 12 | OPCODE(BranchConditional, Void, U1, Label, Label, ) |
| 13 | OPCODE(LoopMerge, Void, Label, Label, ) | 13 | OPCODE(LoopMerge, Void, Label, Label, ) |
| 14 | OPCODE(SelectionMerge, Void, Label, ) | 14 | OPCODE(SelectionMerge, Void, Label, ) |
| 15 | OPCODE(Return, Void, ) | 15 | OPCODE(Return, Void, ) |
| 16 | 16 | ||
| 17 | // Context getters/setters | 17 | // Context getters/setters |
| 18 | OPCODE(GetRegister, U32, Reg, ) | 18 | OPCODE(GetRegister, U32, Reg, ) |
| 19 | OPCODE(SetRegister, Void, Reg, U32, ) | 19 | OPCODE(SetRegister, Void, Reg, U32, ) |
| 20 | OPCODE(GetPred, U1, Pred, ) | 20 | OPCODE(GetPred, U1, Pred, ) |
| 21 | OPCODE(SetPred, Void, Pred, U1, ) | 21 | OPCODE(SetPred, Void, Pred, U1, ) |
| 22 | OPCODE(GetGotoVariable, U1, U32, ) | 22 | OPCODE(GetGotoVariable, U1, U32, ) |
| 23 | OPCODE(SetGotoVariable, Void, U32, U1, ) | 23 | OPCODE(SetGotoVariable, Void, U32, U1, ) |
| 24 | OPCODE(GetCbuf, U32, U32, U32, ) | 24 | OPCODE(GetCbuf, U32, U32, U32, ) |
| 25 | OPCODE(GetAttribute, U32, Attribute, ) | 25 | OPCODE(GetAttribute, U32, Attribute, ) |
| 26 | OPCODE(SetAttribute, Void, Attribute, U32, ) | 26 | OPCODE(SetAttribute, Void, Attribute, U32, ) |
| 27 | OPCODE(GetAttributeIndexed, U32, U32, ) | 27 | OPCODE(GetAttributeIndexed, U32, U32, ) |
| 28 | OPCODE(SetAttributeIndexed, Void, U32, U32, ) | 28 | OPCODE(SetAttributeIndexed, Void, U32, U32, ) |
| 29 | OPCODE(GetZFlag, U1, Void, ) | 29 | OPCODE(GetZFlag, U1, Void, ) |
| 30 | OPCODE(GetSFlag, U1, Void, ) | 30 | OPCODE(GetSFlag, U1, Void, ) |
| 31 | OPCODE(GetCFlag, U1, Void, ) | 31 | OPCODE(GetCFlag, U1, Void, ) |
| 32 | OPCODE(GetOFlag, U1, Void, ) | 32 | OPCODE(GetOFlag, U1, Void, ) |
| 33 | OPCODE(SetZFlag, Void, U1, ) | 33 | OPCODE(SetZFlag, Void, U1, ) |
| 34 | OPCODE(SetSFlag, Void, U1, ) | 34 | OPCODE(SetSFlag, Void, U1, ) |
| 35 | OPCODE(SetCFlag, Void, U1, ) | 35 | OPCODE(SetCFlag, Void, U1, ) |
| 36 | OPCODE(SetOFlag, Void, U1, ) | 36 | OPCODE(SetOFlag, Void, U1, ) |
| 37 | OPCODE(WorkgroupId, U32x3, ) | 37 | OPCODE(WorkgroupId, U32x3, ) |
| 38 | OPCODE(LocalInvocationId, U32x3, ) | 38 | OPCODE(LocalInvocationId, U32x3, ) |
| 39 | 39 | ||
| 40 | // Undefined | 40 | // Undefined |
| 41 | OPCODE(UndefU1, U1, ) | 41 | OPCODE(UndefU1, U1, ) |
| 42 | OPCODE(UndefU8, U8, ) | 42 | OPCODE(UndefU8, U8, ) |
| 43 | OPCODE(UndefU16, U16, ) | 43 | OPCODE(UndefU16, U16, ) |
| 44 | OPCODE(UndefU32, U32, ) | 44 | OPCODE(UndefU32, U32, ) |
| 45 | OPCODE(UndefU64, U64, ) | 45 | OPCODE(UndefU64, U64, ) |
| 46 | 46 | ||
| 47 | // Memory operations | 47 | // Memory operations |
| 48 | OPCODE(LoadGlobalU8, U32, U64, ) | 48 | OPCODE(LoadGlobalU8, U32, U64, ) |
| 49 | OPCODE(LoadGlobalS8, U32, U64, ) | 49 | OPCODE(LoadGlobalS8, U32, U64, ) |
| 50 | OPCODE(LoadGlobalU16, U32, U64, ) | 50 | OPCODE(LoadGlobalU16, U32, U64, ) |
| 51 | OPCODE(LoadGlobalS16, U32, U64, ) | 51 | OPCODE(LoadGlobalS16, U32, U64, ) |
| 52 | OPCODE(LoadGlobal32, U32, U64, ) | 52 | OPCODE(LoadGlobal32, U32, U64, ) |
| 53 | OPCODE(LoadGlobal64, U32x2, U64, ) | 53 | OPCODE(LoadGlobal64, U32x2, U64, ) |
| 54 | OPCODE(LoadGlobal128, U32x4, U64, ) | 54 | OPCODE(LoadGlobal128, U32x4, U64, ) |
| 55 | OPCODE(WriteGlobalU8, Void, U64, U32, ) | 55 | OPCODE(WriteGlobalU8, Void, U64, U32, ) |
| 56 | OPCODE(WriteGlobalS8, Void, U64, U32, ) | 56 | OPCODE(WriteGlobalS8, Void, U64, U32, ) |
| 57 | OPCODE(WriteGlobalU16, Void, U64, U32, ) | 57 | OPCODE(WriteGlobalU16, Void, U64, U32, ) |
| 58 | OPCODE(WriteGlobalS16, Void, U64, U32, ) | 58 | OPCODE(WriteGlobalS16, Void, U64, U32, ) |
| 59 | OPCODE(WriteGlobal32, Void, U64, U32, ) | 59 | OPCODE(WriteGlobal32, Void, U64, U32, ) |
| 60 | OPCODE(WriteGlobal64, Void, U64, U32x2, ) | 60 | OPCODE(WriteGlobal64, Void, U64, U32x2, ) |
| 61 | OPCODE(WriteGlobal128, Void, U64, U32x4, ) | 61 | OPCODE(WriteGlobal128, Void, U64, U32x4, ) |
| 62 | 62 | ||
| 63 | // Storage buffer operations | 63 | // Storage buffer operations |
| 64 | OPCODE(LoadStorageU8, U32, U32, U32, ) | 64 | OPCODE(LoadStorageU8, U32, U32, U32, ) |
| 65 | OPCODE(LoadStorageS8, U32, U32, U32, ) | 65 | OPCODE(LoadStorageS8, U32, U32, U32, ) |
| 66 | OPCODE(LoadStorageU16, U32, U32, U32, ) | 66 | OPCODE(LoadStorageU16, U32, U32, U32, ) |
| 67 | OPCODE(LoadStorageS16, U32, U32, U32, ) | 67 | OPCODE(LoadStorageS16, U32, U32, U32, ) |
| 68 | OPCODE(LoadStorage32, U32, U32, U32, ) | 68 | OPCODE(LoadStorage32, U32, U32, U32, ) |
| 69 | OPCODE(LoadStorage64, U32x2, U32, U32, ) | 69 | OPCODE(LoadStorage64, U32x2, U32, U32, ) |
| 70 | OPCODE(LoadStorage128, U32x4, U32, U32, ) | 70 | OPCODE(LoadStorage128, U32x4, U32, U32, ) |
| 71 | OPCODE(WriteStorageU8, Void, U32, U32, U32, ) | 71 | OPCODE(WriteStorageU8, Void, U32, U32, U32, ) |
| 72 | OPCODE(WriteStorageS8, Void, U32, U32, U32, ) | 72 | OPCODE(WriteStorageS8, Void, U32, U32, U32, ) |
| 73 | OPCODE(WriteStorageU16, Void, U32, U32, U32, ) | 73 | OPCODE(WriteStorageU16, Void, U32, U32, U32, ) |
| 74 | OPCODE(WriteStorageS16, Void, U32, U32, U32, ) | 74 | OPCODE(WriteStorageS16, Void, U32, U32, U32, ) |
| 75 | OPCODE(WriteStorage32, Void, U32, U32, U32, ) | 75 | OPCODE(WriteStorage32, Void, U32, U32, U32, ) |
| 76 | OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) | 76 | OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) |
| 77 | OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) | 77 | OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) |
| 78 | 78 | ||
| 79 | // Vector utility | 79 | // Vector utility |
| 80 | OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) | 80 | OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) |
| 81 | OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) | 81 | OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) |
| 82 | OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) | 82 | OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) |
| 83 | OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) | 83 | OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) |
| 84 | OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) | 84 | OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) |
| 85 | OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) | 85 | OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) |
| 86 | OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) | 86 | OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) |
| 87 | OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) | 87 | OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) |
| 88 | OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) | 88 | OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) |
| 89 | OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) | 89 | OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) |
| 90 | OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) | 90 | OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) |
| 91 | OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) | 91 | OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) |
| 92 | OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) | 92 | OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) |
| 93 | OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) | 93 | OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) |
| 94 | OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) | 94 | OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) |
| 95 | OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) | 95 | OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) |
| 96 | OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) | 96 | OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) |
| 97 | OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) | 97 | OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) |
| 98 | OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) | 98 | OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) |
| 99 | OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) | 99 | OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) |
| 100 | OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) | 100 | OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) |
| 101 | OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) | 101 | OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) |
| 102 | OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) | 102 | OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) |
| 103 | OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) | 103 | OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) |
| 104 | OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) | 104 | OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) |
| 105 | OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) | 105 | OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) |
| 106 | OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) | 106 | OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) |
| 107 | OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) | 107 | OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) |
| 108 | OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) | 108 | OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) |
| 109 | OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) | 109 | OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) |
| 110 | OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) | 110 | OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) |
| 111 | OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) | 111 | OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) |
| 112 | OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) | 112 | OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) |
| 113 | OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) | 113 | OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) |
| 114 | OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) | 114 | OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) |
| 115 | OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) | 115 | OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) |
| 116 | 116 | ||
| 117 | // Select operations | 117 | // Select operations |
| 118 | OPCODE(SelectU1, U1, U1, U1, U1, ) | 118 | OPCODE(SelectU1, U1, U1, U1, U1, ) |
| 119 | OPCODE(SelectU8, U8, U1, U8, U8, ) | 119 | OPCODE(SelectU8, U8, U1, U8, U8, ) |
| 120 | OPCODE(SelectU16, U16, U1, U16, U16, ) | 120 | OPCODE(SelectU16, U16, U1, U16, U16, ) |
| 121 | OPCODE(SelectU32, U32, U1, U32, U32, ) | 121 | OPCODE(SelectU32, U32, U1, U32, U32, ) |
| 122 | OPCODE(SelectU64, U64, U1, U64, U64, ) | 122 | OPCODE(SelectU64, U64, U1, U64, U64, ) |
| 123 | OPCODE(SelectF16, F16, U1, F16, F16, ) | 123 | OPCODE(SelectF16, F16, U1, F16, F16, ) |
| 124 | OPCODE(SelectF32, F32, U1, F32, F32, ) | 124 | OPCODE(SelectF32, F32, U1, F32, F32, ) |
| 125 | 125 | ||
| 126 | // Bitwise conversions | 126 | // Bitwise conversions |
| 127 | OPCODE(BitCastU16F16, U16, F16, ) | 127 | OPCODE(BitCastU16F16, U16, F16, ) |
| 128 | OPCODE(BitCastU32F32, U32, F32, ) | 128 | OPCODE(BitCastU32F32, U32, F32, ) |
| 129 | OPCODE(BitCastU64F64, U64, F64, ) | 129 | OPCODE(BitCastU64F64, U64, F64, ) |
| 130 | OPCODE(BitCastF16U16, F16, U16, ) | 130 | OPCODE(BitCastF16U16, F16, U16, ) |
| 131 | OPCODE(BitCastF32U32, F32, U32, ) | 131 | OPCODE(BitCastF32U32, F32, U32, ) |
| 132 | OPCODE(BitCastF64U64, F64, U64, ) | 132 | OPCODE(BitCastF64U64, F64, U64, ) |
| 133 | OPCODE(PackUint2x32, U64, U32x2, ) | 133 | OPCODE(PackUint2x32, U64, U32x2, ) |
| 134 | OPCODE(UnpackUint2x32, U32x2, U64, ) | 134 | OPCODE(UnpackUint2x32, U32x2, U64, ) |
| 135 | OPCODE(PackFloat2x16, U32, F16x2, ) | 135 | OPCODE(PackFloat2x16, U32, F16x2, ) |
| 136 | OPCODE(UnpackFloat2x16, F16x2, U32, ) | 136 | OPCODE(UnpackFloat2x16, F16x2, U32, ) |
| 137 | OPCODE(PackHalf2x16, U32, F32x2, ) | 137 | OPCODE(PackHalf2x16, U32, F32x2, ) |
| 138 | OPCODE(UnpackHalf2x16, F32x2, U32, ) | 138 | OPCODE(UnpackHalf2x16, F32x2, U32, ) |
| 139 | OPCODE(PackDouble2x32, F64, U32x2, ) | 139 | OPCODE(PackDouble2x32, F64, U32x2, ) |
| 140 | OPCODE(UnpackDouble2x32, U32x2, F64, ) | 140 | OPCODE(UnpackDouble2x32, U32x2, F64, ) |
| 141 | 141 | ||
| 142 | // Pseudo-operation, handled specially at final emit | 142 | // Pseudo-operation, handled specially at final emit |
| 143 | OPCODE(GetZeroFromOp, U1, Opaque, ) | 143 | OPCODE(GetZeroFromOp, U1, Opaque, ) |
| 144 | OPCODE(GetSignFromOp, U1, Opaque, ) | 144 | OPCODE(GetSignFromOp, U1, Opaque, ) |
| 145 | OPCODE(GetCarryFromOp, U1, Opaque, ) | 145 | OPCODE(GetCarryFromOp, U1, Opaque, ) |
| 146 | OPCODE(GetOverflowFromOp, U1, Opaque, ) | 146 | OPCODE(GetOverflowFromOp, U1, Opaque, ) |
| 147 | OPCODE(GetSparseFromOp, U1, Opaque, ) | ||
| 147 | 148 | ||
| 148 | // Floating-point operations | 149 | // Floating-point operations |
| 149 | OPCODE(FPAbs16, F16, F16, ) | 150 | OPCODE(FPAbs16, F16, F16, ) |
| 150 | OPCODE(FPAbs32, F32, F32, ) | 151 | OPCODE(FPAbs32, F32, F32, ) |
| 151 | OPCODE(FPAbs64, F64, F64, ) | 152 | OPCODE(FPAbs64, F64, F64, ) |
| 152 | OPCODE(FPAdd16, F16, F16, F16, ) | 153 | OPCODE(FPAdd16, F16, F16, F16, ) |
| 153 | OPCODE(FPAdd32, F32, F32, F32, ) | 154 | OPCODE(FPAdd32, F32, F32, F32, ) |
| 154 | OPCODE(FPAdd64, F64, F64, F64, ) | 155 | OPCODE(FPAdd64, F64, F64, F64, ) |
| 155 | OPCODE(FPFma16, F16, F16, F16, F16, ) | 156 | OPCODE(FPFma16, F16, F16, F16, F16, ) |
| 156 | OPCODE(FPFma32, F32, F32, F32, F32, ) | 157 | OPCODE(FPFma32, F32, F32, F32, F32, ) |
| 157 | OPCODE(FPFma64, F64, F64, F64, F64, ) | 158 | OPCODE(FPFma64, F64, F64, F64, F64, ) |
| 158 | OPCODE(FPMax32, F32, F32, F32, ) | 159 | OPCODE(FPMax32, F32, F32, F32, ) |
| 159 | OPCODE(FPMax64, F64, F64, F64, ) | 160 | OPCODE(FPMax64, F64, F64, F64, ) |
| 160 | OPCODE(FPMin32, F32, F32, F32, ) | 161 | OPCODE(FPMin32, F32, F32, F32, ) |
| 161 | OPCODE(FPMin64, F64, F64, F64, ) | 162 | OPCODE(FPMin64, F64, F64, F64, ) |
| 162 | OPCODE(FPMul16, F16, F16, F16, ) | 163 | OPCODE(FPMul16, F16, F16, F16, ) |
| 163 | OPCODE(FPMul32, F32, F32, F32, ) | 164 | OPCODE(FPMul32, F32, F32, F32, ) |
| 164 | OPCODE(FPMul64, F64, F64, F64, ) | 165 | OPCODE(FPMul64, F64, F64, F64, ) |
| 165 | OPCODE(FPNeg16, F16, F16, ) | 166 | OPCODE(FPNeg16, F16, F16, ) |
| 166 | OPCODE(FPNeg32, F32, F32, ) | 167 | OPCODE(FPNeg32, F32, F32, ) |
| 167 | OPCODE(FPNeg64, F64, F64, ) | 168 | OPCODE(FPNeg64, F64, F64, ) |
| 168 | OPCODE(FPRecip32, F32, F32, ) | 169 | OPCODE(FPRecip32, F32, F32, ) |
| 169 | OPCODE(FPRecip64, F64, F64, ) | 170 | OPCODE(FPRecip64, F64, F64, ) |
| 170 | OPCODE(FPRecipSqrt32, F32, F32, ) | 171 | OPCODE(FPRecipSqrt32, F32, F32, ) |
| 171 | OPCODE(FPRecipSqrt64, F64, F64, ) | 172 | OPCODE(FPRecipSqrt64, F64, F64, ) |
| 172 | OPCODE(FPSqrt, F32, F32, ) | 173 | OPCODE(FPSqrt, F32, F32, ) |
| 173 | OPCODE(FPSin, F32, F32, ) | 174 | OPCODE(FPSin, F32, F32, ) |
| 174 | OPCODE(FPExp2, F32, F32, ) | 175 | OPCODE(FPExp2, F32, F32, ) |
| 175 | OPCODE(FPCos, F32, F32, ) | 176 | OPCODE(FPCos, F32, F32, ) |
| 176 | OPCODE(FPLog2, F32, F32, ) | 177 | OPCODE(FPLog2, F32, F32, ) |
| 177 | OPCODE(FPSaturate16, F16, F16, ) | 178 | OPCODE(FPSaturate16, F16, F16, ) |
| 178 | OPCODE(FPSaturate32, F32, F32, ) | 179 | OPCODE(FPSaturate32, F32, F32, ) |
| 179 | OPCODE(FPSaturate64, F64, F64, ) | 180 | OPCODE(FPSaturate64, F64, F64, ) |
| 180 | OPCODE(FPRoundEven16, F16, F16, ) | 181 | OPCODE(FPRoundEven16, F16, F16, ) |
| 181 | OPCODE(FPRoundEven32, F32, F32, ) | 182 | OPCODE(FPRoundEven32, F32, F32, ) |
| 182 | OPCODE(FPRoundEven64, F64, F64, ) | 183 | OPCODE(FPRoundEven64, F64, F64, ) |
| 183 | OPCODE(FPFloor16, F16, F16, ) | 184 | OPCODE(FPFloor16, F16, F16, ) |
| 184 | OPCODE(FPFloor32, F32, F32, ) | 185 | OPCODE(FPFloor32, F32, F32, ) |
| 185 | OPCODE(FPFloor64, F64, F64, ) | 186 | OPCODE(FPFloor64, F64, F64, ) |
| 186 | OPCODE(FPCeil16, F16, F16, ) | 187 | OPCODE(FPCeil16, F16, F16, ) |
| 187 | OPCODE(FPCeil32, F32, F32, ) | 188 | OPCODE(FPCeil32, F32, F32, ) |
| 188 | OPCODE(FPCeil64, F64, F64, ) | 189 | OPCODE(FPCeil64, F64, F64, ) |
| 189 | OPCODE(FPTrunc16, F16, F16, ) | 190 | OPCODE(FPTrunc16, F16, F16, ) |
| 190 | OPCODE(FPTrunc32, F32, F32, ) | 191 | OPCODE(FPTrunc32, F32, F32, ) |
| 191 | OPCODE(FPTrunc64, F64, F64, ) | 192 | OPCODE(FPTrunc64, F64, F64, ) |
| 192 | 193 | ||
| 193 | OPCODE(FPOrdEqual16, U1, F16, F16, ) | 194 | OPCODE(FPOrdEqual16, U1, F16, F16, ) |
| 194 | OPCODE(FPOrdEqual32, U1, F32, F32, ) | 195 | OPCODE(FPOrdEqual32, U1, F32, F32, ) |
| 195 | OPCODE(FPOrdEqual64, U1, F64, F64, ) | 196 | OPCODE(FPOrdEqual64, U1, F64, F64, ) |
| 196 | OPCODE(FPUnordEqual16, U1, F16, F16, ) | 197 | OPCODE(FPUnordEqual16, U1, F16, F16, ) |
| 197 | OPCODE(FPUnordEqual32, U1, F32, F32, ) | 198 | OPCODE(FPUnordEqual32, U1, F32, F32, ) |
| 198 | OPCODE(FPUnordEqual64, U1, F64, F64, ) | 199 | OPCODE(FPUnordEqual64, U1, F64, F64, ) |
| 199 | OPCODE(FPOrdNotEqual16, U1, F16, F16, ) | 200 | OPCODE(FPOrdNotEqual16, U1, F16, F16, ) |
| 200 | OPCODE(FPOrdNotEqual32, U1, F32, F32, ) | 201 | OPCODE(FPOrdNotEqual32, U1, F32, F32, ) |
| 201 | OPCODE(FPOrdNotEqual64, U1, F64, F64, ) | 202 | OPCODE(FPOrdNotEqual64, U1, F64, F64, ) |
| 202 | OPCODE(FPUnordNotEqual16, U1, F16, F16, ) | 203 | OPCODE(FPUnordNotEqual16, U1, F16, F16, ) |
| 203 | OPCODE(FPUnordNotEqual32, U1, F32, F32, ) | 204 | OPCODE(FPUnordNotEqual32, U1, F32, F32, ) |
| 204 | OPCODE(FPUnordNotEqual64, U1, F64, F64, ) | 205 | OPCODE(FPUnordNotEqual64, U1, F64, F64, ) |
| 205 | OPCODE(FPOrdLessThan16, U1, F16, F16, ) | 206 | OPCODE(FPOrdLessThan16, U1, F16, F16, ) |
| 206 | OPCODE(FPOrdLessThan32, U1, F32, F32, ) | 207 | OPCODE(FPOrdLessThan32, U1, F32, F32, ) |
| 207 | OPCODE(FPOrdLessThan64, U1, F64, F64, ) | 208 | OPCODE(FPOrdLessThan64, U1, F64, F64, ) |
| 208 | OPCODE(FPUnordLessThan16, U1, F16, F16, ) | 209 | OPCODE(FPUnordLessThan16, U1, F16, F16, ) |
| 209 | OPCODE(FPUnordLessThan32, U1, F32, F32, ) | 210 | OPCODE(FPUnordLessThan32, U1, F32, F32, ) |
| 210 | OPCODE(FPUnordLessThan64, U1, F64, F64, ) | 211 | OPCODE(FPUnordLessThan64, U1, F64, F64, ) |
| 211 | OPCODE(FPOrdGreaterThan16, U1, F16, F16, ) | 212 | OPCODE(FPOrdGreaterThan16, U1, F16, F16, ) |
| 212 | OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) | 213 | OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) |
| 213 | OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) | 214 | OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) |
| 214 | OPCODE(FPUnordGreaterThan16, U1, F16, F16, ) | 215 | OPCODE(FPUnordGreaterThan16, U1, F16, F16, ) |
| 215 | OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) | 216 | OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) |
| 216 | OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) | 217 | OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) |
| 217 | OPCODE(FPOrdLessThanEqual16, U1, F16, F16, ) | 218 | OPCODE(FPOrdLessThanEqual16, U1, F16, F16, ) |
| 218 | OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) | 219 | OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) |
| 219 | OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) | 220 | OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) |
| 220 | OPCODE(FPUnordLessThanEqual16, U1, F16, F16, ) | 221 | OPCODE(FPUnordLessThanEqual16, U1, F16, F16, ) |
| 221 | OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) | 222 | OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) |
| 222 | OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) | 223 | OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) |
| 223 | OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, ) | 224 | OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, ) |
| 224 | OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) | 225 | OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) |
| 225 | OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) | 226 | OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) |
| 226 | OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) | 227 | OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) |
| 227 | OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) | 228 | OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) |
| 228 | OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) | 229 | OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) |
| 229 | 230 | ||
| 230 | // Integer operations | 231 | // Integer operations |
| 231 | OPCODE(IAdd32, U32, U32, U32, ) | 232 | OPCODE(IAdd32, U32, U32, U32, ) |
| 232 | OPCODE(IAdd64, U64, U64, U64, ) | 233 | OPCODE(IAdd64, U64, U64, U64, ) |
| 233 | OPCODE(ISub32, U32, U32, U32, ) | 234 | OPCODE(ISub32, U32, U32, U32, ) |
| 234 | OPCODE(ISub64, U64, U64, U64, ) | 235 | OPCODE(ISub64, U64, U64, U64, ) |
| 235 | OPCODE(IMul32, U32, U32, U32, ) | 236 | OPCODE(IMul32, U32, U32, U32, ) |
| 236 | OPCODE(INeg32, U32, U32, ) | 237 | OPCODE(INeg32, U32, U32, ) |
| 237 | OPCODE(INeg64, U64, U64, ) | 238 | OPCODE(INeg64, U64, U64, ) |
| 238 | OPCODE(IAbs32, U32, U32, ) | 239 | OPCODE(IAbs32, U32, U32, ) |
| 239 | OPCODE(ShiftLeftLogical32, U32, U32, U32, ) | 240 | OPCODE(ShiftLeftLogical32, U32, U32, U32, ) |
| 240 | OPCODE(ShiftLeftLogical64, U64, U64, U32, ) | 241 | OPCODE(ShiftLeftLogical64, U64, U64, U32, ) |
| 241 | OPCODE(ShiftRightLogical32, U32, U32, U32, ) | 242 | OPCODE(ShiftRightLogical32, U32, U32, U32, ) |
| 242 | OPCODE(ShiftRightLogical64, U64, U64, U32, ) | 243 | OPCODE(ShiftRightLogical64, U64, U64, U32, ) |
| 243 | OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) | 244 | OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) |
| 244 | OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) | 245 | OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) |
| 245 | OPCODE(BitwiseAnd32, U32, U32, U32, ) | 246 | OPCODE(BitwiseAnd32, U32, U32, U32, ) |
| 246 | OPCODE(BitwiseOr32, U32, U32, U32, ) | 247 | OPCODE(BitwiseOr32, U32, U32, U32, ) |
| 247 | OPCODE(BitwiseXor32, U32, U32, U32, ) | 248 | OPCODE(BitwiseXor32, U32, U32, U32, ) |
| 248 | OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) | 249 | OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) |
| 249 | OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) | 250 | OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) |
| 250 | OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) | 251 | OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) |
| 251 | OPCODE(BitReverse32, U32, U32, ) | 252 | OPCODE(BitReverse32, U32, U32, ) |
| 252 | OPCODE(BitCount32, U32, U32, ) | 253 | OPCODE(BitCount32, U32, U32, ) |
| 253 | OPCODE(BitwiseNot32, U32, U32, ) | 254 | OPCODE(BitwiseNot32, U32, U32, ) |
| 254 | 255 | ||
| 255 | OPCODE(FindSMsb32, U32, U32, ) | 256 | OPCODE(FindSMsb32, U32, U32, ) |
| 256 | OPCODE(FindUMsb32, U32, U32, ) | 257 | OPCODE(FindUMsb32, U32, U32, ) |
| 257 | OPCODE(SMin32, U32, U32, U32, ) | 258 | OPCODE(SMin32, U32, U32, U32, ) |
| 258 | OPCODE(UMin32, U32, U32, U32, ) | 259 | OPCODE(UMin32, U32, U32, U32, ) |
| 259 | OPCODE(SMax32, U32, U32, U32, ) | 260 | OPCODE(SMax32, U32, U32, U32, ) |
| 260 | OPCODE(UMax32, U32, U32, U32, ) | 261 | OPCODE(UMax32, U32, U32, U32, ) |
| 261 | OPCODE(SLessThan, U1, U32, U32, ) | 262 | OPCODE(SLessThan, U1, U32, U32, ) |
| 262 | OPCODE(ULessThan, U1, U32, U32, ) | 263 | OPCODE(ULessThan, U1, U32, U32, ) |
| 263 | OPCODE(IEqual, U1, U32, U32, ) | 264 | OPCODE(IEqual, U1, U32, U32, ) |
| 264 | OPCODE(SLessThanEqual, U1, U32, U32, ) | 265 | OPCODE(SLessThanEqual, U1, U32, U32, ) |
| 265 | OPCODE(ULessThanEqual, U1, U32, U32, ) | 266 | OPCODE(ULessThanEqual, U1, U32, U32, ) |
| 266 | OPCODE(SGreaterThan, U1, U32, U32, ) | 267 | OPCODE(SGreaterThan, U1, U32, U32, ) |
| 267 | OPCODE(UGreaterThan, U1, U32, U32, ) | 268 | OPCODE(UGreaterThan, U1, U32, U32, ) |
| 268 | OPCODE(INotEqual, U1, U32, U32, ) | 269 | OPCODE(INotEqual, U1, U32, U32, ) |
| 269 | OPCODE(SGreaterThanEqual, U1, U32, U32, ) | 270 | OPCODE(SGreaterThanEqual, U1, U32, U32, ) |
| 270 | OPCODE(UGreaterThanEqual, U1, U32, U32, ) | 271 | OPCODE(UGreaterThanEqual, U1, U32, U32, ) |
| 271 | 272 | ||
| 272 | // Logical operations | 273 | // Logical operations |
| 273 | OPCODE(LogicalOr, U1, U1, U1, ) | 274 | OPCODE(LogicalOr, U1, U1, U1, ) |
| 274 | OPCODE(LogicalAnd, U1, U1, U1, ) | 275 | OPCODE(LogicalAnd, U1, U1, U1, ) |
| 275 | OPCODE(LogicalXor, U1, U1, U1, ) | 276 | OPCODE(LogicalXor, U1, U1, U1, ) |
| 276 | OPCODE(LogicalNot, U1, U1, ) | 277 | OPCODE(LogicalNot, U1, U1, ) |
| 277 | 278 | ||
| 278 | // Conversion operations | 279 | // Conversion operations |
| 279 | OPCODE(ConvertS16F16, U32, F16, ) | 280 | OPCODE(ConvertS16F16, U32, F16, ) |
| 280 | OPCODE(ConvertS16F32, U32, F32, ) | 281 | OPCODE(ConvertS16F32, U32, F32, ) |
| 281 | OPCODE(ConvertS16F64, U32, F64, ) | 282 | OPCODE(ConvertS16F64, U32, F64, ) |
| 282 | OPCODE(ConvertS32F16, U32, F16, ) | 283 | OPCODE(ConvertS32F16, U32, F16, ) |
| 283 | OPCODE(ConvertS32F32, U32, F32, ) | 284 | OPCODE(ConvertS32F32, U32, F32, ) |
| 284 | OPCODE(ConvertS32F64, U32, F64, ) | 285 | OPCODE(ConvertS32F64, U32, F64, ) |
| 285 | OPCODE(ConvertS64F16, U64, F16, ) | 286 | OPCODE(ConvertS64F16, U64, F16, ) |
| 286 | OPCODE(ConvertS64F32, U64, F32, ) | 287 | OPCODE(ConvertS64F32, U64, F32, ) |
| 287 | OPCODE(ConvertS64F64, U64, F64, ) | 288 | OPCODE(ConvertS64F64, U64, F64, ) |
| 288 | OPCODE(ConvertU16F16, U32, F16, ) | 289 | OPCODE(ConvertU16F16, U32, F16, ) |
| 289 | OPCODE(ConvertU16F32, U32, F32, ) | 290 | OPCODE(ConvertU16F32, U32, F32, ) |
| 290 | OPCODE(ConvertU16F64, U32, F64, ) | 291 | OPCODE(ConvertU16F64, U32, F64, ) |
| 291 | OPCODE(ConvertU32F16, U32, F16, ) | 292 | OPCODE(ConvertU32F16, U32, F16, ) |
| 292 | OPCODE(ConvertU32F32, U32, F32, ) | 293 | OPCODE(ConvertU32F32, U32, F32, ) |
| 293 | OPCODE(ConvertU32F64, U32, F64, ) | 294 | OPCODE(ConvertU32F64, U32, F64, ) |
| 294 | OPCODE(ConvertU64F16, U64, F16, ) | 295 | OPCODE(ConvertU64F16, U64, F16, ) |
| 295 | OPCODE(ConvertU64F32, U64, F32, ) | 296 | OPCODE(ConvertU64F32, U64, F32, ) |
| 296 | OPCODE(ConvertU64F64, U64, F64, ) | 297 | OPCODE(ConvertU64F64, U64, F64, ) |
| 297 | OPCODE(ConvertU64U32, U64, U32, ) | 298 | OPCODE(ConvertU64U32, U64, U32, ) |
| 298 | OPCODE(ConvertU32U64, U32, U64, ) | 299 | OPCODE(ConvertU32U64, U32, U64, ) |
| 299 | OPCODE(ConvertF16F32, F16, F32, ) | 300 | OPCODE(ConvertF16F32, F16, F32, ) |
| 300 | OPCODE(ConvertF32F16, F32, F16, ) | 301 | OPCODE(ConvertF32F16, F32, F16, ) |
| 301 | OPCODE(ConvertF32F64, F32, F64, ) | 302 | OPCODE(ConvertF32F64, F32, F64, ) |
| 302 | OPCODE(ConvertF64F32, F64, F32, ) | 303 | OPCODE(ConvertF64F32, F64, F32, ) |
| 304 | OPCODE(ConvertF16S32, F16, U32, ) | ||
| 305 | OPCODE(ConvertF16S64, F16, U64, ) | ||
| 306 | OPCODE(ConvertF16U32, F16, U32, ) | ||
| 307 | OPCODE(ConvertF16U64, F16, U64, ) | ||
| 308 | OPCODE(ConvertF32S32, F32, U32, ) | ||
| 309 | OPCODE(ConvertF32S64, F32, U64, ) | ||
| 310 | OPCODE(ConvertF32U32, F32, U32, ) | ||
| 311 | OPCODE(ConvertF32U64, F32, U64, ) | ||
| 312 | OPCODE(ConvertF64S32, F64, U32, ) | ||
| 313 | OPCODE(ConvertF64S64, F64, U64, ) | ||
| 314 | OPCODE(ConvertF64U32, F64, U32, ) | ||
| 315 | OPCODE(ConvertF64U64, F64, U64, ) | ||
| 316 | |||
| 317 | // Image operations | ||
| 318 | OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 319 | OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 320 | OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | ||
| 321 | OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | ||
| 322 | |||
| 323 | OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 324 | OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 325 | OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | ||
| 326 | OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | ||
| 327 | |||
| 328 | OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 329 | OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 330 | OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | ||
| 331 | OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | ||
diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h index 771094eb9..8fea05f7b 100644 --- a/src/shader_recompiler/frontend/ir/reg.h +++ b/src/shader_recompiler/frontend/ir/reg.h | |||
| @@ -293,6 +293,17 @@ constexpr size_t NUM_REGS = 256; | |||
| 293 | return reg + (-num); | 293 | return reg + (-num); |
| 294 | } | 294 | } |
| 295 | 295 | ||
| 296 | [[nodiscard]] constexpr Reg operator++(Reg& reg) { | ||
| 297 | reg = reg + 1; | ||
| 298 | return reg; | ||
| 299 | } | ||
| 300 | |||
| 301 | [[nodiscard]] constexpr Reg operator++(Reg& reg, int) { | ||
| 302 | const Reg copy{reg}; | ||
| 303 | reg = reg + 1; | ||
| 304 | return copy; | ||
| 305 | } | ||
| 306 | |||
| 296 | [[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept { | 307 | [[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept { |
| 297 | return static_cast<size_t>(reg); | 308 | return static_cast<size_t>(reg); |
| 298 | } | 309 | } |
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 9b7e1480b..3602883d6 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h | |||
| @@ -75,6 +75,7 @@ private: | |||
| 75 | f64 imm_f64; | 75 | f64 imm_f64; |
| 76 | }; | 76 | }; |
| 77 | }; | 77 | }; |
| 78 | static_assert(std::is_trivially_copyable_v<Value>); | ||
| 78 | 79 | ||
| 79 | template <IR::Type type_> | 80 | template <IR::Type type_> |
| 80 | class TypedValue : public Value { | 81 | class TypedValue : public Value { |
diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index 5d0b91598..f2a2ff331 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc | |||
| @@ -249,8 +249,8 @@ INST(SULD, "SULD", "1110 1011 000- ----") | |||
| 249 | INST(SURED, "SURED", "1110 1011 010- ----") | 249 | INST(SURED, "SURED", "1110 1011 010- ----") |
| 250 | INST(SUST, "SUST", "1110 1011 001- ----") | 250 | INST(SUST, "SUST", "1110 1011 001- ----") |
| 251 | INST(SYNC, "SYNC", "1111 0000 1111 1---") | 251 | INST(SYNC, "SYNC", "1111 0000 1111 1---") |
| 252 | INST(TEX, "TEX", "1100 00-- --11 1---") | 252 | INST(TEX, "TEX", "1100 0--- ---- ----") |
| 253 | INST(TEX_b, "TEX (b)", "1101 1110 1011 1---") | 253 | INST(TEX_b, "TEX (b)", "1101 1110 10-- ----") |
| 254 | INST(TEXS, "TEXS", "1101 -00- ---- ----") | 254 | INST(TEXS, "TEXS", "1101 -00- ---- ----") |
| 255 | INST(TLD, "TLD", "1101 1100 --11 1---") | 255 | INST(TLD, "TLD", "1101 1100 --11 1---") |
| 256 | INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") | 256 | INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") |
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index dbfc04f75..b270bbccd 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp | |||
| @@ -62,6 +62,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | |||
| 62 | Optimization::SsaRewritePass(function.post_order_blocks); | 62 | Optimization::SsaRewritePass(function.post_order_blocks); |
| 63 | } | 63 | } |
| 64 | Optimization::GlobalMemoryToStorageBufferPass(program); | 64 | Optimization::GlobalMemoryToStorageBufferPass(program); |
| 65 | Optimization::TexturePass(env, program); | ||
| 65 | for (IR::Function& function : functions) { | 66 | for (IR::Function& function : functions) { |
| 66 | Optimization::PostOrderInvoke(Optimization::ConstantPropagationPass, function); | 67 | Optimization::PostOrderInvoke(Optimization::ConstantPropagationPass, function); |
| 67 | Optimization::PostOrderInvoke(Optimization::DeadCodeEliminationPass, function); | 68 | Optimization::PostOrderInvoke(Optimization::DeadCodeEliminationPass, function); |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index fc6030e04..ff429c126 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -585,14 +585,6 @@ void TranslatorVisitor::SYNC(u64) { | |||
| 585 | ThrowNotImplemented(Opcode::SYNC); | 585 | ThrowNotImplemented(Opcode::SYNC); |
| 586 | } | 586 | } |
| 587 | 587 | ||
| 588 | void TranslatorVisitor::TEX(u64) { | ||
| 589 | ThrowNotImplemented(Opcode::TEX); | ||
| 590 | } | ||
| 591 | |||
| 592 | void TranslatorVisitor::TEX_b(u64) { | ||
| 593 | ThrowNotImplemented(Opcode::TEX_b); | ||
| 594 | } | ||
| 595 | |||
| 596 | void TranslatorVisitor::TEXS(u64) { | 588 | void TranslatorVisitor::TEXS(u64) { |
| 597 | ThrowNotImplemented(Opcode::TEXS); | 589 | ThrowNotImplemented(Opcode::TEXS); |
| 598 | } | 590 | } |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp new file mode 100644 index 000000000..98d9f4c64 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp | |||
| @@ -0,0 +1,232 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Blod : u64 { | ||
| 15 | None, | ||
| 16 | LZ, | ||
| 17 | LB, | ||
| 18 | LL, | ||
| 19 | INVALIDBLOD4, | ||
| 20 | INVALIDBLOD5, | ||
| 21 | LBA, | ||
| 22 | LLA, | ||
| 23 | }; | ||
| 24 | |||
| 25 | enum class TextureType : u64 { | ||
| 26 | _1D, | ||
| 27 | ARRAY_1D, | ||
| 28 | _2D, | ||
| 29 | ARRAY_2D, | ||
| 30 | _3D, | ||
| 31 | ARRAY_3D, | ||
| 32 | CUBE, | ||
| 33 | ARRAY_CUBE, | ||
| 34 | }; | ||
| 35 | |||
| 36 | Shader::TextureType GetType(TextureType type, bool dc) { | ||
| 37 | switch (type) { | ||
| 38 | case TextureType::_1D: | ||
| 39 | return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; | ||
| 40 | case TextureType::ARRAY_1D: | ||
| 41 | return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; | ||
| 42 | case TextureType::_2D: | ||
| 43 | return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; | ||
| 44 | case TextureType::ARRAY_2D: | ||
| 45 | return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; | ||
| 46 | case TextureType::_3D: | ||
| 47 | return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; | ||
| 48 | case TextureType::ARRAY_3D: | ||
| 49 | throw NotImplementedException("3D array texture type"); | ||
| 50 | case TextureType::CUBE: | ||
| 51 | return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; | ||
| 52 | case TextureType::ARRAY_CUBE: | ||
| 53 | return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; | ||
| 54 | } | ||
| 55 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 56 | } | ||
| 57 | |||
| 58 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||
| 59 | const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }}; | ||
| 60 | switch (type) { | ||
| 61 | case TextureType::_1D: | ||
| 62 | return v.F(reg); | ||
| 63 | case TextureType::ARRAY_1D: | ||
| 64 | return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); | ||
| 65 | case TextureType::_2D: | ||
| 66 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); | ||
| 67 | case TextureType::ARRAY_2D: | ||
| 68 | return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); | ||
| 69 | case TextureType::_3D: | ||
| 70 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 71 | case TextureType::ARRAY_3D: | ||
| 72 | throw NotImplementedException("3D array texture type"); | ||
| 73 | case TextureType::CUBE: | ||
| 74 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 75 | case TextureType::ARRAY_CUBE: | ||
| 76 | return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); | ||
| 77 | } | ||
| 78 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 79 | } | ||
| 80 | |||
| 81 | IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { | ||
| 82 | switch (blod) { | ||
| 83 | case Blod::None: | ||
| 84 | return v.ir.Imm32(0.0f); | ||
| 85 | case Blod::LZ: | ||
| 86 | return v.ir.Imm32(0.0f); | ||
| 87 | case Blod::LB: | ||
| 88 | case Blod::LL: | ||
| 89 | case Blod::LBA: | ||
| 90 | case Blod::LLA: | ||
| 91 | return v.F(reg++); | ||
| 92 | case Blod::INVALIDBLOD4: | ||
| 93 | case Blod::INVALIDBLOD5: | ||
| 94 | break; | ||
| 95 | } | ||
| 96 | throw NotImplementedException("Invalid blod {}", blod); | ||
| 97 | } | ||
| 98 | |||
| 99 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { | ||
| 100 | const IR::U32 value{v.X(reg++)}; | ||
| 101 | switch (type) { | ||
| 102 | case TextureType::_1D: | ||
| 103 | case TextureType::ARRAY_1D: | ||
| 104 | return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)); | ||
| 105 | case TextureType::_2D: | ||
| 106 | case TextureType::ARRAY_2D: | ||
| 107 | return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), | ||
| 108 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4))); | ||
| 109 | case TextureType::_3D: | ||
| 110 | case TextureType::ARRAY_3D: | ||
| 111 | return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), | ||
| 112 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)), | ||
| 113 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4))); | ||
| 114 | case TextureType::CUBE: | ||
| 115 | case TextureType::ARRAY_CUBE: | ||
| 116 | throw NotImplementedException("Illegal offset on CUBE sample"); | ||
| 117 | } | ||
| 118 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 119 | } | ||
| 120 | |||
| 121 | bool HasExplicitLod(Blod blod) { | ||
| 122 | switch (blod) { | ||
| 123 | case Blod::LL: | ||
| 124 | case Blod::LLA: | ||
| 125 | case Blod::LZ: | ||
| 126 | return true; | ||
| 127 | default: | ||
| 128 | return false; | ||
| 129 | } | ||
| 130 | } | ||
| 131 | |||
// Shared implementation for the TEX variants. Decodes the fixed instruction
// fields, gathers the operands (handle, bias/LOD, AOFFI offset, depth-compare
// reference), emits the matching IR sample instruction and writes the enabled
// result components to consecutive destination registers.
//
// cbuf_offset holds the constant-buffer handle offset for bound TEX, or
// std::nullopt for bindless TEX.B where the handle is read from a register.
void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
          std::optional<u32> cbuf_offset) {
    union {
        u64 raw;
        BitField<35, 1, u64> ndv;
        BitField<49, 1, u64> nodep;
        BitField<50, 1, u64> dc;
        BitField<51, 3, IR::Pred> sparse_pred;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<20, 8, IR::Reg> meta_reg;
        BitField<28, 3, TextureType> type;
        BitField<31, 4, u64> mask;
    } const tex{insn};

    if (lc) {
        throw NotImplementedException("LC");
    }
    const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)};

    // Extra operands are packed in consecutive registers starting at meta_reg
    // and are consumed strictly in this order: bindless handle (TEX.B only),
    // bias/LOD, AOFFI offsets, depth-compare reference.
    IR::Reg meta_reg{tex.meta_reg};
    IR::Value handle;
    IR::Value offset;
    IR::F32 dref;
    IR::F32 lod_clamp;
    if (cbuf_offset) {
        handle = v.ir.Imm32(*cbuf_offset);
    } else {
        handle = v.X(meta_reg++);
    }
    const IR::F32 lod{MakeLod(v, meta_reg, blod)};
    if (aoffi) {
        offset = MakeOffset(v, meta_reg, tex.type);
    }
    if (tex.dc != 0) {
        dref = v.F(meta_reg++);
    }
    IR::TextureInstInfo info{};
    info.type.Assign(GetType(tex.type, tex.dc != 0));
    // LB/LBA carry a bias rather than an absolute level of detail
    info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0);
    info.has_lod_clamp.Assign(lc ? 1 : 0);

    // Depth-compare samples use the Dref opcodes; LL/LLA/LZ select the
    // explicit-LOD forms, everything else samples with an implicit LOD.
    const IR::Value sample{[&]() -> IR::Value {
        if (tex.dc == 0) {
            if (HasExplicitLod(blod)) {
                return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info);
            } else {
                return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info);
            }
        }
        if (HasExplicitLod(blod)) {
            return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp,
                                                   info);
        } else {
            return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp,
                                                   info);
        }
    }()};

    // Write each component enabled in the 4-bit mask to consecutive
    // destination registers.
    for (int element = 0; element < 4; ++element) {
        if (((tex.mask >> element) & 1) == 0) {
            continue;
        }
        IR::F32 value;
        if (tex.dc != 0) {
            // Depth-compare yields a scalar: broadcast it to the first three
            // components; the alpha component reads as 1.0.
            value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f);
        } else {
            value = IR::F32{v.ir.CompositeExtract(sample, element)};
        }
        v.F(tex.dest_reg + element, value);
    }
    // NOTE(review): presumably GetSparseFromOp reports residency of the
    // sampled memory; the predicate is set when the access was not resident
    // inverted — confirm against the IR opcode's definition.
    if (tex.sparse_pred != IR::Pred::PT) {
        v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
    }
}
| 207 | } // Anonymous namespace | ||
| 208 | |||
| 209 | void TranslatorVisitor::TEX(u64 insn) { | ||
| 210 | union { | ||
| 211 | u64 raw; | ||
| 212 | BitField<54, 1, u64> aoffi; | ||
| 213 | BitField<55, 3, Blod> blod; | ||
| 214 | BitField<58, 1, u64> lc; | ||
| 215 | BitField<36, 13, u64> cbuf_offset; | ||
| 216 | } const tex{insn}; | ||
| 217 | |||
| 218 | Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset)); | ||
| 219 | } | ||
| 220 | |||
| 221 | void TranslatorVisitor::TEX_b(u64 insn) { | ||
| 222 | union { | ||
| 223 | u64 raw; | ||
| 224 | BitField<36, 1, u64> aoffi; | ||
| 225 | BitField<37, 3, Blod> blod; | ||
| 226 | BitField<40, 1, u64> lc; | ||
| 227 | } const tex{insn}; | ||
| 228 | |||
| 229 | Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); | ||
| 230 | } | ||
| 231 | |||
| 232 | } // namespace Shader::Maxwell | ||