| author | 2021-03-03 03:07:19 -0300 |
|---|---|
| committer | 2021-07-22 21:51:23 -0400 |
| commit | 4006929c986a2e0e52429fe21201a7ad5ca3fea9 (patch) |
| tree | 9f4a1ffa7782ed76db5561e107e8ae9f71f63a15 /src/shader_recompiler/frontend |
| parent | shader: Implement LOP and LOP3 (diff) |
| download | yuzu-4006929c986a2e0e52429fe21201a7ad5ca3fea9.tar.gz, yuzu-4006929c986a2e0e52429fe21201a7ad5ca3fea9.tar.xz, yuzu-4006929c986a2e0e52429fe21201a7ad5ca3fea9.zip |

shader: Implement HADD2

Diffstat (limited to '')
6 files changed, 290 insertions, 23 deletions
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 0f1cab57a..186920d8f 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -334,12 +334,12 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu
 }
 
 Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
-    const auto read = [&](Opcode opcode, size_t limit) -> Value {
+    const auto read{[&](Opcode opcode, size_t limit) -> Value {
         if (element >= limit) {
             throw InvalidArgument("Out of bounds element {}", element);
         }
         return Inst(opcode, vector, Value{static_cast<u32>(element)});
-    };
+    }};
     switch (vector.Type()) {
     case Type::U32x2:
         return read(Opcode::CompositeExtractU32x2, 2);
@@ -370,6 +370,43 @@ Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
     }
 }
 
+Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_t element) {
+    const auto insert{[&](Opcode opcode, size_t limit) {
+        if (element >= limit) {
+            throw InvalidArgument("Out of bounds element {}", element);
+        }
+        return Inst(opcode, vector, object, Value{static_cast<u32>(element)});
+    }};
+    switch (vector.Type()) {
+    case Type::U32x2:
+        return insert(Opcode::CompositeInsertU32x2, 2);
+    case Type::U32x3:
+        return insert(Opcode::CompositeInsertU32x3, 3);
+    case Type::U32x4:
+        return insert(Opcode::CompositeInsertU32x4, 4);
+    case Type::F16x2:
+        return insert(Opcode::CompositeInsertF16x2, 2);
+    case Type::F16x3:
+        return insert(Opcode::CompositeInsertF16x3, 3);
+    case Type::F16x4:
+        return insert(Opcode::CompositeInsertF16x4, 4);
+    case Type::F32x2:
+        return insert(Opcode::CompositeInsertF32x2, 2);
+    case Type::F32x3:
+        return insert(Opcode::CompositeInsertF32x3, 3);
+    case Type::F32x4:
+        return insert(Opcode::CompositeInsertF32x4, 4);
+    case Type::F64x2:
+        return insert(Opcode::CompositeInsertF64x2, 2);
+    case Type::F64x3:
+        return insert(Opcode::CompositeInsertF64x3, 3);
+    case Type::F64x4:
+        return insert(Opcode::CompositeInsertF64x4, 4);
+    default:
+        ThrowInvalidType(vector.Type());
+    }
+}
+
 Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) {
     if (true_value.Type() != false_value.Type()) {
         throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type());
@@ -433,7 +470,7 @@ U32 IREmitter::PackFloat2x16(const Value& vector) {
 }
 
 Value IREmitter::UnpackFloat2x16(const U32& value) {
-    return Inst<Value>(Opcode::UnpackFloat2x16, value);
+    return Inst(Opcode::UnpackFloat2x16, value);
 }
 
 F64 IREmitter::PackDouble2x32(const Value& vector) {
@@ -968,7 +1005,7 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v
     }
 }
 
-U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) {
+U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) {
     switch (result_bitsize) {
     case 32:
         switch (value.Type()) {
@@ -995,4 +1032,49 @@ U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) {
     throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
 }
 
+F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
+    switch (result_bitsize) {
+    case 16:
+        switch (value.Type()) {
+        case Type::F16:
+            // Nothing to do
+            return value;
+        case Type::F32:
+            return Inst<F16>(Opcode::ConvertF16F32, value);
+        case Type::F64:
+            throw LogicError("Illegal conversion from F64 to F16");
+        default:
+            break;
+        }
+        break;
+    case 32:
+        switch (value.Type()) {
+        case Type::F16:
+            return Inst<F32>(Opcode::ConvertF32F16, value);
+        case Type::F32:
+            // Nothing to do
+            return value;
+        case Type::F64:
+            return Inst<F64>(Opcode::ConvertF32F64, value);
+        default:
+            break;
+        }
+        break;
+    case 64:
+        switch (value.Type()) {
+        case Type::F16:
+            throw LogicError("Illegal conversion from F16 to F64");
+        case Type::F32:
+            // Nothing to do
+            return value;
+        case Type::F64:
+            return Inst<F64>(Opcode::ConvertF32F64, value);
+        default:
+            break;
+        }
+        break;
+    }
+    throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
+}
+
 } // namespace Shader::IR
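The new CompositeInsert helper mirrors CompositeExtract: it bounds-checks the element index against the composite's width and then emits the per-type opcode with the source composite, the scalar to insert, and the index as operands. As with the other composite helpers, the source operand is an immutable SSA value, so the opcode yields a new composite rather than mutating anything in place. Below is a minimal standalone sketch of those value semantics only; it is not yuzu code, and the use of std::array as the stand-in composite type is an assumption made purely for illustration.

```cpp
// Standalone sketch: models the value semantics of the CompositeInsert helper.
// Like the SSA opcode, it leaves the source composite untouched and yields a
// new composite with exactly one element replaced.
#include <array>
#include <cstddef>
#include <cstdio>
#include <stdexcept>

template <typename T, std::size_t N>
std::array<T, N> CompositeInsert(std::array<T, N> vector, T object, std::size_t element) {
    if (element >= N) {
        throw std::invalid_argument{"Out of bounds element"};
    }
    vector[element] = object; // modifies only the by-value copy
    return vector;            // the caller's original composite is unchanged
}

int main() {
    const std::array<float, 2> f32x2{1.0f, 2.0f};
    const std::array<float, 2> merged{CompositeInsert(f32x2, 5.0f, 1)};
    std::printf("original = {%g, %g}, inserted = {%g, %g}\n", f32x2[0], f32x2[1], merged[0],
                merged[1]);
}
```

FPConvert follows the same dispatch pattern for scalar floats: it selects ConvertF16F32, ConvertF32F16, or ConvertF32F64 based on the source and destination widths, returns the value untouched when the widths already match, and rejects direct F16/F64 conversions with a LogicError.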
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 03a67985f..5beb99895 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -97,6 +97,7 @@ public:
     [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
                                            const Value& e4);
     [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
+    [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);
 
     [[nodiscard]] Value Select(const U1& condition, const Value& true_value,
                                const Value& false_value);
@@ -186,7 +187,8 @@ public:
     [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
     [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);
 
-    [[nodiscard]] U32U64 ConvertU(size_t result_bitsize, const U32U64& value);
+    [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
+    [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);
 
 private:
     IR::Block::iterator insertion_point;
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index aedbc5c3e..acfc0a829 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -83,24 +83,36 @@ OPCODE(CompositeConstructU32x4, U32x4, U32,
 OPCODE(CompositeExtractU32x2, U32, U32x2, U32, )
 OPCODE(CompositeExtractU32x3, U32, U32x3, U32, )
 OPCODE(CompositeExtractU32x4, U32, U32x4, U32, )
+OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, )
+OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, )
+OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, )
 OPCODE(CompositeConstructF16x2, F16x2, F16, F16, )
 OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, )
 OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, )
 OPCODE(CompositeExtractF16x2, F16, F16x2, U32, )
 OPCODE(CompositeExtractF16x3, F16, F16x3, U32, )
 OPCODE(CompositeExtractF16x4, F16, F16x4, U32, )
+OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, )
+OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, )
+OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, )
 OPCODE(CompositeConstructF32x2, F32x2, F32, F32, )
 OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, )
 OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, )
 OPCODE(CompositeExtractF32x2, F32, F32x2, U32, )
 OPCODE(CompositeExtractF32x3, F32, F32x3, U32, )
 OPCODE(CompositeExtractF32x4, F32, F32x4, U32, )
+OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, )
+OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, )
+OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, )
 OPCODE(CompositeConstructF64x2, F64x2, F64, F64, )
 OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, )
 OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, )
 OPCODE(CompositeExtractF64x2, F64, F64x2, U32, )
 OPCODE(CompositeExtractF64x3, F64, F64x3, U32, )
 OPCODE(CompositeExtractF64x4, F64, F64x4, U32, )
+OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, )
+OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, )
+OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, )
 
 // Select operations
 OPCODE(SelectU8, U8, U1, U8, U8, )
@@ -277,6 +289,9 @@ OPCODE(ConvertU32F64, U32, F64,
 OPCODE(ConvertU64F16, U64, F16, )
 OPCODE(ConvertU64F32, U64, F32, )
 OPCODE(ConvertU64F64, U64, F64, )
-
 OPCODE(ConvertU64U32, U64, U32, )
 OPCODE(ConvertU32U64, U32, U64, )
+OPCODE(ConvertF16F32, F16, F32, )
+OPCODE(ConvertF32F16, F32, F16, )
+OPCODE(ConvertF32F64, F32, F64, )
+OPCODE(ConvertF64F32, F64, F32, )
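The rows added to opcodes.inc keep the file's existing column layout: opcode name, result type, then operand types (for the CompositeInsert family that is the source composite, the scalar being inserted, and a U32 element index; for the new Convert opcodes, the source scalar). Tables like this are typically consumed as X-macros, with the consumer defining OPCODE and then including the file. The sketch below shows that pattern in isolation with a tiny inlined list; the OPCODE_LIST macro, the Opcode enum, and the NameOf helper are illustrative and not the shader recompiler's actual definitions.

```cpp
// Illustrative X-macro consumer in the style of an opcodes.inc table.
// The list is inlined here so the example is self-contained; a real consumer
// would define OPCODE and #include the .inc file instead.
#include <cstdio>

#define OPCODE_LIST(X)                                     \
    X(CompositeInsertF16x2, F16x2, F16x2, F16, U32)        \
    X(CompositeInsertF32x2, F32x2, F32x2, F32, U32)        \
    X(ConvertF32F16, F32, F16)

// First expansion: an enum with one entry per row.
enum class Opcode {
#define OPCODE(name, result, ...) name,
    OPCODE_LIST(OPCODE)
#undef OPCODE
};

// Second expansion over the same list: a printable name for each opcode.
constexpr const char* NameOf(Opcode op) {
    switch (op) {
#define OPCODE(name, result, ...)                          \
    case Opcode::name:                                     \
        return #name;
        OPCODE_LIST(OPCODE)
#undef OPCODE
    }
    return "<invalid>";
}

int main() {
    std::printf("%s\n", NameOf(Opcode::ConvertF32F16));
}
```

Because every table (enum values, names, and in the real recompiler presumably the result and argument types as well) is generated from the single list, adding the CompositeInsert and Convert rows in one place is enough to register them everywhere.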
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
new file mode 100644
index 000000000..6965adfb3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
@@ -0,0 +1,184 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Merge : u64 {
+    H1_H0,
+    F32,
+    MRG_H0,
+    MRG_H1,
+};
+
+enum class Swizzle : u64 {
+    H1_H0,
+    F32,
+    H0_H0,
+    H1_H1,
+};
+
+std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) {
+    switch (swizzle) {
+    case Swizzle::H1_H0: {
+        const IR::Value vector{ir.UnpackFloat2x16(value)};
+        return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}};
+    }
+    case Swizzle::H0_H0: {
+        const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)};
+        return {scalar, scalar};
+    }
+    case Swizzle::H1_H1: {
+        const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)};
+        return {scalar, scalar};
+    }
+    case Swizzle::F32: {
+        const IR::F32 scalar{ir.BitCast<IR::F32>(value)};
+        return {scalar, scalar};
+    }
+    }
+    throw InvalidArgument("Invalid swizzle {}", swizzle);
+}
+
+IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
+                    Merge merge) {
+    switch (merge) {
+    case Merge::H1_H0:
+        return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs));
+    case Merge::F32:
+        return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs));
+    case Merge::MRG_H0:
+    case Merge::MRG_H1: {
+        const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))};
+        const bool h0{merge == Merge::MRG_H0};
+        const IR::F16& insert{h0 ? lhs : rhs};
+        return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, h0 ? 0 : 1));
+    }
+    }
+    throw InvalidArgument("Invalid merge {}", merge);
+}
+
+void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,
+           Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {
+    union {
+        u64 raw;
+        BitField<0, 8, IR::Reg> dest_reg;
+        BitField<8, 8, IR::Reg> src_a;
+    } const hadd2{insn};
+
+    auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)};
+    auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
+    const bool promotion{lhs_a.Type() != lhs_b.Type()};
+    if (promotion) {
+        if (lhs_a.Type() == IR::Type::F16) {
+            lhs_a = v.ir.FPConvert(32, lhs_a);
+            rhs_a = v.ir.FPConvert(32, rhs_a);
+        }
+        if (lhs_b.Type() == IR::Type::F16) {
+            lhs_b = v.ir.FPConvert(32, lhs_b);
+            rhs_b = v.ir.FPConvert(32, rhs_b);
+        }
+    }
+    lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
+    rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
+
+    lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
+    rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
+
+    const IR::FpControl fp_control{
+        .no_contraction{true},
+        .rounding{IR::FpRounding::DontCare},
+        .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
+    };
+    IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
+    IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};
+    if (sat) {
+        lhs = v.ir.FPSaturate(lhs);
+        rhs = v.ir.FPSaturate(rhs);
+    }
+    if (promotion) {
+        lhs = v.ir.FPConvert(16, lhs);
+        rhs = v.ir.FPConvert(16, rhs);
+    }
+    v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge));
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::HADD2_reg(u64 insn) {
+    union {
+        u64 raw;
+        BitField<49, 2, Merge> merge;
+        BitField<39, 1, u64> ftz;
+        BitField<32, 1, u64> sat;
+        BitField<43, 1, u64> neg_a;
+        BitField<44, 1, u64> abs_a;
+        BitField<47, 2, Swizzle> swizzle_a;
+        BitField<31, 1, u64> neg_b;
+        BitField<30, 1, u64> abs_b;
+        BitField<28, 2, Swizzle> swizzle_b;
+    } const hadd2{insn};
+
+    HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0,
+          hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b,
+          GetReg20(insn));
+}
+
+void TranslatorVisitor::HADD2_cbuf(u64 insn) {
+    union {
+        u64 raw;
+        BitField<49, 2, Merge> merge;
+        BitField<39, 1, u64> ftz;
+        BitField<52, 1, u64> sat;
+        BitField<43, 1, u64> neg_a;
+        BitField<44, 1, u64> abs_a;
+        BitField<47, 2, Swizzle> swizzle_a;
+        BitField<56, 1, u64> neg_b;
+        BitField<54, 1, u64> abs_b;
+    } const hadd2{insn};
+
+    HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0,
+          hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32,
+          GetCbuf(insn));
+}
+
+void TranslatorVisitor::HADD2_imm(u64 insn) {
+    union {
+        u64 raw;
+        BitField<49, 2, Merge> merge;
+        BitField<39, 1, u64> ftz;
+        BitField<52, 1, u64> sat;
+        BitField<43, 1, u64> neg_a;
+        BitField<44, 1, u64> abs_a;
+        BitField<47, 2, Swizzle> swizzle_a;
+        BitField<56, 1, u64> neg_high;
+        BitField<30, 9, u64> high;
+        BitField<29, 1, u64> neg_low;
+        BitField<20, 9, u64> low;
+    } const hadd2{insn};
+
+    const u32 imm{static_cast<u32>(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) |
+                  static_cast<u32>(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)};
+    HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0,
+          hadd2.neg_a != 0, hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
+}
+
+void TranslatorVisitor::HADD2_32I(u64 insn) {
+    union {
+        u64 raw;
+        BitField<55, 1, u64> ftz;
+        BitField<52, 1, u64> sat;
+        BitField<56, 1, u64> neg_a;
+        BitField<53, 2, Swizzle> swizzle_a;
+        BitField<20, 32, u64> imm32;
+    } const hadd2{insn};
+
+    const u32 imm{static_cast<u32>(hadd2.imm32)};
+    HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0,
+          hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
+}
+} // namespace Shader::Maxwell
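For HADD2_imm, the instruction word carries two 9-bit immediate fields plus separate negate bits, and the translator expands them into one packed pair of FP16 values: the low field fills bits [14:6] with its negate bit at bit 15, and the high field fills bits [30:22] with its negate bit at bit 31. Each 9-bit field therefore supplies the FP16 exponent and the top four mantissa bits, while the remaining six mantissa bits of each half are always zero. The standalone sketch below reproduces that packing arithmetic; the PackHadd2Imm helper name and the sample field values are made up for illustration.

```cpp
// Standalone sketch of the HADD2_imm immediate expansion shown above:
// two 9-bit fields plus negate bits become a packed pair of FP16 values.
#include <cstdint>
#include <cstdio>

constexpr std::uint32_t PackHadd2Imm(std::uint32_t low, bool neg_low, std::uint32_t high,
                                     bool neg_high) {
    // Mirrors the shift/or expression in TranslatorVisitor::HADD2_imm.
    return (low << 6) | ((neg_low ? 1u : 0u) << 15) | (high << 22) |
           ((neg_high ? 1u : 0u) << 31);
}

int main() {
    // 0x0f0 expands to 0x3c00, which is 1.0 in IEEE half precision; the negate
    // bit only flips the sign, so the high half below becomes -1.0 (0xbc00).
    const std::uint32_t packed{PackHadd2Imm(0x0f0, false, 0x0f0, true)};
    const unsigned h0 = packed & 0xffffu; // low FP16 half
    const unsigned h1 = packed >> 16;     // high FP16 half
    std::printf("packed=0x%08x h0=0x%04x h1=0x%04x\n", packed, h0, h1);
    std::printf("h1: sign=%u exponent=0x%02x mantissa=0x%03x\n", (h1 >> 15) & 1u,
                (h1 >> 10) & 0x1fu, h1 & 0x3ffu);
}
```

HADD2_32I skips this expansion entirely and passes the full 32-bit immediate through as an already packed H1_H0 pair.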
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
index 727524284..748b856c9 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
@@ -59,7 +59,7 @@ IR::U64 Address(TranslatorVisitor& v, u64 insn) {
     const IR::U64 address{[&]() -> IR::U64 {
         if (mem.e == 0) {
             // LDG/STG without .E uses a 32-bit pointer, zero-extend it
-            return v.ir.ConvertU(64, v.X(mem.addr_reg));
+            return v.ir.UConvert(64, v.X(mem.addr_reg));
         }
         if (!IR::IsAligned(mem.addr_reg, 2)) {
             throw NotImplementedException("Unaligned address register");
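The rename to UConvert keeps the behaviour the comment above relies on: an LDG/STG pointer without the .E modifier is a 32-bit value that must be zero-extended, not sign-extended, when widened to 64 bits. A small standalone illustration of that distinction on plain integers (not IR values):

```cpp
// Standalone illustration: zero- versus sign-extension of a 32-bit address.
#include <cstdint>
#include <cstdio>

int main() {
    const std::uint32_t addr{0x80001234u}; // high bit set, still a valid 32-bit pointer
    const std::uint64_t zero_extended{addr}; // what UConvert(64, ...) models
    const std::uint64_t sign_extended{
        static_cast<std::uint64_t>(static_cast<std::int64_t>(static_cast<std::int32_t>(addr)))};
    std::printf("zero-extended: 0x%016llx\nsign-extended: 0x%016llx\n",
                static_cast<unsigned long long>(zero_extended),
                static_cast<unsigned long long>(sign_extended));
}
```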
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index a0535f1c2..c24f29ff7 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -265,22 +265,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) {
     ThrowNotImplemented(Opcode::GETLMEMBASE);
 }
 
-void TranslatorVisitor::HADD2_reg(u64) {
-    ThrowNotImplemented(Opcode::HADD2_reg);
-}
-
-void TranslatorVisitor::HADD2_cbuf(u64) {
-    ThrowNotImplemented(Opcode::HADD2_cbuf);
-}
-
-void TranslatorVisitor::HADD2_imm(u64) {
-    ThrowNotImplemented(Opcode::HADD2_imm);
-}
-
-void TranslatorVisitor::HADD2_32I(u64) {
-    ThrowNotImplemented(Opcode::HADD2_32I);
-}
-
 void TranslatorVisitor::HFMA2_reg(u64) {
     ThrowNotImplemented(Opcode::HFMA2_reg);
 }