diff options
| author | 2021-03-03 03:07:19 -0300 | |
|---|---|---|
| committer | 2021-07-22 21:51:23 -0400 | |
| commit | 4006929c986a2e0e52429fe21201a7ad5ca3fea9 (patch) | |
| tree | 9f4a1ffa7782ed76db5561e107e8ae9f71f63a15 /src/shader_recompiler/frontend/ir | |
| parent | shader: Implement LOP and LOP3 (diff) | |
| download | yuzu-4006929c986a2e0e52429fe21201a7ad5ca3fea9.tar.gz yuzu-4006929c986a2e0e52429fe21201a7ad5ca3fea9.tar.xz yuzu-4006929c986a2e0e52429fe21201a7ad5ca3fea9.zip | |
shader: Implement HADD2
Diffstat (limited to '')
| -rw-r--r-- | src/shader_recompiler/frontend/ir/ir_emitter.cpp | 90 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/ir/ir_emitter.h | 4 | ||||
| -rw-r--r-- | src/shader_recompiler/frontend/ir/opcodes.inc | 17 |
3 files changed, 105 insertions, 6 deletions
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 0f1cab57a..186920d8f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -334,12 +334,12 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu | |||
| 334 | } | 334 | } |
| 335 | 335 | ||
| 336 | Value IREmitter::CompositeExtract(const Value& vector, size_t element) { | 336 | Value IREmitter::CompositeExtract(const Value& vector, size_t element) { |
| 337 | const auto read = [&](Opcode opcode, size_t limit) -> Value { | 337 | const auto read{[&](Opcode opcode, size_t limit) -> Value { |
| 338 | if (element >= limit) { | 338 | if (element >= limit) { |
| 339 | throw InvalidArgument("Out of bounds element {}", element); | 339 | throw InvalidArgument("Out of bounds element {}", element); |
| 340 | } | 340 | } |
| 341 | return Inst(opcode, vector, Value{static_cast<u32>(element)}); | 341 | return Inst(opcode, vector, Value{static_cast<u32>(element)}); |
| 342 | }; | 342 | }}; |
| 343 | switch (vector.Type()) { | 343 | switch (vector.Type()) { |
| 344 | case Type::U32x2: | 344 | case Type::U32x2: |
| 345 | return read(Opcode::CompositeExtractU32x2, 2); | 345 | return read(Opcode::CompositeExtractU32x2, 2); |
| @@ -370,6 +370,43 @@ Value IREmitter::CompositeExtract(const Value& vector, size_t element) { | |||
| 370 | } | 370 | } |
| 371 | } | 371 | } |
| 372 | 372 | ||
| 373 | Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_t element) { // Builds the CompositeInsert* instruction matching vector.Type(), with operands (vector, object, element index) | ||
| 374 | const auto insert{[&](Opcode opcode, size_t limit) { // limit is the component count of the vector type being handled | ||
| 375 | if (element >= limit) { | ||
| 376 | throw InvalidArgument("Out of bounds element {}", element); | ||
| 377 | } | ||
| 378 | return Inst(opcode, vector, object, Value{static_cast<u32>(element)}); // the index is passed as a 32-bit immediate operand | ||
| 379 | }}; | ||
| 380 | switch (vector.Type()) { // one opcode per element type and width; limit equals the width | ||
| 381 | case Type::U32x2: | ||
| 382 | return insert(Opcode::CompositeInsertU32x2, 2); | ||
| 383 | case Type::U32x3: | ||
| 384 | return insert(Opcode::CompositeInsertU32x3, 3); | ||
| 385 | case Type::U32x4: | ||
| 386 | return insert(Opcode::CompositeInsertU32x4, 4); | ||
| 387 | case Type::F16x2: | ||
| 388 | return insert(Opcode::CompositeInsertF16x2, 2); | ||
| 389 | case Type::F16x3: | ||
| 390 | return insert(Opcode::CompositeInsertF16x3, 3); | ||
| 391 | case Type::F16x4: | ||
| 392 | return insert(Opcode::CompositeInsertF16x4, 4); | ||
| 393 | case Type::F32x2: | ||
| 394 | return insert(Opcode::CompositeInsertF32x2, 2); | ||
| 395 | case Type::F32x3: | ||
| 396 | return insert(Opcode::CompositeInsertF32x3, 3); | ||
| 397 | case Type::F32x4: | ||
| 398 | return insert(Opcode::CompositeInsertF32x4, 4); | ||
| 399 | case Type::F64x2: | ||
| 400 | return insert(Opcode::CompositeInsertF64x2, 2); | ||
| 401 | case Type::F64x3: | ||
| 402 | return insert(Opcode::CompositeInsertF64x3, 3); | ||
| 403 | case Type::F64x4: | ||
| 404 | return insert(Opcode::CompositeInsertF64x4, 4); | ||
| 405 | default: | ||
| 406 | ThrowInvalidType(vector.Type()); // any type not listed above is rejected | ||
| 407 | } | ||
| 408 | } | ||
| 409 | |||
| 373 | Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { | 410 | Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { |
| 374 | if (true_value.Type() != false_value.Type()) { | 411 | if (true_value.Type() != false_value.Type()) { |
| 375 | throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); | 412 | throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); |
| @@ -433,7 +470,7 @@ U32 IREmitter::PackFloat2x16(const Value& vector) { | |||
| 433 | } | 470 | } |
| 434 | 471 | ||
| 435 | Value IREmitter::UnpackFloat2x16(const U32& value) { | 472 | Value IREmitter::UnpackFloat2x16(const U32& value) { |
| 436 | return Inst<Value>(Opcode::UnpackFloat2x16, value); | 473 | return Inst(Opcode::UnpackFloat2x16, value); |
| 437 | } | 474 | } |
| 438 | 475 | ||
| 439 | F64 IREmitter::PackDouble2x32(const Value& vector) { | 476 | F64 IREmitter::PackDouble2x32(const Value& vector) { |
| @@ -968,7 +1005,7 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v | |||
| 968 | } | 1005 | } |
| 969 | } | 1006 | } |
| 970 | 1007 | ||
| 971 | U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) { | 1008 | U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { |
| 972 | switch (result_bitsize) { | 1009 | switch (result_bitsize) { |
| 973 | case 32: | 1010 | case 32: |
| 974 | switch (value.Type()) { | 1011 | switch (value.Type()) { |
| @@ -995,4 +1032,49 @@ U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) { | |||
| 995 | throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); | 1032 | throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); |
| 996 | } | 1033 | } |
| 997 | 1034 | ||
| 1035 | F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { // Converts value to the float width result_bitsize (16, 32 or 64); a value already of that width is returned unchanged | ||
| 1036 | switch (result_bitsize) { | ||
| 1037 | case 16: | ||
| 1038 | switch (value.Type()) { | ||
| 1039 | case Type::F16: | ||
| 1040 | // Nothing to do | ||
| 1041 | return value; | ||
| 1042 | case Type::F32: | ||
| 1043 | return Inst<F16>(Opcode::ConvertF16F32, value); | ||
| 1044 | case Type::F64: | ||
| 1045 | throw LogicError("Illegal conversion from F64 to F16"); | ||
| 1046 | default: | ||
| 1047 | break; | ||
| 1048 | } | ||
| 1049 | break; | ||
| 1050 | case 32: | ||
| 1051 | switch (value.Type()) { | ||
| 1052 | case Type::F16: | ||
| 1053 | return Inst<F32>(Opcode::ConvertF32F16, value); | ||
| 1054 | case Type::F32: | ||
| 1055 | // Nothing to do | ||
| 1056 | return value; | ||
| 1057 | case Type::F64: | ||
| 1058 | return Inst<F32>(Opcode::ConvertF32F64, value); // ConvertF32F64 is declared with an F32 result, so the wrapper must be F32 | ||
| 1059 | default: | ||
| 1060 | break; | ||
| 1061 | } | ||
| 1062 | break; | ||
| 1063 | case 64: | ||
| 1064 | switch (value.Type()) { | ||
| 1065 | case Type::F16: | ||
| 1066 | throw LogicError("Illegal conversion from F16 to F64"); | ||
| 1067 | case Type::F32: | ||
| 1068 | return Inst<F64>(Opcode::ConvertF64F32, value); // F32 input must actually be widened; ConvertF64F32 is declared as F64 from F32 | ||
| 1069 | case Type::F64: | ||
| 1070 | // Nothing to do | ||
| 1071 | return value; | ||
| 1072 | default: | ||
| 1073 | break; | ||
| 1074 | } | ||
| 1075 | break; | ||
| 1076 | } | ||
| 1077 | throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); // reached for an unsupported width or a value type not handled above | ||
| 1078 | } | ||
| 1079 | |||
| 998 | } // namespace Shader::IR | 1080 | } // namespace Shader::IR |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 03a67985f..5beb99895 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -97,6 +97,7 @@ public: | |||
| 97 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, | 97 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, |
| 98 | const Value& e4); | 98 | const Value& e4); |
| 99 | [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); | 99 | [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); |
| 100 | [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element); | ||
| 100 | 101 | ||
| 101 | [[nodiscard]] Value Select(const U1& condition, const Value& true_value, | 102 | [[nodiscard]] Value Select(const U1& condition, const Value& true_value, |
| 102 | const Value& false_value); | 103 | const Value& false_value); |
| @@ -186,7 +187,8 @@ public: | |||
| 186 | [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); | 187 | [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); |
| 187 | [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); | 188 | [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); |
| 188 | 189 | ||
| 189 | [[nodiscard]] U32U64 ConvertU(size_t result_bitsize, const U32U64& value); | 190 | [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); |
| 191 | [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); | ||
| 190 | 192 | ||
| 191 | private: | 193 | private: |
| 192 | IR::Block::iterator insertion_point; | 194 | IR::Block::iterator insertion_point; |
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index aedbc5c3e..acfc0a829 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc | |||
| @@ -83,24 +83,36 @@ OPCODE(CompositeConstructU32x4, U32x4, U32, | |||
| 83 | OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) | 83 | OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) |
| 84 | OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) | 84 | OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) |
| 85 | OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) | 85 | OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) |
| 86 | OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) | ||
| 87 | OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) | ||
| 88 | OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) | ||
| 86 | OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) | 89 | OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) |
| 87 | OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) | 90 | OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) |
| 88 | OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) | 91 | OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) |
| 89 | OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) | 92 | OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) |
| 90 | OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) | 93 | OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) |
| 91 | OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) | 94 | OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) |
| 95 | OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) | ||
| 96 | OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) | ||
| 97 | OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) | ||
| 92 | OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) | 98 | OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) |
| 93 | OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) | 99 | OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) |
| 94 | OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) | 100 | OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) |
| 95 | OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) | 101 | OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) |
| 96 | OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) | 102 | OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) |
| 97 | OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) | 103 | OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) |
| 104 | OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) | ||
| 105 | OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) | ||
| 106 | OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) | ||
| 98 | OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) | 107 | OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) |
| 99 | OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) | 108 | OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) |
| 100 | OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) | 109 | OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) |
| 101 | OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) | 110 | OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) |
| 102 | OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) | 111 | OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) |
| 103 | OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) | 112 | OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) |
| 113 | OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) | ||
| 114 | OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) | ||
| 115 | OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) | ||
| 104 | 116 | ||
| 105 | // Select operations | 117 | // Select operations |
| 106 | OPCODE(SelectU8, U8, U1, U8, U8, ) | 118 | OPCODE(SelectU8, U8, U1, U8, U8, ) |
| @@ -277,6 +289,9 @@ OPCODE(ConvertU32F64, U32, F64, | |||
| 277 | OPCODE(ConvertU64F16, U64, F16, ) | 289 | OPCODE(ConvertU64F16, U64, F16, ) |
| 278 | OPCODE(ConvertU64F32, U64, F32, ) | 290 | OPCODE(ConvertU64F32, U64, F32, ) |
| 279 | OPCODE(ConvertU64F64, U64, F64, ) | 291 | OPCODE(ConvertU64F64, U64, F64, ) |
| 280 | |||
| 281 | OPCODE(ConvertU64U32, U64, U32, ) | 292 | OPCODE(ConvertU64U32, U64, U32, ) |
| 282 | OPCODE(ConvertU32U64, U32, U64, ) | 293 | OPCODE(ConvertU32U64, U32, U64, ) |
| 294 | OPCODE(ConvertF16F32, F16, F32, ) | ||
| 295 | OPCODE(ConvertF32F16, F32, F16, ) | ||
| 296 | OPCODE(ConvertF32F64, F32, F64, ) | ||
| 297 | OPCODE(ConvertF64F32, F64, F32, ) | ||