diff options
Diffstat (limited to '')
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 145 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 313 |
2 files changed, 458 insertions, 0 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index f356f9a03..e3d67ff87 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -335,6 +335,26 @@ enum class IsberdMode : u64 { | |||
| 335 | 335 | ||
| 336 | enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 }; | 336 | enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 }; |
| 337 | 337 | ||
| 338 | enum class HalfType : u64 { | ||
| 339 | H0_H1 = 0, | ||
| 340 | F32 = 1, | ||
| 341 | H0_H0 = 2, | ||
| 342 | H1_H1 = 3, | ||
| 343 | }; | ||
| 344 | |||
| 345 | enum class HalfMerge : u64 { | ||
| 346 | H0_H1 = 0, | ||
| 347 | F32 = 1, | ||
| 348 | Mrg_H0 = 2, | ||
| 349 | Mrg_H1 = 3, | ||
| 350 | }; | ||
| 351 | |||
| 352 | enum class HalfPrecision : u64 { | ||
| 353 | None = 0, | ||
| 354 | FTZ = 1, | ||
| 355 | FMZ = 2, | ||
| 356 | }; | ||
| 357 | |||
| 338 | enum class IpaInterpMode : u64 { | 358 | enum class IpaInterpMode : u64 { |
| 339 | Linear = 0, | 359 | Linear = 0, |
| 340 | Perspective = 1, | 360 | Perspective = 1, |
| @@ -554,6 +574,70 @@ union Instruction { | |||
| 554 | } alu_integer; | 574 | } alu_integer; |
| 555 | 575 | ||
| 556 | union { | 576 | union { |
| 577 | BitField<39, 1, u64> ftz; | ||
| 578 | BitField<32, 1, u64> saturate; | ||
| 579 | BitField<49, 2, HalfMerge> merge; | ||
| 580 | |||
| 581 | BitField<43, 1, u64> negate_a; | ||
| 582 | BitField<44, 1, u64> abs_a; | ||
| 583 | BitField<47, 2, HalfType> type_a; | ||
| 584 | |||
| 585 | BitField<31, 1, u64> negate_b; | ||
| 586 | BitField<30, 1, u64> abs_b; | ||
| 587 | BitField<47, 2, HalfType> type_b; | ||
| 588 | |||
| 589 | BitField<35, 2, HalfType> type_c; | ||
| 590 | } alu_half; | ||
| 591 | |||
| 592 | union { | ||
| 593 | BitField<39, 2, HalfPrecision> precision; | ||
| 594 | BitField<39, 1, u64> ftz; | ||
| 595 | BitField<52, 1, u64> saturate; | ||
| 596 | BitField<49, 2, HalfMerge> merge; | ||
| 597 | |||
| 598 | BitField<43, 1, u64> negate_a; | ||
| 599 | BitField<44, 1, u64> abs_a; | ||
| 600 | BitField<47, 2, HalfType> type_a; | ||
| 601 | } alu_half_imm; | ||
| 602 | |||
| 603 | union { | ||
| 604 | BitField<29, 1, u64> first_negate; | ||
| 605 | BitField<20, 9, u64> first; | ||
| 606 | |||
| 607 | BitField<56, 1, u64> second_negate; | ||
| 608 | BitField<30, 9, u64> second; | ||
| 609 | |||
| 610 | u32 PackImmediates() const { | ||
| 611 | // Immediates are half floats shifted. | ||
| 612 | constexpr u32 imm_shift = 6; | ||
| 613 | return static_cast<u32>((first << imm_shift) | (second << (16 + imm_shift))); | ||
| 614 | } | ||
| 615 | } half_imm; | ||
| 616 | |||
| 617 | union { | ||
| 618 | union { | ||
| 619 | BitField<37, 2, HalfPrecision> precision; | ||
| 620 | BitField<32, 1, u64> saturate; | ||
| 621 | |||
| 622 | BitField<30, 1, u64> negate_c; | ||
| 623 | BitField<35, 2, HalfType> type_c; | ||
| 624 | } rr; | ||
| 625 | |||
| 626 | BitField<57, 2, HalfPrecision> precision; | ||
| 627 | BitField<52, 1, u64> saturate; | ||
| 628 | |||
| 629 | BitField<49, 2, HalfMerge> merge; | ||
| 630 | |||
| 631 | BitField<47, 2, HalfType> type_a; | ||
| 632 | |||
| 633 | BitField<56, 1, u64> negate_b; | ||
| 634 | BitField<28, 2, HalfType> type_b; | ||
| 635 | |||
| 636 | BitField<51, 1, u64> negate_c; | ||
| 637 | BitField<53, 2, HalfType> type_reg39; | ||
| 638 | } hfma2; | ||
| 639 | |||
| 640 | union { | ||
| 557 | BitField<40, 1, u64> invert; | 641 | BitField<40, 1, u64> invert; |
| 558 | } popc; | 642 | } popc; |
| 559 | 643 | ||
| @@ -717,6 +801,23 @@ union Instruction { | |||
| 717 | } csetp; | 801 | } csetp; |
| 718 | 802 | ||
| 719 | union { | 803 | union { |
| 804 | BitField<35, 4, PredCondition> cond; | ||
| 805 | BitField<49, 1, u64> h_and; | ||
| 806 | BitField<6, 1, u64> ftz; | ||
| 807 | BitField<45, 2, PredOperation> op; | ||
| 808 | BitField<3, 3, u64> pred3; | ||
| 809 | BitField<0, 3, u64> pred0; | ||
| 810 | BitField<43, 1, u64> negate_a; | ||
| 811 | BitField<44, 1, u64> abs_a; | ||
| 812 | BitField<47, 2, HalfType> type_a; | ||
| 813 | BitField<31, 1, u64> negate_b; | ||
| 814 | BitField<30, 1, u64> abs_b; | ||
| 815 | BitField<28, 2, HalfType> type_b; | ||
| 816 | BitField<42, 1, u64> neg_pred; | ||
| 817 | BitField<39, 3, u64> pred39; | ||
| 818 | } hsetp2; | ||
| 819 | |||
| 820 | union { | ||
| 720 | BitField<39, 3, u64> pred39; | 821 | BitField<39, 3, u64> pred39; |
| 721 | BitField<42, 1, u64> neg_pred; | 822 | BitField<42, 1, u64> neg_pred; |
| 722 | BitField<43, 1, u64> neg_a; | 823 | BitField<43, 1, u64> neg_a; |
| @@ -731,6 +832,21 @@ union Instruction { | |||
| 731 | } fset; | 832 | } fset; |
| 732 | 833 | ||
| 733 | union { | 834 | union { |
| 835 | BitField<49, 1, u64> bf; | ||
| 836 | BitField<35, 3, PredCondition> cond; | ||
| 837 | BitField<50, 1, u64> ftz; | ||
| 838 | BitField<45, 2, PredOperation> op; | ||
| 839 | BitField<43, 1, u64> negate_a; | ||
| 840 | BitField<44, 1, u64> abs_a; | ||
| 841 | BitField<47, 2, HalfType> type_a; | ||
| 842 | BitField<31, 1, u64> negate_b; | ||
| 843 | BitField<30, 1, u64> abs_b; | ||
| 844 | BitField<28, 2, HalfType> type_b; | ||
| 845 | BitField<42, 1, u64> neg_pred; | ||
| 846 | BitField<39, 3, u64> pred39; | ||
| 847 | } hset2; | ||
| 848 | |||
| 849 | union { | ||
| 734 | BitField<39, 3, u64> pred39; | 850 | BitField<39, 3, u64> pred39; |
| 735 | BitField<42, 1, u64> neg_pred; | 851 | BitField<42, 1, u64> neg_pred; |
| 736 | BitField<44, 1, u64> bf; | 852 | BitField<44, 1, u64> bf; |
| @@ -1145,6 +1261,18 @@ public: | |||
| 1145 | LEA_RZ, | 1261 | LEA_RZ, |
| 1146 | LEA_IMM, | 1262 | LEA_IMM, |
| 1147 | LEA_HI, | 1263 | LEA_HI, |
| 1264 | HADD2_C, | ||
| 1265 | HADD2_R, | ||
| 1266 | HADD2_IMM, | ||
| 1267 | HMUL2_C, | ||
| 1268 | HMUL2_R, | ||
| 1269 | HMUL2_IMM, | ||
| 1270 | HFMA2_CR, | ||
| 1271 | HFMA2_RC, | ||
| 1272 | HFMA2_RR, | ||
| 1273 | HFMA2_IMM_R, | ||
| 1274 | HSETP2_R, | ||
| 1275 | HSET2_R, | ||
| 1148 | POPC_C, | 1276 | POPC_C, |
| 1149 | POPC_R, | 1277 | POPC_R, |
| 1150 | POPC_IMM, | 1278 | POPC_IMM, |
| @@ -1218,9 +1346,12 @@ public: | |||
| 1218 | ArithmeticImmediate, | 1346 | ArithmeticImmediate, |
| 1219 | ArithmeticInteger, | 1347 | ArithmeticInteger, |
| 1220 | ArithmeticIntegerImmediate, | 1348 | ArithmeticIntegerImmediate, |
| 1349 | ArithmeticHalf, | ||
| 1350 | ArithmeticHalfImmediate, | ||
| 1221 | Bfe, | 1351 | Bfe, |
| 1222 | Shift, | 1352 | Shift, |
| 1223 | Ffma, | 1353 | Ffma, |
| 1354 | Hfma2, | ||
| 1224 | Flow, | 1355 | Flow, |
| 1225 | Synch, | 1356 | Synch, |
| 1226 | Memory, | 1357 | Memory, |
| @@ -1228,6 +1359,8 @@ public: | |||
| 1228 | FloatSetPredicate, | 1359 | FloatSetPredicate, |
| 1229 | IntegerSet, | 1360 | IntegerSet, |
| 1230 | IntegerSetPredicate, | 1361 | IntegerSetPredicate, |
| 1362 | HalfSet, | ||
| 1363 | HalfSetPredicate, | ||
| 1231 | PredicateSetPredicate, | 1364 | PredicateSetPredicate, |
| 1232 | PredicateSetRegister, | 1365 | PredicateSetRegister, |
| 1233 | Conversion, | 1366 | Conversion, |
| @@ -1389,6 +1522,18 @@ private: | |||
| 1389 | INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"), | 1522 | INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"), |
| 1390 | INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"), | 1523 | INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"), |
| 1391 | INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"), | 1524 | INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"), |
| 1525 | INST("0111101-1-------", Id::HADD2_C, Type::ArithmeticHalf, "HADD2_C"), | ||
| 1526 | INST("0101110100010---", Id::HADD2_R, Type::ArithmeticHalf, "HADD2_R"), | ||
| 1527 | INST("0111101-0-------", Id::HADD2_IMM, Type::ArithmeticHalfImmediate, "HADD2_IMM"), | ||
| 1528 | INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"), | ||
| 1529 | INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"), | ||
| 1530 | INST("0111100-0-------", Id::HMUL2_IMM, Type::ArithmeticHalfImmediate, "HMUL2_IMM"), | ||
| 1531 | INST("01110---1-------", Id::HFMA2_CR, Type::Hfma2, "HFMA2_CR"), | ||
| 1532 | INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), | ||
| 1533 | INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), | ||
| 1534 | INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), | ||
| 1535 | INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"), | ||
| 1536 | INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), | ||
| 1392 | INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), | 1537 | INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), |
| 1393 | INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), | 1538 | INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), |
| 1394 | INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), | 1539 | INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index f4340a017..e050b063a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -376,6 +376,49 @@ public: | |||
| 376 | } | 376 | } |
| 377 | 377 | ||
| 378 | /** | 378 | /** |
| 379 | * Writes code that does a register assignment to a half float value operation. | ||
| 380 | * @param reg The destination register to use. | ||
| 381 | * @param elem The element to use for the operation. | ||
| 382 | * @param value The code representing the value to assign. Type has to be half float. | ||
| 383 | * @param type Half float kind of assignment. | ||
| 384 | * @param dest_num_components Number of components in the destionation. | ||
| 385 | * @param value_num_components Number of components in the value. | ||
| 386 | * @param is_saturated Optional, when True, saturates the provided value. | ||
| 387 | * @param dest_elem Optional, the destination element to use for the operation. | ||
| 388 | */ | ||
| 389 | void SetRegisterToHalfFloat(const Register& reg, u64 elem, const std::string& value, | ||
| 390 | Tegra::Shader::HalfMerge merge, u64 dest_num_components, | ||
| 391 | u64 value_num_components, bool is_saturated = false, | ||
| 392 | u64 dest_elem = 0) { | ||
| 393 | ASSERT_MSG(!is_saturated, "Unimplemented"); | ||
| 394 | |||
| 395 | const std::string result = [&]() { | ||
| 396 | switch (merge) { | ||
| 397 | case Tegra::Shader::HalfMerge::H0_H1: | ||
| 398 | return "uintBitsToFloat(packHalf2x16(" + value + "))"; | ||
| 399 | case Tegra::Shader::HalfMerge::F32: | ||
| 400 | // Half float instructions take the first component when doing a float cast. | ||
| 401 | return "float(" + value + ".x)"; | ||
| 402 | case Tegra::Shader::HalfMerge::Mrg_H0: | ||
| 403 | // TODO(Rodrigo): I guess Mrg_H0 and Mrg_H1 take their respective component from the | ||
| 404 | // pack. I couldn't test this on hardware but it shouldn't really matter since most | ||
| 405 | // of the time when a Mrg_* flag is used both components will be mirrored. That | ||
| 406 | // being said, it deserves a test. | ||
| 407 | return "((" + GetRegisterAsInteger(reg, 0, false) + | ||
| 408 | " & 0xffff0000) | (packHalf2x16(" + value + ") & 0x0000ffff))"; | ||
| 409 | case Tegra::Shader::HalfMerge::Mrg_H1: | ||
| 410 | return "((" + GetRegisterAsInteger(reg, 0, false) + | ||
| 411 | " & 0x0000ffff) | (packHalf2x16(" + value + ") & 0xffff0000))"; | ||
| 412 | default: | ||
| 413 | UNREACHABLE(); | ||
| 414 | return std::string("0"); | ||
| 415 | } | ||
| 416 | }(); | ||
| 417 | |||
| 418 | SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem); | ||
| 419 | } | ||
| 420 | |||
| 421 | /** | ||
| 379 | * Writes code that does a register assignment to input attribute operation. Input attributes | 422 | * Writes code that does a register assignment to input attribute operation. Input attributes |
| 380 | * are stored as floats, so this may require conversion. | 423 | * are stored as floats, so this may require conversion. |
| 381 | * @param reg The destination register to use. | 424 | * @param reg The destination register to use. |
| @@ -877,6 +920,19 @@ private: | |||
| 877 | return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_32()); | 920 | return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_32()); |
| 878 | } | 921 | } |
| 879 | 922 | ||
| 923 | /// Generates code representing a vec2 pair unpacked from a half float immediate | ||
| 924 | static std::string UnpackHalfImmediate(const Instruction& instr, bool negate) { | ||
| 925 | const std::string immediate = GetHalfFloat(std::to_string(instr.half_imm.PackImmediates())); | ||
| 926 | if (!negate) { | ||
| 927 | return immediate; | ||
| 928 | } | ||
| 929 | const std::string negate_first = instr.half_imm.first_negate != 0 ? "-" : ""; | ||
| 930 | const std::string negate_second = instr.half_imm.second_negate != 0 ? "-" : ""; | ||
| 931 | const std::string negate_vec = "vec2(" + negate_first + "1, " + negate_second + "1)"; | ||
| 932 | |||
| 933 | return '(' + immediate + " * " + negate_vec + ')'; | ||
| 934 | } | ||
| 935 | |||
| 880 | /// Generates code representing a texture sampler. | 936 | /// Generates code representing a texture sampler. |
| 881 | std::string GetSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array, | 937 | std::string GetSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array, |
| 882 | bool is_shadow) { | 938 | bool is_shadow) { |
| @@ -1013,6 +1069,41 @@ private: | |||
| 1013 | } | 1069 | } |
| 1014 | 1070 | ||
| 1015 | /* | 1071 | /* |
| 1072 | * Transforms the input string GLSL operand into an unpacked half float pair. | ||
| 1073 | * @note This function returns a float type pair instead of a half float pair. This is because | ||
| 1074 | * real half floats are not standarized in GLSL but unpackHalf2x16 (which returns a vec2) is. | ||
| 1075 | * @param operand Input operand. It has to be an unsigned integer. | ||
| 1076 | * @param type How to unpack the unsigned integer to a half float pair. | ||
| 1077 | * @param abs Get the absolute value of unpacked half floats. | ||
| 1078 | * @param neg Get the negative value of unpacked half floats. | ||
| 1079 | * @returns String corresponding to a half float pair. | ||
| 1080 | */ | ||
| 1081 | static std::string GetHalfFloat(const std::string& operand, | ||
| 1082 | Tegra::Shader::HalfType type = Tegra::Shader::HalfType::H0_H1, | ||
| 1083 | bool abs = false, bool neg = false) { | ||
| 1084 | // "vec2" calls emitted in this function are intended to alias components. | ||
| 1085 | const std::string value = [&]() { | ||
| 1086 | switch (type) { | ||
| 1087 | case Tegra::Shader::HalfType::H0_H1: | ||
| 1088 | return "unpackHalf2x16(" + operand + ')'; | ||
| 1089 | case Tegra::Shader::HalfType::F32: | ||
| 1090 | return "vec2(uintBitsToFloat(" + operand + "))"; | ||
| 1091 | case Tegra::Shader::HalfType::H0_H0: | ||
| 1092 | case Tegra::Shader::HalfType::H1_H1: { | ||
| 1093 | const bool high = type == Tegra::Shader::HalfType::H1_H1; | ||
| 1094 | const char unpack_index = "xy"[high ? 1 : 0]; | ||
| 1095 | return "vec2(unpackHalf2x16(" + operand + ")." + unpack_index + ')'; | ||
| 1096 | } | ||
| 1097 | default: | ||
| 1098 | UNREACHABLE(); | ||
| 1099 | return std::string("vec2(0)"); | ||
| 1100 | } | ||
| 1101 | }(); | ||
| 1102 | |||
| 1103 | return GetOperandAbsNeg(value, abs, neg); | ||
| 1104 | } | ||
| 1105 | |||
| 1106 | /* | ||
| 1016 | * Returns whether the instruction at the specified offset is a 'sched' instruction. | 1107 | * Returns whether the instruction at the specified offset is a 'sched' instruction. |
| 1017 | * Sched instructions always appear before a sequence of 3 instructions. | 1108 | * Sched instructions always appear before a sequence of 3 instructions. |
| 1018 | */ | 1109 | */ |
| @@ -1748,6 +1839,86 @@ private: | |||
| 1748 | 1839 | ||
| 1749 | break; | 1840 | break; |
| 1750 | } | 1841 | } |
| 1842 | case OpCode::Type::ArithmeticHalf: { | ||
| 1843 | if (opcode->GetId() == OpCode::Id::HADD2_C || opcode->GetId() == OpCode::Id::HADD2_R) { | ||
| 1844 | ASSERT_MSG(instr.alu_half.ftz == 0, "Unimplemented"); | ||
| 1845 | } | ||
| 1846 | const bool negate_a = | ||
| 1847 | opcode->GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; | ||
| 1848 | const bool negate_b = | ||
| 1849 | opcode->GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; | ||
| 1850 | |||
| 1851 | const std::string op_a = | ||
| 1852 | GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half.type_a, | ||
| 1853 | instr.alu_half.abs_a != 0, negate_a); | ||
| 1854 | |||
| 1855 | std::string op_b; | ||
| 1856 | switch (opcode->GetId()) { | ||
| 1857 | case OpCode::Id::HADD2_C: | ||
| 1858 | case OpCode::Id::HMUL2_C: | ||
| 1859 | op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 1860 | GLSLRegister::Type::UnsignedInteger); | ||
| 1861 | break; | ||
| 1862 | case OpCode::Id::HADD2_R: | ||
| 1863 | case OpCode::Id::HMUL2_R: | ||
| 1864 | op_b = regs.GetRegisterAsInteger(instr.gpr20, 0, false); | ||
| 1865 | break; | ||
| 1866 | default: | ||
| 1867 | UNREACHABLE(); | ||
| 1868 | op_b = "0"; | ||
| 1869 | break; | ||
| 1870 | } | ||
| 1871 | op_b = GetHalfFloat(op_b, instr.alu_half.type_b, instr.alu_half.abs_b != 0, negate_b); | ||
| 1872 | |||
| 1873 | const std::string result = [&]() { | ||
| 1874 | switch (opcode->GetId()) { | ||
| 1875 | case OpCode::Id::HADD2_C: | ||
| 1876 | case OpCode::Id::HADD2_R: | ||
| 1877 | return '(' + op_a + " + " + op_b + ')'; | ||
| 1878 | case OpCode::Id::HMUL2_C: | ||
| 1879 | case OpCode::Id::HMUL2_R: | ||
| 1880 | return '(' + op_a + " * " + op_b + ')'; | ||
| 1881 | default: | ||
| 1882 | LOG_CRITICAL(HW_GPU, "Unhandled half float instruction: {}", opcode->GetName()); | ||
| 1883 | UNREACHABLE(); | ||
| 1884 | return std::string("0"); | ||
| 1885 | } | ||
| 1886 | }(); | ||
| 1887 | |||
| 1888 | regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.alu_half.merge, 1, 1, | ||
| 1889 | instr.alu_half.saturate != 0); | ||
| 1890 | break; | ||
| 1891 | } | ||
| 1892 | case OpCode::Type::ArithmeticHalfImmediate: { | ||
| 1893 | if (opcode->GetId() == OpCode::Id::HADD2_IMM) { | ||
| 1894 | ASSERT_MSG(instr.alu_half_imm.ftz == 0, "Unimplemented"); | ||
| 1895 | } else { | ||
| 1896 | ASSERT_MSG(instr.alu_half_imm.precision == Tegra::Shader::HalfPrecision::None, | ||
| 1897 | "Unimplemented"); | ||
| 1898 | } | ||
| 1899 | |||
| 1900 | const std::string op_a = GetHalfFloat( | ||
| 1901 | regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half_imm.type_a, | ||
| 1902 | instr.alu_half_imm.abs_a != 0, instr.alu_half_imm.negate_a != 0); | ||
| 1903 | |||
| 1904 | const std::string op_b = UnpackHalfImmediate(instr, true); | ||
| 1905 | |||
| 1906 | const std::string result = [&]() { | ||
| 1907 | switch (opcode->GetId()) { | ||
| 1908 | case OpCode::Id::HADD2_IMM: | ||
| 1909 | return op_a + " + " + op_b; | ||
| 1910 | case OpCode::Id::HMUL2_IMM: | ||
| 1911 | return op_a + " * " + op_b; | ||
| 1912 | default: | ||
| 1913 | UNREACHABLE(); | ||
| 1914 | return std::string("0"); | ||
| 1915 | } | ||
| 1916 | }(); | ||
| 1917 | |||
| 1918 | regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.alu_half_imm.merge, 1, 1, | ||
| 1919 | instr.alu_half_imm.saturate != 0); | ||
| 1920 | break; | ||
| 1921 | } | ||
| 1751 | case OpCode::Type::Ffma: { | 1922 | case OpCode::Type::Ffma: { |
| 1752 | const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); | 1923 | const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); |
| 1753 | std::string op_b = instr.ffma.negate_b ? "-" : ""; | 1924 | std::string op_b = instr.ffma.negate_b ? "-" : ""; |
| @@ -1792,6 +1963,59 @@ private: | |||
| 1792 | instr.alu.saturate_d); | 1963 | instr.alu.saturate_d); |
| 1793 | break; | 1964 | break; |
| 1794 | } | 1965 | } |
| 1966 | case OpCode::Type::Hfma2: { | ||
| 1967 | if (opcode->GetId() == OpCode::Id::HFMA2_RR) { | ||
| 1968 | ASSERT_MSG(instr.hfma2.rr.precision == Tegra::Shader::HalfPrecision::None, | ||
| 1969 | "Unimplemented"); | ||
| 1970 | } else { | ||
| 1971 | ASSERT_MSG(instr.hfma2.precision == Tegra::Shader::HalfPrecision::None, | ||
| 1972 | "Unimplemented"); | ||
| 1973 | } | ||
| 1974 | const bool saturate = opcode->GetId() == OpCode::Id::HFMA2_RR | ||
| 1975 | ? instr.hfma2.rr.saturate != 0 | ||
| 1976 | : instr.hfma2.saturate != 0; | ||
| 1977 | |||
| 1978 | const std::string op_a = | ||
| 1979 | GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hfma2.type_a); | ||
| 1980 | std::string op_b, op_c; | ||
| 1981 | |||
| 1982 | switch (opcode->GetId()) { | ||
| 1983 | case OpCode::Id::HFMA2_CR: | ||
| 1984 | op_b = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 1985 | GLSLRegister::Type::UnsignedInteger), | ||
| 1986 | instr.hfma2.type_b, false, instr.hfma2.negate_b); | ||
| 1987 | op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 1988 | instr.hfma2.type_reg39, false, instr.hfma2.negate_c); | ||
| 1989 | break; | ||
| 1990 | case OpCode::Id::HFMA2_RC: | ||
| 1991 | op_b = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 1992 | instr.hfma2.type_reg39, false, instr.hfma2.negate_b); | ||
| 1993 | op_c = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 1994 | GLSLRegister::Type::UnsignedInteger), | ||
| 1995 | instr.hfma2.type_b, false, instr.hfma2.negate_c); | ||
| 1996 | break; | ||
| 1997 | case OpCode::Id::HFMA2_RR: | ||
| 1998 | op_b = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false), | ||
| 1999 | instr.hfma2.type_b, false, instr.hfma2.negate_b); | ||
| 2000 | op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 2001 | instr.hfma2.rr.type_c, false, instr.hfma2.rr.negate_c); | ||
| 2002 | break; | ||
| 2003 | case OpCode::Id::HFMA2_IMM_R: | ||
| 2004 | op_b = UnpackHalfImmediate(instr, true); | ||
| 2005 | op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 2006 | instr.hfma2.type_reg39, false, instr.hfma2.negate_c); | ||
| 2007 | break; | ||
| 2008 | default: | ||
| 2009 | UNREACHABLE(); | ||
| 2010 | op_c = op_b = "vec2(0)"; | ||
| 2011 | break; | ||
| 2012 | } | ||
| 2013 | |||
| 2014 | const std::string result = '(' + op_a + " * " + op_b + " + " + op_c + ')'; | ||
| 2015 | |||
| 2016 | regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.hfma2.merge, 1, 1, saturate); | ||
| 2017 | break; | ||
| 2018 | } | ||
| 1795 | case OpCode::Type::Conversion: { | 2019 | case OpCode::Type::Conversion: { |
| 1796 | switch (opcode->GetId()) { | 2020 | switch (opcode->GetId()) { |
| 1797 | case OpCode::Id::I2I_R: { | 2021 | case OpCode::Id::I2I_R: { |
| @@ -2611,6 +2835,51 @@ private: | |||
| 2611 | } | 2835 | } |
| 2612 | break; | 2836 | break; |
| 2613 | } | 2837 | } |
| 2838 | case OpCode::Type::HalfSetPredicate: { | ||
| 2839 | ASSERT_MSG(instr.hsetp2.ftz == 0, "Unimplemented"); | ||
| 2840 | |||
| 2841 | const std::string op_a = | ||
| 2842 | GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hsetp2.type_a, | ||
| 2843 | instr.hsetp2.abs_a, instr.hsetp2.negate_a); | ||
| 2844 | |||
| 2845 | const std::string op_b = [&]() { | ||
| 2846 | switch (opcode->GetId()) { | ||
| 2847 | case OpCode::Id::HSETP2_R: | ||
| 2848 | return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false), | ||
| 2849 | instr.hsetp2.type_b, instr.hsetp2.abs_a, | ||
| 2850 | instr.hsetp2.negate_b); | ||
| 2851 | default: | ||
| 2852 | UNREACHABLE(); | ||
| 2853 | return std::string("vec2(0)"); | ||
| 2854 | } | ||
| 2855 | }(); | ||
| 2856 | |||
| 2857 | // We can't use the constant predicate as destination. | ||
| 2858 | ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 2859 | |||
| 2860 | const std::string second_pred = | ||
| 2861 | GetPredicateCondition(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); | ||
| 2862 | |||
| 2863 | const std::string combiner = GetPredicateCombiner(instr.hsetp2.op); | ||
| 2864 | |||
| 2865 | const std::string component_combiner = instr.hsetp2.h_and ? "&&" : "||"; | ||
| 2866 | const std::string predicate = | ||
| 2867 | '(' + GetPredicateComparison(instr.hsetp2.cond, op_a + ".x", op_b + ".x") + ' ' + | ||
| 2868 | component_combiner + ' ' + | ||
| 2869 | GetPredicateComparison(instr.hsetp2.cond, op_a + ".y", op_b + ".y") + ')'; | ||
| 2870 | |||
| 2871 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 2872 | SetPredicate(instr.hsetp2.pred3, | ||
| 2873 | '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 2874 | |||
| 2875 | if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 2876 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||
| 2877 | // if enabled | ||
| 2878 | SetPredicate(instr.hsetp2.pred0, | ||
| 2879 | "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 2880 | } | ||
| 2881 | break; | ||
| 2882 | } | ||
| 2614 | case OpCode::Type::PredicateSetRegister: { | 2883 | case OpCode::Type::PredicateSetRegister: { |
| 2615 | const std::string op_a = | 2884 | const std::string op_a = |
| 2616 | GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0); | 2885 | GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0); |
| @@ -2771,6 +3040,50 @@ private: | |||
| 2771 | } | 3040 | } |
| 2772 | break; | 3041 | break; |
| 2773 | } | 3042 | } |
| 3043 | case OpCode::Type::HalfSet: { | ||
| 3044 | ASSERT_MSG(instr.hset2.ftz == 0, "Unimplemented"); | ||
| 3045 | |||
| 3046 | const std::string op_a = | ||
| 3047 | GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hset2.type_a, | ||
| 3048 | instr.hset2.abs_a != 0, instr.hset2.negate_a != 0); | ||
| 3049 | |||
| 3050 | const std::string op_b = [&]() { | ||
| 3051 | switch (opcode->GetId()) { | ||
| 3052 | case OpCode::Id::HSET2_R: | ||
| 3053 | return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false), | ||
| 3054 | instr.hset2.type_b, instr.hset2.abs_b != 0, | ||
| 3055 | instr.hset2.negate_b != 0); | ||
| 3056 | default: | ||
| 3057 | UNREACHABLE(); | ||
| 3058 | return std::string("vec2(0)"); | ||
| 3059 | } | ||
| 3060 | }(); | ||
| 3061 | |||
| 3062 | const std::string second_pred = | ||
| 3063 | GetPredicateCondition(instr.hset2.pred39, instr.hset2.neg_pred != 0); | ||
| 3064 | |||
| 3065 | const std::string combiner = GetPredicateCombiner(instr.hset2.op); | ||
| 3066 | |||
| 3067 | // HSET2 operates on each half float in the pack. | ||
| 3068 | std::string result; | ||
| 3069 | for (int i = 0; i < 2; ++i) { | ||
| 3070 | const std::string float_value = i == 0 ? "0x00003c00" : "0x3c000000"; | ||
| 3071 | const std::string integer_value = i == 0 ? "0x0000ffff" : "0xffff0000"; | ||
| 3072 | const std::string value = instr.hset2.bf == 1 ? float_value : integer_value; | ||
| 3073 | |||
| 3074 | const std::string comp = std::string(".") + "xy"[i]; | ||
| 3075 | const std::string predicate = | ||
| 3076 | "((" + GetPredicateComparison(instr.hset2.cond, op_a + comp, op_b + comp) + | ||
| 3077 | ") " + combiner + " (" + second_pred + "))"; | ||
| 3078 | |||
| 3079 | result += '(' + predicate + " ? " + value + " : 0)"; | ||
| 3080 | if (i == 0) { | ||
| 3081 | result += " | "; | ||
| 3082 | } | ||
| 3083 | } | ||
| 3084 | regs.SetRegisterToInteger(instr.gpr0, false, 0, '(' + result + ')', 1, 1); | ||
| 3085 | break; | ||
| 3086 | } | ||
| 2774 | case OpCode::Type::Xmad: { | 3087 | case OpCode::Type::Xmad: { |
| 2775 | ASSERT_MSG(!instr.xmad.sign_a, "Unimplemented"); | 3088 | ASSERT_MSG(!instr.xmad.sign_a, "Unimplemented"); |
| 2776 | ASSERT_MSG(!instr.xmad.sign_b, "Unimplemented"); | 3089 | ASSERT_MSG(!instr.xmad.sign_b, "Unimplemented"); |