summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar bunnei2019-04-19 21:31:52 -0400
committerGravatar GitHub2019-04-19 21:31:52 -0400
commit650d9b1044352139cd7718a097fc4822e47ac3b0 (patch)
treedb5f74b09194640b25026e6de0e673baf2e2b230 /src
parentMerge pull request #2415 from lioncash/const (diff)
parentvk_shader_decompiler: Add missing operations (diff)
downloadyuzu-650d9b1044352139cd7718a097fc4822e47ac3b0.tar.gz
yuzu-650d9b1044352139cd7718a097fc4822e47ac3b0.tar.xz
yuzu-650d9b1044352139cd7718a097fc4822e47ac3b0.zip
Merge pull request #2409 from ReinUsesLisp/half-floats
shader_ir/decode: Miscellaneous fixes to half-float decompilation
Diffstat (limited to 'src')
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp126
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp25
-rw-r--r--src/video_core/shader/decode/arithmetic_half.cpp15
-rw-r--r--src/video_core/shader/decode/arithmetic_half_immediate.cpp17
-rw-r--r--src/video_core/shader/decode/half_set.cpp16
-rw-r--r--src/video_core/shader/decode/half_set_predicate.cpp8
-rw-r--r--src/video_core/shader/decode/hfma2.cpp12
-rw-r--r--src/video_core/shader/shader_ir.cpp51
-rw-r--r--src/video_core/shader/shader_ir.h47
9 files changed, 181 insertions, 136 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 445048daf..cd462621d 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -119,14 +119,10 @@ std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
119 119
120/// Returns true if an object has to be treated as precise 120/// Returns true if an object has to be treated as precise
121bool IsPrecise(Operation operand) { 121bool IsPrecise(Operation operand) {
122 const auto& meta = operand.GetMeta(); 122 const auto& meta{operand.GetMeta()};
123
124 if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { 123 if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
125 return arithmetic->precise; 124 return arithmetic->precise;
126 } 125 }
127 if (const auto half_arithmetic = std::get_if<MetaHalfArithmetic>(&meta)) {
128 return half_arithmetic->precise;
129 }
130 return false; 126 return false;
131} 127}
132 128
@@ -627,28 +623,7 @@ private:
627 } 623 }
628 624
629 std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { 625 std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
630 std::string value = VisitOperand(operation, operand_index); 626 return CastOperand(VisitOperand(operation, operand_index), type);
631 switch (type) {
632 case Type::HalfFloat: {
633 const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta());
634 if (!half_meta) {
635 value = "toHalf2(" + value + ')';
636 }
637
638 switch (half_meta->types.at(operand_index)) {
639 case Tegra::Shader::HalfType::H0_H1:
640 return "toHalf2(" + value + ')';
641 case Tegra::Shader::HalfType::F32:
642 return "vec2(" + value + ')';
643 case Tegra::Shader::HalfType::H0_H0:
644 return "vec2(toHalf2(" + value + ")[0])";
645 case Tegra::Shader::HalfType::H1_H1:
646 return "vec2(toHalf2(" + value + ")[1])";
647 }
648 }
649 default:
650 return CastOperand(value, type);
651 }
652 } 627 }
653 628
654 std::string CastOperand(const std::string& value, Type type) const { 629 std::string CastOperand(const std::string& value, Type type) const {
@@ -662,9 +637,7 @@ private:
662 case Type::Uint: 637 case Type::Uint:
663 return "ftou(" + value + ')'; 638 return "ftou(" + value + ')';
664 case Type::HalfFloat: 639 case Type::HalfFloat:
665 // Can't be handled as a stand-alone value 640 return "toHalf2(" + value + ')';
666 UNREACHABLE();
667 return value;
668 } 641 }
669 UNREACHABLE(); 642 UNREACHABLE();
670 return value; 643 return value;
@@ -1083,13 +1056,40 @@ private:
1083 return BitwiseCastResult(value, Type::HalfFloat); 1056 return BitwiseCastResult(value, Type::HalfFloat);
1084 } 1057 }
1085 1058
1059 std::string HClamp(Operation operation) {
1060 const std::string value = VisitOperand(operation, 0, Type::HalfFloat);
1061 const std::string min = VisitOperand(operation, 1, Type::Float);
1062 const std::string max = VisitOperand(operation, 2, Type::Float);
1063 const std::string clamped = "clamp(" + value + ", vec2(" + min + "), vec2(" + max + "))";
1064 return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat));
1065 }
1066
1067 std::string HUnpack(Operation operation) {
1068 const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)};
1069 const auto value = [&]() -> std::string {
1070 switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
1071 case Tegra::Shader::HalfType::H0_H1:
1072 return operand;
1073 case Tegra::Shader::HalfType::F32:
1074 return "vec2(fromHalf2(" + operand + "))";
1075 case Tegra::Shader::HalfType::H0_H0:
1076 return "vec2(" + operand + "[0])";
1077 case Tegra::Shader::HalfType::H1_H1:
1078 return "vec2(" + operand + "[1])";
1079 }
1080 UNREACHABLE();
1081 return "0";
1082 }();
1083 return "fromHalf2(" + value + ')';
1084 }
1085
1086 std::string HMergeF32(Operation operation) { 1086 std::string HMergeF32(Operation operation) {
1087 return "float(toHalf2(" + Visit(operation[0]) + ")[0])"; 1087 return "float(toHalf2(" + Visit(operation[0]) + ")[0])";
1088 } 1088 }
1089 1089
1090 std::string HMergeH0(Operation operation) { 1090 std::string HMergeH0(Operation operation) {
1091 return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[1], toHalf2(" + 1091 return "fromHalf2(vec2(toHalf2(" + Visit(operation[1]) + ")[0], toHalf2(" +
1092 Visit(operation[1]) + ")[0]))"; 1092 Visit(operation[0]) + ")[1]))";
1093 } 1093 }
1094 1094
1095 std::string HMergeH1(Operation operation) { 1095 std::string HMergeH1(Operation operation) {
@@ -1189,34 +1189,46 @@ private:
1189 return GenerateUnary(operation, "any", Type::Bool, Type::Bool2); 1189 return GenerateUnary(operation, "any", Type::Bool, Type::Bool2);
1190 } 1190 }
1191 1191
1192 template <bool with_nan>
1193 std::string GenerateHalfComparison(Operation operation, std::string compare_op) {
1194 std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
1195 Type::HalfFloat, Type::HalfFloat)};
1196 if constexpr (!with_nan) {
1197 return comparison;
1198 }
1199 return "halfFloatNanComparison(" + comparison + ", " +
1200 VisitOperand(operation, 0, Type::HalfFloat) + ", " +
1201 VisitOperand(operation, 1, Type::HalfFloat) + ')';
1202 }
1203
1204 template <bool with_nan>
1192 std::string Logical2HLessThan(Operation operation) { 1205 std::string Logical2HLessThan(Operation operation) {
1193 return GenerateBinaryCall(operation, "lessThan", Type::Bool2, Type::HalfFloat, 1206 return GenerateHalfComparison<with_nan>(operation, "lessThan");
1194 Type::HalfFloat);
1195 } 1207 }
1196 1208
1209 template <bool with_nan>
1197 std::string Logical2HEqual(Operation operation) { 1210 std::string Logical2HEqual(Operation operation) {
1198 return GenerateBinaryCall(operation, "equal", Type::Bool2, Type::HalfFloat, 1211 return GenerateHalfComparison<with_nan>(operation, "equal");
1199 Type::HalfFloat);
1200 } 1212 }
1201 1213
1214 template <bool with_nan>
1202 std::string Logical2HLessEqual(Operation operation) { 1215 std::string Logical2HLessEqual(Operation operation) {
1203 return GenerateBinaryCall(operation, "lessThanEqual", Type::Bool2, Type::HalfFloat, 1216 return GenerateHalfComparison<with_nan>(operation, "lessThanEqual");
1204 Type::HalfFloat);
1205 } 1217 }
1206 1218
1219 template <bool with_nan>
1207 std::string Logical2HGreaterThan(Operation operation) { 1220 std::string Logical2HGreaterThan(Operation operation) {
1208 return GenerateBinaryCall(operation, "greaterThan", Type::Bool2, Type::HalfFloat, 1221 return GenerateHalfComparison<with_nan>(operation, "greaterThan");
1209 Type::HalfFloat);
1210 } 1222 }
1211 1223
1224 template <bool with_nan>
1212 std::string Logical2HNotEqual(Operation operation) { 1225 std::string Logical2HNotEqual(Operation operation) {
1213 return GenerateBinaryCall(operation, "notEqual", Type::Bool2, Type::HalfFloat, 1226 return GenerateHalfComparison<with_nan>(operation, "notEqual");
1214 Type::HalfFloat);
1215 } 1227 }
1216 1228
1229 template <bool with_nan>
1217 std::string Logical2HGreaterEqual(Operation operation) { 1230 std::string Logical2HGreaterEqual(Operation operation) {
1218 return GenerateBinaryCall(operation, "greaterThanEqual", Type::Bool2, Type::HalfFloat, 1231 return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual");
1219 Type::HalfFloat);
1220 } 1232 }
1221 1233
1222 std::string Texture(Operation operation) { 1234 std::string Texture(Operation operation) {
@@ -1505,6 +1517,8 @@ private:
1505 &GLSLDecompiler::Fma<Type::HalfFloat>, 1517 &GLSLDecompiler::Fma<Type::HalfFloat>,
1506 &GLSLDecompiler::Absolute<Type::HalfFloat>, 1518 &GLSLDecompiler::Absolute<Type::HalfFloat>,
1507 &GLSLDecompiler::HNegate, 1519 &GLSLDecompiler::HNegate,
1520 &GLSLDecompiler::HClamp,
1521 &GLSLDecompiler::HUnpack,
1508 &GLSLDecompiler::HMergeF32, 1522 &GLSLDecompiler::HMergeF32,
1509 &GLSLDecompiler::HMergeH0, 1523 &GLSLDecompiler::HMergeH0,
1510 &GLSLDecompiler::HMergeH1, 1524 &GLSLDecompiler::HMergeH1,
@@ -1541,12 +1555,18 @@ private:
1541 &GLSLDecompiler::LogicalNotEqual<Type::Uint>, 1555 &GLSLDecompiler::LogicalNotEqual<Type::Uint>,
1542 &GLSLDecompiler::LogicalGreaterEqual<Type::Uint>, 1556 &GLSLDecompiler::LogicalGreaterEqual<Type::Uint>,
1543 1557
1544 &GLSLDecompiler::Logical2HLessThan, 1558 &GLSLDecompiler::Logical2HLessThan<false>,
1545 &GLSLDecompiler::Logical2HEqual, 1559 &GLSLDecompiler::Logical2HEqual<false>,
1546 &GLSLDecompiler::Logical2HLessEqual, 1560 &GLSLDecompiler::Logical2HLessEqual<false>,
1547 &GLSLDecompiler::Logical2HGreaterThan, 1561 &GLSLDecompiler::Logical2HGreaterThan<false>,
1548 &GLSLDecompiler::Logical2HNotEqual, 1562 &GLSLDecompiler::Logical2HNotEqual<false>,
1549 &GLSLDecompiler::Logical2HGreaterEqual, 1563 &GLSLDecompiler::Logical2HGreaterEqual<false>,
1564 &GLSLDecompiler::Logical2HLessThan<true>,
1565 &GLSLDecompiler::Logical2HEqual<true>,
1566 &GLSLDecompiler::Logical2HLessEqual<true>,
1567 &GLSLDecompiler::Logical2HGreaterThan<true>,
1568 &GLSLDecompiler::Logical2HNotEqual<true>,
1569 &GLSLDecompiler::Logical2HGreaterEqual<true>,
1550 1570
1551 &GLSLDecompiler::Texture, 1571 &GLSLDecompiler::Texture,
1552 &GLSLDecompiler::TextureLod, 1572 &GLSLDecompiler::TextureLod,
@@ -1647,6 +1667,12 @@ std::string GetCommonDeclarations() {
1647 "}\n\n" 1667 "}\n\n"
1648 "vec2 toHalf2(float value) {\n" 1668 "vec2 toHalf2(float value) {\n"
1649 " return unpackHalf2x16(ftou(value));\n" 1669 " return unpackHalf2x16(ftou(value));\n"
1670 "}\n\n"
1671 "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {\n"
1672 " bvec2 is_nan1 = isnan(pair1);\n"
1673 " bvec2 is_nan2 = isnan(pair2);\n"
1674 " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || "
1675 "is_nan2.y);\n"
1650 "}\n"; 1676 "}\n";
1651} 1677}
1652 1678
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 25500f9a3..23d9b10db 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -76,14 +76,10 @@ constexpr u32 GetGenericAttributeLocation(Attribute::Index attribute) {
76 76
77/// Returns true if an object has to be treated as precise 77/// Returns true if an object has to be treated as precise
78bool IsPrecise(Operation operand) { 78bool IsPrecise(Operation operand) {
79 const auto& meta = operand.GetMeta(); 79 const auto& meta{operand.GetMeta()};
80
81 if (std::holds_alternative<MetaArithmetic>(meta)) { 80 if (std::holds_alternative<MetaArithmetic>(meta)) {
82 return std::get<MetaArithmetic>(meta).precise; 81 return std::get<MetaArithmetic>(meta).precise;
83 } 82 }
84 if (std::holds_alternative<MetaHalfArithmetic>(meta)) {
85 return std::get<MetaHalfArithmetic>(meta).precise;
86 }
87 return false; 83 return false;
88} 84}
89 85
@@ -746,6 +742,16 @@ private:
746 return {}; 742 return {};
747 } 743 }
748 744
745 Id HClamp(Operation operation) {
746 UNIMPLEMENTED();
747 return {};
748 }
749
750 Id HUnpack(Operation operation) {
751 UNIMPLEMENTED();
752 return {};
753 }
754
749 Id HMergeF32(Operation operation) { 755 Id HMergeF32(Operation operation) {
750 UNIMPLEMENTED(); 756 UNIMPLEMENTED();
751 return {}; 757 return {};
@@ -1218,6 +1224,8 @@ private:
1218 &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>, 1224 &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>,
1219 &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, 1225 &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>,
1220 &SPIRVDecompiler::HNegate, 1226 &SPIRVDecompiler::HNegate,
1227 &SPIRVDecompiler::HClamp,
1228 &SPIRVDecompiler::HUnpack,
1221 &SPIRVDecompiler::HMergeF32, 1229 &SPIRVDecompiler::HMergeF32,
1222 &SPIRVDecompiler::HMergeH0, 1230 &SPIRVDecompiler::HMergeH0,
1223 &SPIRVDecompiler::HMergeH1, 1231 &SPIRVDecompiler::HMergeH1,
@@ -1260,6 +1268,13 @@ private:
1260 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::HalfFloat>, 1268 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::HalfFloat>,
1261 &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::HalfFloat>, 1269 &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::HalfFloat>,
1262 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::HalfFloat>, 1270 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::HalfFloat>,
1271 // TODO(Rodrigo): Should these use the OpFUnord* variants?
1272 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::HalfFloat>,
1273 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::HalfFloat>,
1274 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::HalfFloat>,
1275 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::HalfFloat>,
1276 &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::HalfFloat>,
1277 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::HalfFloat>,
1263 1278
1264 &SPIRVDecompiler::Texture, 1279 &SPIRVDecompiler::Texture,
1265 &SPIRVDecompiler::TextureLod, 1280 &SPIRVDecompiler::TextureLod,
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index baee89107..9467f9417 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -18,7 +18,9 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
18 18
19 if (opcode->get().GetId() == OpCode::Id::HADD2_C || 19 if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
20 opcode->get().GetId() == OpCode::Id::HADD2_R) { 20 opcode->get().GetId() == OpCode::Id::HADD2_R) {
21 UNIMPLEMENTED_IF(instr.alu_half.ftz != 0); 21 if (instr.alu_half.ftz != 0) {
22 LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
23 }
22 } 24 }
23 UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented"); 25 UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");
24 26
@@ -27,9 +29,8 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
27 const bool negate_b = 29 const bool negate_b =
28 opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; 30 opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;
29 31
30 const Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half.abs_a, negate_a); 32 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
31 33 op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a);
32 // instr.alu_half.type_a
33 34
34 Node op_b = [&]() { 35 Node op_b = [&]() {
35 switch (opcode->get().GetId()) { 36 switch (opcode->get().GetId()) {
@@ -44,17 +45,17 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
44 return Immediate(0); 45 return Immediate(0);
45 } 46 }
46 }(); 47 }();
48 op_b = UnpackHalfFloat(op_b, instr.alu_half.type_b);
47 op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); 49 op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
48 50
49 Node value = [&]() { 51 Node value = [&]() {
50 MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a, instr.alu_half.type_b}};
51 switch (opcode->get().GetId()) { 52 switch (opcode->get().GetId()) {
52 case OpCode::Id::HADD2_C: 53 case OpCode::Id::HADD2_C:
53 case OpCode::Id::HADD2_R: 54 case OpCode::Id::HADD2_R:
54 return Operation(OperationCode::HAdd, meta, op_a, op_b); 55 return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
55 case OpCode::Id::HMUL2_C: 56 case OpCode::Id::HMUL2_C:
56 case OpCode::Id::HMUL2_R: 57 case OpCode::Id::HMUL2_R:
57 return Operation(OperationCode::HMul, meta, op_a, op_b); 58 return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
58 default: 59 default:
59 UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); 60 UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
60 return Immediate(0); 61 return Immediate(0);
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index c2164ba50..fbcd35b18 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -17,34 +17,33 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
17 const auto opcode = OpCode::Decode(instr); 17 const auto opcode = OpCode::Decode(instr);
18 18
19 if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { 19 if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
20 UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0); 20 if (instr.alu_half_imm.ftz != 0) {
21 LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
22 }
21 } else { 23 } else {
22 UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); 24 UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None);
23 } 25 }
24 UNIMPLEMENTED_IF_MSG(instr.alu_half_imm.saturate != 0,
25 "Half float immediate saturation not implemented");
26 26
27 Node op_a = GetRegister(instr.gpr8); 27 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a);
28 op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); 28 op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a);
29 29
30 const Node op_b = UnpackHalfImmediate(instr, true); 30 const Node op_b = UnpackHalfImmediate(instr, true);
31 31
32 Node value = [&]() { 32 Node value = [&]() {
33 MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a}};
34 switch (opcode->get().GetId()) { 33 switch (opcode->get().GetId()) {
35 case OpCode::Id::HADD2_IMM: 34 case OpCode::Id::HADD2_IMM:
36 return Operation(OperationCode::HAdd, meta, op_a, op_b); 35 return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
37 case OpCode::Id::HMUL2_IMM: 36 case OpCode::Id::HMUL2_IMM:
38 return Operation(OperationCode::HMul, meta, op_a, op_b); 37 return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
39 default: 38 default:
40 UNREACHABLE(); 39 UNREACHABLE();
41 return Immediate(0); 40 return Immediate(0);
42 } 41 }
43 }(); 42 }();
44 value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge);
45 43
44 value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate);
45 value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge);
46 SetRegister(bb, instr.gpr0, value); 46 SetRegister(bb, instr.gpr0, value);
47
48 return pc; 47 return pc;
49} 48}
50 49
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 748368555..1dd94bf9d 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -18,11 +18,13 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]}; 18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr); 19 const auto opcode = OpCode::Decode(instr);
20 20
21 UNIMPLEMENTED_IF(instr.hset2.ftz != 0); 21 if (instr.hset2.ftz != 0) {
22 LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
23 }
24
25 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
26 op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
22 27
23 // instr.hset2.type_a
24 // instr.hset2.type_b
25 Node op_a = GetRegister(instr.gpr8);
26 Node op_b = [&]() { 28 Node op_b = [&]() {
27 switch (opcode->get().GetId()) { 29 switch (opcode->get().GetId()) {
28 case OpCode::Id::HSET2_R: 30 case OpCode::Id::HSET2_R:
@@ -32,14 +34,12 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
32 return Immediate(0); 34 return Immediate(0);
33 } 35 }
34 }(); 36 }();
35 37 op_b = UnpackHalfFloat(op_b, instr.hset2.type_b);
36 op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
37 op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b); 38 op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b);
38 39
39 const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); 40 const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
40 41
41 MetaHalfArithmetic meta{false, {instr.hset2.type_a, instr.hset2.type_b}}; 42 const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b);
42 const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, meta, op_a, op_b);
43 43
44 const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); 44 const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
45 45
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index e68512692..6e59eb650 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -19,10 +19,10 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
19 19
20 UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); 20 UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0);
21 21
22 Node op_a = GetRegister(instr.gpr8); 22 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
23 op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); 23 op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
24 24
25 const Node op_b = [&]() { 25 Node op_b = [&]() {
26 switch (opcode->get().GetId()) { 26 switch (opcode->get().GetId()) {
27 case OpCode::Id::HSETP2_R: 27 case OpCode::Id::HSETP2_R:
28 return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, 28 return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a,
@@ -32,6 +32,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
32 return Immediate(0); 32 return Immediate(0);
33 } 33 }
34 }(); 34 }();
35 op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b);
35 36
36 // We can't use the constant predicate as destination. 37 // We can't use the constant predicate as destination.
37 ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); 38 ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex));
@@ -42,8 +43,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
42 const OperationCode pair_combiner = 43 const OperationCode pair_combiner =
43 instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2; 44 instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;
44 45
45 MetaHalfArithmetic meta = {false, {instr.hsetp2.type_a, instr.hsetp2.type_b}}; 46 const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b);
46 const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b);
47 const Node first_pred = Operation(pair_combiner, comparison); 47 const Node first_pred = Operation(pair_combiner, comparison);
48 48
49 // Set the primary predicate to the result of Predicate OP SecondPredicate 49 // Set the primary predicate to the result of Predicate OP SecondPredicate
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index 7a07c5ec6..5c1becce5 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -27,10 +27,6 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
27 } 27 }
28 28
29 constexpr auto identity = HalfType::H0_H1; 29 constexpr auto identity = HalfType::H0_H1;
30
31 const HalfType type_a = instr.hfma2.type_a;
32 const Node op_a = GetRegister(instr.gpr8);
33
34 bool neg_b{}, neg_c{}; 30 bool neg_b{}, neg_c{};
35 auto [saturate, type_b, op_b, type_c, 31 auto [saturate, type_b, op_b, type_c,
36 op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> { 32 op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
@@ -62,11 +58,11 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
62 }(); 58 }();
63 UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented"); 59 UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");
64 60
65 op_b = GetOperandAbsNegHalf(op_b, false, neg_b); 61 const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
66 op_c = GetOperandAbsNegHalf(op_c, false, neg_c); 62 op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
63 op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);
67 64
68 MetaHalfArithmetic meta{true, {type_a, type_b, type_c}}; 65 Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
69 Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c);
70 value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); 66 value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
71 67
72 SetRegister(bb, instr.gpr0, value); 68 SetRegister(bb, instr.gpr0, value);
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index ac5112d78..17f2f711c 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -189,7 +189,11 @@ Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
189 const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); 189 const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
190 const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); 190 const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
191 191
192 return Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, first_negate, second_negate); 192 return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate);
193}
194
195Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) {
196 return Operation(OperationCode::HUnpack, type, value);
193} 197}
194 198
195Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { 199Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
@@ -209,17 +213,26 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
209 213
210Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { 214Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
211 if (absolute) { 215 if (absolute) {
212 value = Operation(OperationCode::HAbsolute, HALF_NO_PRECISE, value); 216 value = Operation(OperationCode::HAbsolute, NO_PRECISE, value);
213 } 217 }
214 if (negate) { 218 if (negate) {
215 value = Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, GetPredicate(true), 219 value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true),
216 GetPredicate(true)); 220 GetPredicate(true));
217 } 221 }
218 return value; 222 return value;
219} 223}
220 224
225Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
226 if (!saturate) {
227 return value;
228 }
229 const Node positive_zero = Immediate(std::copysignf(0, 1));
230 const Node positive_one = Immediate(1.0f);
231 return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one);
232}
233
221Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { 234Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
222 static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { 235 const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
223 {PredCondition::LessThan, OperationCode::LogicalFLessThan}, 236 {PredCondition::LessThan, OperationCode::LogicalFLessThan},
224 {PredCondition::Equal, OperationCode::LogicalFEqual}, 237 {PredCondition::Equal, OperationCode::LogicalFEqual},
225 {PredCondition::LessEqual, OperationCode::LogicalFLessEqual}, 238 {PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
@@ -255,7 +268,7 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N
255 268
256Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, 269Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
257 Node op_b) { 270 Node op_b) {
258 static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { 271 const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
259 {PredCondition::LessThan, OperationCode::LogicalILessThan}, 272 {PredCondition::LessThan, OperationCode::LogicalILessThan},
260 {PredCondition::Equal, OperationCode::LogicalIEqual}, 273 {PredCondition::Equal, OperationCode::LogicalIEqual},
261 {PredCondition::LessEqual, OperationCode::LogicalILessEqual}, 274 {PredCondition::LessEqual, OperationCode::LogicalILessEqual},
@@ -283,40 +296,32 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si
283 return predicate; 296 return predicate;
284} 297}
285 298
286Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, 299Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a,
287 const MetaHalfArithmetic& meta, Node op_a, Node op_b) { 300 Node op_b) {
288 301 const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
289 UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
290 condition == PredCondition::NotEqualWithNan ||
291 condition == PredCondition::LessEqualWithNan ||
292 condition == PredCondition::GreaterThanWithNan ||
293 condition == PredCondition::GreaterEqualWithNan,
294 "Unimplemented NaN comparison for half floats");
295
296 static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
297 {PredCondition::LessThan, OperationCode::Logical2HLessThan}, 302 {PredCondition::LessThan, OperationCode::Logical2HLessThan},
298 {PredCondition::Equal, OperationCode::Logical2HEqual}, 303 {PredCondition::Equal, OperationCode::Logical2HEqual},
299 {PredCondition::LessEqual, OperationCode::Logical2HLessEqual}, 304 {PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
300 {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan}, 305 {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
301 {PredCondition::NotEqual, OperationCode::Logical2HNotEqual}, 306 {PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
302 {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual}, 307 {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
303 {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThan}, 308 {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan},
304 {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqual}, 309 {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan},
305 {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqual}, 310 {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan},
306 {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThan}, 311 {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan},
307 {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqual}}; 312 {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}};
308 313
309 const auto comparison{PredicateComparisonTable.find(condition)}; 314 const auto comparison{PredicateComparisonTable.find(condition)};
310 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), 315 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
311 "Unknown predicate comparison operation"); 316 "Unknown predicate comparison operation");
312 317
313 const Node predicate = Operation(comparison->second, meta, op_a, op_b); 318 const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b);
314 319
315 return predicate; 320 return predicate;
316} 321}
317 322
318OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { 323OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
319 static const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = { 324 const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = {
320 {PredOperation::And, OperationCode::LogicalAnd}, 325 {PredOperation::And, OperationCode::LogicalAnd},
321 {PredOperation::Or, OperationCode::LogicalOr}, 326 {PredOperation::Or, OperationCode::LogicalOr},
322 {PredOperation::Xor, OperationCode::LogicalXor}, 327 {PredOperation::Xor, OperationCode::LogicalXor},
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 57af8b10f..81278fb33 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -109,11 +109,13 @@ enum class OperationCode {
109 UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint 109 UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint
110 UBitCount, /// (MetaArithmetic, uint) -> uint 110 UBitCount, /// (MetaArithmetic, uint) -> uint
111 111
112 HAdd, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 112 HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
113 HMul, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 113 HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
114 HFma, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 114 HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
115 HAbsolute, /// (f16vec2 a) -> f16vec2 115 HAbsolute, /// (f16vec2 a) -> f16vec2
116 HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 116 HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2
117 HClamp, /// (f16vec2 src, float min, float max) -> f16vec2
118 HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2
117 HMergeF32, /// (f16vec2 src) -> float 119 HMergeF32, /// (f16vec2 src) -> float
118 HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 120 HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2
119 HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 121 HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2
@@ -150,12 +152,18 @@ enum class OperationCode {
150 LogicalUNotEqual, /// (uint a, uint b) -> bool 152 LogicalUNotEqual, /// (uint a, uint b) -> bool
151 LogicalUGreaterEqual, /// (uint a, uint b) -> bool 153 LogicalUGreaterEqual, /// (uint a, uint b) -> bool
152 154
153 Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 155 Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
154 Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 156 Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
155 Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 157 Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
156 Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 158 Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
157 Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 159 Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
158 Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 160 Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
161 Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
162 Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
163 Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
164 Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
165 Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
166 Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
159 167
160 Texture, /// (MetaTexture, float[N] coords) -> float4 168 Texture, /// (MetaTexture, float[N] coords) -> float4
161 TextureLod, /// (MetaTexture, float[N] coords) -> float4 169 TextureLod, /// (MetaTexture, float[N] coords) -> float4
@@ -308,13 +316,6 @@ struct MetaArithmetic {
308 bool precise{}; 316 bool precise{};
309}; 317};
310 318
311struct MetaHalfArithmetic {
312 bool precise{};
313 std::array<Tegra::Shader::HalfType, 3> types = {Tegra::Shader::HalfType::H0_H1,
314 Tegra::Shader::HalfType::H0_H1,
315 Tegra::Shader::HalfType::H0_H1};
316};
317
318struct MetaTexture { 319struct MetaTexture {
319 const Sampler& sampler; 320 const Sampler& sampler;
320 Node array{}; 321 Node array{};
@@ -326,11 +327,10 @@ struct MetaTexture {
326 u32 element{}; 327 u32 element{};
327}; 328};
328 329
329constexpr MetaArithmetic PRECISE = {true}; 330inline constexpr MetaArithmetic PRECISE = {true};
330constexpr MetaArithmetic NO_PRECISE = {false}; 331inline constexpr MetaArithmetic NO_PRECISE = {false};
331constexpr MetaHalfArithmetic HALF_NO_PRECISE = {false};
332 332
333using Meta = std::variant<MetaArithmetic, MetaHalfArithmetic, MetaTexture>; 333using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>;
334 334
335/// Holds any kind of operation that can be done in the IR 335/// Holds any kind of operation that can be done in the IR
336class OperationNode final { 336class OperationNode final {
@@ -734,10 +734,14 @@ private:
734 734
735 /// Unpacks a half immediate from an instruction 735 /// Unpacks a half immediate from an instruction
736 Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); 736 Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation);
737 /// Unpacks a binary value into a half float pair with a type format
738 Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type);
737 /// Merges a half pair into another value 739 /// Merges a half pair into another value
738 Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); 740 Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge);
739 /// Conditionally absolute/negated half float pair. Absolute is applied first 741 /// Conditionally absolute/negated half float pair. Absolute is applied first
740 Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); 742 Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate);
743 /// Conditionally saturates a half float pair
744 Node GetSaturatedHalfFloat(Node value, bool saturate = true);
741 745
742 /// Returns a predicate comparing two floats 746 /// Returns a predicate comparing two floats
743 Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); 747 Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
@@ -745,8 +749,7 @@ private:
745 Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, 749 Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed,
746 Node op_a, Node op_b); 750 Node op_a, Node op_b);
747 /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared 751 /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared
748 Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, 752 Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
749 const MetaHalfArithmetic& meta, Node op_a, Node op_b);
750 753
751 /// Returns a predicate combiner operation 754 /// Returns a predicate combiner operation
752 OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); 755 OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);