diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 126 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 25 | ||||
| -rw-r--r-- | src/video_core/shader/decode/arithmetic_half.cpp | 15 | ||||
| -rw-r--r-- | src/video_core/shader/decode/arithmetic_half_immediate.cpp | 17 | ||||
| -rw-r--r-- | src/video_core/shader/decode/half_set.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/shader/decode/half_set_predicate.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/shader/decode/hfma2.cpp | 12 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.cpp | 51 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 47 |
9 files changed, 181 insertions, 136 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 445048daf..cd462621d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -119,14 +119,10 @@ std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | |||
| 119 | 119 | ||
| 120 | /// Returns true if an object has to be treated as precise | 120 | /// Returns true if an object has to be treated as precise |
| 121 | bool IsPrecise(Operation operand) { | 121 | bool IsPrecise(Operation operand) { |
| 122 | const auto& meta = operand.GetMeta(); | 122 | const auto& meta{operand.GetMeta()}; |
| 123 | |||
| 124 | if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { | 123 | if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { |
| 125 | return arithmetic->precise; | 124 | return arithmetic->precise; |
| 126 | } | 125 | } |
| 127 | if (const auto half_arithmetic = std::get_if<MetaHalfArithmetic>(&meta)) { | ||
| 128 | return half_arithmetic->precise; | ||
| 129 | } | ||
| 130 | return false; | 126 | return false; |
| 131 | } | 127 | } |
| 132 | 128 | ||
| @@ -627,28 +623,7 @@ private: | |||
| 627 | } | 623 | } |
| 628 | 624 | ||
| 629 | std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { | 625 | std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { |
| 630 | std::string value = VisitOperand(operation, operand_index); | 626 | return CastOperand(VisitOperand(operation, operand_index), type); |
| 631 | switch (type) { | ||
| 632 | case Type::HalfFloat: { | ||
| 633 | const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta()); | ||
| 634 | if (!half_meta) { | ||
| 635 | value = "toHalf2(" + value + ')'; | ||
| 636 | } | ||
| 637 | |||
| 638 | switch (half_meta->types.at(operand_index)) { | ||
| 639 | case Tegra::Shader::HalfType::H0_H1: | ||
| 640 | return "toHalf2(" + value + ')'; | ||
| 641 | case Tegra::Shader::HalfType::F32: | ||
| 642 | return "vec2(" + value + ')'; | ||
| 643 | case Tegra::Shader::HalfType::H0_H0: | ||
| 644 | return "vec2(toHalf2(" + value + ")[0])"; | ||
| 645 | case Tegra::Shader::HalfType::H1_H1: | ||
| 646 | return "vec2(toHalf2(" + value + ")[1])"; | ||
| 647 | } | ||
| 648 | } | ||
| 649 | default: | ||
| 650 | return CastOperand(value, type); | ||
| 651 | } | ||
| 652 | } | 627 | } |
| 653 | 628 | ||
| 654 | std::string CastOperand(const std::string& value, Type type) const { | 629 | std::string CastOperand(const std::string& value, Type type) const { |
| @@ -662,9 +637,7 @@ private: | |||
| 662 | case Type::Uint: | 637 | case Type::Uint: |
| 663 | return "ftou(" + value + ')'; | 638 | return "ftou(" + value + ')'; |
| 664 | case Type::HalfFloat: | 639 | case Type::HalfFloat: |
| 665 | // Can't be handled as a stand-alone value | 640 | return "toHalf2(" + value + ')'; |
| 666 | UNREACHABLE(); | ||
| 667 | return value; | ||
| 668 | } | 641 | } |
| 669 | UNREACHABLE(); | 642 | UNREACHABLE(); |
| 670 | return value; | 643 | return value; |
| @@ -1083,13 +1056,40 @@ private: | |||
| 1083 | return BitwiseCastResult(value, Type::HalfFloat); | 1056 | return BitwiseCastResult(value, Type::HalfFloat); |
| 1084 | } | 1057 | } |
| 1085 | 1058 | ||
| 1059 | std::string HClamp(Operation operation) { | ||
| 1060 | const std::string value = VisitOperand(operation, 0, Type::HalfFloat); | ||
| 1061 | const std::string min = VisitOperand(operation, 1, Type::Float); | ||
| 1062 | const std::string max = VisitOperand(operation, 2, Type::Float); | ||
| 1063 | const std::string clamped = "clamp(" + value + ", vec2(" + min + "), vec2(" + max + "))"; | ||
| 1064 | return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); | ||
| 1065 | } | ||
| 1066 | |||
| 1067 | std::string HUnpack(Operation operation) { | ||
| 1068 | const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)}; | ||
| 1069 | const auto value = [&]() -> std::string { | ||
| 1070 | switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { | ||
| 1071 | case Tegra::Shader::HalfType::H0_H1: | ||
| 1072 | return operand; | ||
| 1073 | case Tegra::Shader::HalfType::F32: | ||
| 1074 | return "vec2(fromHalf2(" + operand + "))"; | ||
| 1075 | case Tegra::Shader::HalfType::H0_H0: | ||
| 1076 | return "vec2(" + operand + "[0])"; | ||
| 1077 | case Tegra::Shader::HalfType::H1_H1: | ||
| 1078 | return "vec2(" + operand + "[1])"; | ||
| 1079 | } | ||
| 1080 | UNREACHABLE(); | ||
| 1081 | return "0"; | ||
| 1082 | }(); | ||
| 1083 | return "fromHalf2(" + value + ')'; | ||
| 1084 | } | ||
| 1085 | |||
| 1086 | std::string HMergeF32(Operation operation) { | 1086 | std::string HMergeF32(Operation operation) { |
| 1087 | return "float(toHalf2(" + Visit(operation[0]) + ")[0])"; | 1087 | return "float(toHalf2(" + Visit(operation[0]) + ")[0])"; |
| 1088 | } | 1088 | } |
| 1089 | 1089 | ||
| 1090 | std::string HMergeH0(Operation operation) { | 1090 | std::string HMergeH0(Operation operation) { |
| 1091 | return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[1], toHalf2(" + | 1091 | return "fromHalf2(vec2(toHalf2(" + Visit(operation[1]) + ")[0], toHalf2(" + |
| 1092 | Visit(operation[1]) + ")[0]))"; | 1092 | Visit(operation[0]) + ")[1]))"; |
| 1093 | } | 1093 | } |
| 1094 | 1094 | ||
| 1095 | std::string HMergeH1(Operation operation) { | 1095 | std::string HMergeH1(Operation operation) { |
| @@ -1189,34 +1189,46 @@ private: | |||
| 1189 | return GenerateUnary(operation, "any", Type::Bool, Type::Bool2); | 1189 | return GenerateUnary(operation, "any", Type::Bool, Type::Bool2); |
| 1190 | } | 1190 | } |
| 1191 | 1191 | ||
| 1192 | template <bool with_nan> | ||
| 1193 | std::string GenerateHalfComparison(Operation operation, std::string compare_op) { | ||
| 1194 | std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, | ||
| 1195 | Type::HalfFloat, Type::HalfFloat)}; | ||
| 1196 | if constexpr (!with_nan) { | ||
| 1197 | return comparison; | ||
| 1198 | } | ||
| 1199 | return "halfFloatNanComparison(" + comparison + ", " + | ||
| 1200 | VisitOperand(operation, 0, Type::HalfFloat) + ", " + | ||
| 1201 | VisitOperand(operation, 1, Type::HalfFloat) + ')'; | ||
| 1202 | } | ||
| 1203 | |||
| 1204 | template <bool with_nan> | ||
| 1192 | std::string Logical2HLessThan(Operation operation) { | 1205 | std::string Logical2HLessThan(Operation operation) { |
| 1193 | return GenerateBinaryCall(operation, "lessThan", Type::Bool2, Type::HalfFloat, | 1206 | return GenerateHalfComparison<with_nan>(operation, "lessThan"); |
| 1194 | Type::HalfFloat); | ||
| 1195 | } | 1207 | } |
| 1196 | 1208 | ||
| 1209 | template <bool with_nan> | ||
| 1197 | std::string Logical2HEqual(Operation operation) { | 1210 | std::string Logical2HEqual(Operation operation) { |
| 1198 | return GenerateBinaryCall(operation, "equal", Type::Bool2, Type::HalfFloat, | 1211 | return GenerateHalfComparison<with_nan>(operation, "equal"); |
| 1199 | Type::HalfFloat); | ||
| 1200 | } | 1212 | } |
| 1201 | 1213 | ||
| 1214 | template <bool with_nan> | ||
| 1202 | std::string Logical2HLessEqual(Operation operation) { | 1215 | std::string Logical2HLessEqual(Operation operation) { |
| 1203 | return GenerateBinaryCall(operation, "lessThanEqual", Type::Bool2, Type::HalfFloat, | 1216 | return GenerateHalfComparison<with_nan>(operation, "lessThanEqual"); |
| 1204 | Type::HalfFloat); | ||
| 1205 | } | 1217 | } |
| 1206 | 1218 | ||
| 1219 | template <bool with_nan> | ||
| 1207 | std::string Logical2HGreaterThan(Operation operation) { | 1220 | std::string Logical2HGreaterThan(Operation operation) { |
| 1208 | return GenerateBinaryCall(operation, "greaterThan", Type::Bool2, Type::HalfFloat, | 1221 | return GenerateHalfComparison<with_nan>(operation, "greaterThan"); |
| 1209 | Type::HalfFloat); | ||
| 1210 | } | 1222 | } |
| 1211 | 1223 | ||
| 1224 | template <bool with_nan> | ||
| 1212 | std::string Logical2HNotEqual(Operation operation) { | 1225 | std::string Logical2HNotEqual(Operation operation) { |
| 1213 | return GenerateBinaryCall(operation, "notEqual", Type::Bool2, Type::HalfFloat, | 1226 | return GenerateHalfComparison<with_nan>(operation, "notEqual"); |
| 1214 | Type::HalfFloat); | ||
| 1215 | } | 1227 | } |
| 1216 | 1228 | ||
| 1229 | template <bool with_nan> | ||
| 1217 | std::string Logical2HGreaterEqual(Operation operation) { | 1230 | std::string Logical2HGreaterEqual(Operation operation) { |
| 1218 | return GenerateBinaryCall(operation, "greaterThanEqual", Type::Bool2, Type::HalfFloat, | 1231 | return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual"); |
| 1219 | Type::HalfFloat); | ||
| 1220 | } | 1232 | } |
| 1221 | 1233 | ||
| 1222 | std::string Texture(Operation operation) { | 1234 | std::string Texture(Operation operation) { |
| @@ -1505,6 +1517,8 @@ private: | |||
| 1505 | &GLSLDecompiler::Fma<Type::HalfFloat>, | 1517 | &GLSLDecompiler::Fma<Type::HalfFloat>, |
| 1506 | &GLSLDecompiler::Absolute<Type::HalfFloat>, | 1518 | &GLSLDecompiler::Absolute<Type::HalfFloat>, |
| 1507 | &GLSLDecompiler::HNegate, | 1519 | &GLSLDecompiler::HNegate, |
| 1520 | &GLSLDecompiler::HClamp, | ||
| 1521 | &GLSLDecompiler::HUnpack, | ||
| 1508 | &GLSLDecompiler::HMergeF32, | 1522 | &GLSLDecompiler::HMergeF32, |
| 1509 | &GLSLDecompiler::HMergeH0, | 1523 | &GLSLDecompiler::HMergeH0, |
| 1510 | &GLSLDecompiler::HMergeH1, | 1524 | &GLSLDecompiler::HMergeH1, |
| @@ -1541,12 +1555,18 @@ private: | |||
| 1541 | &GLSLDecompiler::LogicalNotEqual<Type::Uint>, | 1555 | &GLSLDecompiler::LogicalNotEqual<Type::Uint>, |
| 1542 | &GLSLDecompiler::LogicalGreaterEqual<Type::Uint>, | 1556 | &GLSLDecompiler::LogicalGreaterEqual<Type::Uint>, |
| 1543 | 1557 | ||
| 1544 | &GLSLDecompiler::Logical2HLessThan, | 1558 | &GLSLDecompiler::Logical2HLessThan<false>, |
| 1545 | &GLSLDecompiler::Logical2HEqual, | 1559 | &GLSLDecompiler::Logical2HEqual<false>, |
| 1546 | &GLSLDecompiler::Logical2HLessEqual, | 1560 | &GLSLDecompiler::Logical2HLessEqual<false>, |
| 1547 | &GLSLDecompiler::Logical2HGreaterThan, | 1561 | &GLSLDecompiler::Logical2HGreaterThan<false>, |
| 1548 | &GLSLDecompiler::Logical2HNotEqual, | 1562 | &GLSLDecompiler::Logical2HNotEqual<false>, |
| 1549 | &GLSLDecompiler::Logical2HGreaterEqual, | 1563 | &GLSLDecompiler::Logical2HGreaterEqual<false>, |
| 1564 | &GLSLDecompiler::Logical2HLessThan<true>, | ||
| 1565 | &GLSLDecompiler::Logical2HEqual<true>, | ||
| 1566 | &GLSLDecompiler::Logical2HLessEqual<true>, | ||
| 1567 | &GLSLDecompiler::Logical2HGreaterThan<true>, | ||
| 1568 | &GLSLDecompiler::Logical2HNotEqual<true>, | ||
| 1569 | &GLSLDecompiler::Logical2HGreaterEqual<true>, | ||
| 1550 | 1570 | ||
| 1551 | &GLSLDecompiler::Texture, | 1571 | &GLSLDecompiler::Texture, |
| 1552 | &GLSLDecompiler::TextureLod, | 1572 | &GLSLDecompiler::TextureLod, |
| @@ -1647,6 +1667,12 @@ std::string GetCommonDeclarations() { | |||
| 1647 | "}\n\n" | 1667 | "}\n\n" |
| 1648 | "vec2 toHalf2(float value) {\n" | 1668 | "vec2 toHalf2(float value) {\n" |
| 1649 | " return unpackHalf2x16(ftou(value));\n" | 1669 | " return unpackHalf2x16(ftou(value));\n" |
| 1670 | "}\n\n" | ||
| 1671 | "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {\n" | ||
| 1672 | " bvec2 is_nan1 = isnan(pair1);\n" | ||
| 1673 | " bvec2 is_nan2 = isnan(pair2);\n" | ||
| 1674 | " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " | ||
| 1675 | "is_nan2.y);\n" | ||
| 1650 | "}\n"; | 1676 | "}\n"; |
| 1651 | } | 1677 | } |
| 1652 | 1678 | ||
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 25500f9a3..23d9b10db 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -76,14 +76,10 @@ constexpr u32 GetGenericAttributeLocation(Attribute::Index attribute) { | |||
| 76 | 76 | ||
| 77 | /// Returns true if an object has to be treated as precise | 77 | /// Returns true if an object has to be treated as precise |
| 78 | bool IsPrecise(Operation operand) { | 78 | bool IsPrecise(Operation operand) { |
| 79 | const auto& meta = operand.GetMeta(); | 79 | const auto& meta{operand.GetMeta()}; |
| 80 | |||
| 81 | if (std::holds_alternative<MetaArithmetic>(meta)) { | 80 | if (std::holds_alternative<MetaArithmetic>(meta)) { |
| 82 | return std::get<MetaArithmetic>(meta).precise; | 81 | return std::get<MetaArithmetic>(meta).precise; |
| 83 | } | 82 | } |
| 84 | if (std::holds_alternative<MetaHalfArithmetic>(meta)) { | ||
| 85 | return std::get<MetaHalfArithmetic>(meta).precise; | ||
| 86 | } | ||
| 87 | return false; | 83 | return false; |
| 88 | } | 84 | } |
| 89 | 85 | ||
| @@ -746,6 +742,16 @@ private: | |||
| 746 | return {}; | 742 | return {}; |
| 747 | } | 743 | } |
| 748 | 744 | ||
| 745 | Id HClamp(Operation operation) { | ||
| 746 | UNIMPLEMENTED(); | ||
| 747 | return {}; | ||
| 748 | } | ||
| 749 | |||
| 750 | Id HUnpack(Operation operation) { | ||
| 751 | UNIMPLEMENTED(); | ||
| 752 | return {}; | ||
| 753 | } | ||
| 754 | |||
| 749 | Id HMergeF32(Operation operation) { | 755 | Id HMergeF32(Operation operation) { |
| 750 | UNIMPLEMENTED(); | 756 | UNIMPLEMENTED(); |
| 751 | return {}; | 757 | return {}; |
| @@ -1218,6 +1224,8 @@ private: | |||
| 1218 | &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>, | 1224 | &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>, |
| 1219 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, | 1225 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, |
| 1220 | &SPIRVDecompiler::HNegate, | 1226 | &SPIRVDecompiler::HNegate, |
| 1227 | &SPIRVDecompiler::HClamp, | ||
| 1228 | &SPIRVDecompiler::HUnpack, | ||
| 1221 | &SPIRVDecompiler::HMergeF32, | 1229 | &SPIRVDecompiler::HMergeF32, |
| 1222 | &SPIRVDecompiler::HMergeH0, | 1230 | &SPIRVDecompiler::HMergeH0, |
| 1223 | &SPIRVDecompiler::HMergeH1, | 1231 | &SPIRVDecompiler::HMergeH1, |
| @@ -1260,6 +1268,13 @@ private: | |||
| 1260 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::HalfFloat>, | 1268 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::HalfFloat>, |
| 1261 | &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::HalfFloat>, | 1269 | &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::HalfFloat>, |
| 1262 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::HalfFloat>, | 1270 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::HalfFloat>, |
| 1271 | // TODO(Rodrigo): Should these use the OpFUnord* variants? | ||
| 1272 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::HalfFloat>, | ||
| 1273 | &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::HalfFloat>, | ||
| 1274 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::HalfFloat>, | ||
| 1275 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::HalfFloat>, | ||
| 1276 | &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::HalfFloat>, | ||
| 1277 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::HalfFloat>, | ||
| 1263 | 1278 | ||
| 1264 | &SPIRVDecompiler::Texture, | 1279 | &SPIRVDecompiler::Texture, |
| 1265 | &SPIRVDecompiler::TextureLod, | 1280 | &SPIRVDecompiler::TextureLod, |
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index baee89107..9467f9417 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp | |||
| @@ -18,7 +18,9 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { | |||
| 18 | 18 | ||
| 19 | if (opcode->get().GetId() == OpCode::Id::HADD2_C || | 19 | if (opcode->get().GetId() == OpCode::Id::HADD2_C || |
| 20 | opcode->get().GetId() == OpCode::Id::HADD2_R) { | 20 | opcode->get().GetId() == OpCode::Id::HADD2_R) { |
| 21 | UNIMPLEMENTED_IF(instr.alu_half.ftz != 0); | 21 | if (instr.alu_half.ftz != 0) { |
| 22 | LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); | ||
| 23 | } | ||
| 22 | } | 24 | } |
| 23 | UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented"); | 25 | UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented"); |
| 24 | 26 | ||
| @@ -27,9 +29,8 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { | |||
| 27 | const bool negate_b = | 29 | const bool negate_b = |
| 28 | opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; | 30 | opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; |
| 29 | 31 | ||
| 30 | const Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half.abs_a, negate_a); | 32 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); |
| 31 | 33 | op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a); | |
| 32 | // instr.alu_half.type_a | ||
| 33 | 34 | ||
| 34 | Node op_b = [&]() { | 35 | Node op_b = [&]() { |
| 35 | switch (opcode->get().GetId()) { | 36 | switch (opcode->get().GetId()) { |
| @@ -44,17 +45,17 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { | |||
| 44 | return Immediate(0); | 45 | return Immediate(0); |
| 45 | } | 46 | } |
| 46 | }(); | 47 | }(); |
| 48 | op_b = UnpackHalfFloat(op_b, instr.alu_half.type_b); | ||
| 47 | op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); | 49 | op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); |
| 48 | 50 | ||
| 49 | Node value = [&]() { | 51 | Node value = [&]() { |
| 50 | MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a, instr.alu_half.type_b}}; | ||
| 51 | switch (opcode->get().GetId()) { | 52 | switch (opcode->get().GetId()) { |
| 52 | case OpCode::Id::HADD2_C: | 53 | case OpCode::Id::HADD2_C: |
| 53 | case OpCode::Id::HADD2_R: | 54 | case OpCode::Id::HADD2_R: |
| 54 | return Operation(OperationCode::HAdd, meta, op_a, op_b); | 55 | return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); |
| 55 | case OpCode::Id::HMUL2_C: | 56 | case OpCode::Id::HMUL2_C: |
| 56 | case OpCode::Id::HMUL2_R: | 57 | case OpCode::Id::HMUL2_R: |
| 57 | return Operation(OperationCode::HMul, meta, op_a, op_b); | 58 | return Operation(OperationCode::HMul, PRECISE, op_a, op_b); |
| 58 | default: | 59 | default: |
| 59 | UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); | 60 | UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); |
| 60 | return Immediate(0); | 61 | return Immediate(0); |
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index c2164ba50..fbcd35b18 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp | |||
| @@ -17,34 +17,33 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { | |||
| 17 | const auto opcode = OpCode::Decode(instr); | 17 | const auto opcode = OpCode::Decode(instr); |
| 18 | 18 | ||
| 19 | if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { | 19 | if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { |
| 20 | UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0); | 20 | if (instr.alu_half_imm.ftz != 0) { |
| 21 | LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); | ||
| 22 | } | ||
| 21 | } else { | 23 | } else { |
| 22 | UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); | 24 | UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); |
| 23 | } | 25 | } |
| 24 | UNIMPLEMENTED_IF_MSG(instr.alu_half_imm.saturate != 0, | ||
| 25 | "Half float immediate saturation not implemented"); | ||
| 26 | 26 | ||
| 27 | Node op_a = GetRegister(instr.gpr8); | 27 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); |
| 28 | op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); | 28 | op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); |
| 29 | 29 | ||
| 30 | const Node op_b = UnpackHalfImmediate(instr, true); | 30 | const Node op_b = UnpackHalfImmediate(instr, true); |
| 31 | 31 | ||
| 32 | Node value = [&]() { | 32 | Node value = [&]() { |
| 33 | MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a}}; | ||
| 34 | switch (opcode->get().GetId()) { | 33 | switch (opcode->get().GetId()) { |
| 35 | case OpCode::Id::HADD2_IMM: | 34 | case OpCode::Id::HADD2_IMM: |
| 36 | return Operation(OperationCode::HAdd, meta, op_a, op_b); | 35 | return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); |
| 37 | case OpCode::Id::HMUL2_IMM: | 36 | case OpCode::Id::HMUL2_IMM: |
| 38 | return Operation(OperationCode::HMul, meta, op_a, op_b); | 37 | return Operation(OperationCode::HMul, PRECISE, op_a, op_b); |
| 39 | default: | 38 | default: |
| 40 | UNREACHABLE(); | 39 | UNREACHABLE(); |
| 41 | return Immediate(0); | 40 | return Immediate(0); |
| 42 | } | 41 | } |
| 43 | }(); | 42 | }(); |
| 44 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); | ||
| 45 | 43 | ||
| 44 | value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate); | ||
| 45 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); | ||
| 46 | SetRegister(bb, instr.gpr0, value); | 46 | SetRegister(bb, instr.gpr0, value); |
| 47 | |||
| 48 | return pc; | 47 | return pc; |
| 49 | } | 48 | } |
| 50 | 49 | ||
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index 748368555..1dd94bf9d 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp | |||
| @@ -18,11 +18,13 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { | |||
| 18 | const Instruction instr = {program_code[pc]}; | 18 | const Instruction instr = {program_code[pc]}; |
| 19 | const auto opcode = OpCode::Decode(instr); | 19 | const auto opcode = OpCode::Decode(instr); |
| 20 | 20 | ||
| 21 | UNIMPLEMENTED_IF(instr.hset2.ftz != 0); | 21 | if (instr.hset2.ftz != 0) { |
| 22 | LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); | ||
| 23 | } | ||
| 24 | |||
| 25 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); | ||
| 26 | op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a); | ||
| 22 | 27 | ||
| 23 | // instr.hset2.type_a | ||
| 24 | // instr.hset2.type_b | ||
| 25 | Node op_a = GetRegister(instr.gpr8); | ||
| 26 | Node op_b = [&]() { | 28 | Node op_b = [&]() { |
| 27 | switch (opcode->get().GetId()) { | 29 | switch (opcode->get().GetId()) { |
| 28 | case OpCode::Id::HSET2_R: | 30 | case OpCode::Id::HSET2_R: |
| @@ -32,14 +34,12 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { | |||
| 32 | return Immediate(0); | 34 | return Immediate(0); |
| 33 | } | 35 | } |
| 34 | }(); | 36 | }(); |
| 35 | 37 | op_b = UnpackHalfFloat(op_b, instr.hset2.type_b); | |
| 36 | op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a); | ||
| 37 | op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b); | 38 | op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b); |
| 38 | 39 | ||
| 39 | const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); | 40 | const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); |
| 40 | 41 | ||
| 41 | MetaHalfArithmetic meta{false, {instr.hset2.type_a, instr.hset2.type_b}}; | 42 | const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b); |
| 42 | const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, meta, op_a, op_b); | ||
| 43 | 43 | ||
| 44 | const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); | 44 | const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); |
| 45 | 45 | ||
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index e68512692..6e59eb650 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp | |||
| @@ -19,10 +19,10 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { | |||
| 19 | 19 | ||
| 20 | UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); | 20 | UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); |
| 21 | 21 | ||
| 22 | Node op_a = GetRegister(instr.gpr8); | 22 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); |
| 23 | op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); | 23 | op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); |
| 24 | 24 | ||
| 25 | const Node op_b = [&]() { | 25 | Node op_b = [&]() { |
| 26 | switch (opcode->get().GetId()) { | 26 | switch (opcode->get().GetId()) { |
| 27 | case OpCode::Id::HSETP2_R: | 27 | case OpCode::Id::HSETP2_R: |
| 28 | return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, | 28 | return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, |
| @@ -32,6 +32,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { | |||
| 32 | return Immediate(0); | 32 | return Immediate(0); |
| 33 | } | 33 | } |
| 34 | }(); | 34 | }(); |
| 35 | op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b); | ||
| 35 | 36 | ||
| 36 | // We can't use the constant predicate as destination. | 37 | // We can't use the constant predicate as destination. |
| 37 | ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); | 38 | ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); |
| @@ -42,8 +43,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { | |||
| 42 | const OperationCode pair_combiner = | 43 | const OperationCode pair_combiner = |
| 43 | instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2; | 44 | instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2; |
| 44 | 45 | ||
| 45 | MetaHalfArithmetic meta = {false, {instr.hsetp2.type_a, instr.hsetp2.type_b}}; | 46 | const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b); |
| 46 | const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b); | ||
| 47 | const Node first_pred = Operation(pair_combiner, comparison); | 47 | const Node first_pred = Operation(pair_combiner, comparison); |
| 48 | 48 | ||
| 49 | // Set the primary predicate to the result of Predicate OP SecondPredicate | 49 | // Set the primary predicate to the result of Predicate OP SecondPredicate |
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index 7a07c5ec6..5c1becce5 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp | |||
| @@ -27,10 +27,6 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { | |||
| 27 | } | 27 | } |
| 28 | 28 | ||
| 29 | constexpr auto identity = HalfType::H0_H1; | 29 | constexpr auto identity = HalfType::H0_H1; |
| 30 | |||
| 31 | const HalfType type_a = instr.hfma2.type_a; | ||
| 32 | const Node op_a = GetRegister(instr.gpr8); | ||
| 33 | |||
| 34 | bool neg_b{}, neg_c{}; | 30 | bool neg_b{}, neg_c{}; |
| 35 | auto [saturate, type_b, op_b, type_c, | 31 | auto [saturate, type_b, op_b, type_c, |
| 36 | op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> { | 32 | op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> { |
| @@ -62,11 +58,11 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { | |||
| 62 | }(); | 58 | }(); |
| 63 | UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented"); | 59 | UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented"); |
| 64 | 60 | ||
| 65 | op_b = GetOperandAbsNegHalf(op_b, false, neg_b); | 61 | const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); |
| 66 | op_c = GetOperandAbsNegHalf(op_c, false, neg_c); | 62 | op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); |
| 63 | op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); | ||
| 67 | 64 | ||
| 68 | MetaHalfArithmetic meta{true, {type_a, type_b, type_c}}; | 65 | Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); |
| 69 | Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c); | ||
| 70 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); | 66 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); |
| 71 | 67 | ||
| 72 | SetRegister(bb, instr.gpr0, value); | 68 | SetRegister(bb, instr.gpr0, value); |
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index ac5112d78..17f2f711c 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -189,7 +189,11 @@ Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { | |||
| 189 | const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); | 189 | const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); |
| 190 | const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); | 190 | const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); |
| 191 | 191 | ||
| 192 | return Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, first_negate, second_negate); | 192 | return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate); |
| 193 | } | ||
| 194 | |||
| 195 | Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { | ||
| 196 | return Operation(OperationCode::HUnpack, type, value); | ||
| 193 | } | 197 | } |
| 194 | 198 | ||
| 195 | Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | 199 | Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { |
| @@ -209,17 +213,26 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | |||
| 209 | 213 | ||
| 210 | Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { | 214 | Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { |
| 211 | if (absolute) { | 215 | if (absolute) { |
| 212 | value = Operation(OperationCode::HAbsolute, HALF_NO_PRECISE, value); | 216 | value = Operation(OperationCode::HAbsolute, NO_PRECISE, value); |
| 213 | } | 217 | } |
| 214 | if (negate) { | 218 | if (negate) { |
| 215 | value = Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, GetPredicate(true), | 219 | value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true), |
| 216 | GetPredicate(true)); | 220 | GetPredicate(true)); |
| 217 | } | 221 | } |
| 218 | return value; | 222 | return value; |
| 219 | } | 223 | } |
| 220 | 224 | ||
| 225 | Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { | ||
| 226 | if (!saturate) { | ||
| 227 | return value; | ||
| 228 | } | ||
| 229 | const Node positive_zero = Immediate(std::copysignf(0, 1)); | ||
| 230 | const Node positive_one = Immediate(1.0f); | ||
| 231 | return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one); | ||
| 232 | } | ||
| 233 | |||
| 221 | Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { | 234 | Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { |
| 222 | static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { | 235 | const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { |
| 223 | {PredCondition::LessThan, OperationCode::LogicalFLessThan}, | 236 | {PredCondition::LessThan, OperationCode::LogicalFLessThan}, |
| 224 | {PredCondition::Equal, OperationCode::LogicalFEqual}, | 237 | {PredCondition::Equal, OperationCode::LogicalFEqual}, |
| 225 | {PredCondition::LessEqual, OperationCode::LogicalFLessEqual}, | 238 | {PredCondition::LessEqual, OperationCode::LogicalFLessEqual}, |
| @@ -255,7 +268,7 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N | |||
| 255 | 268 | ||
| 256 | Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, | 269 | Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, |
| 257 | Node op_b) { | 270 | Node op_b) { |
| 258 | static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { | 271 | const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { |
| 259 | {PredCondition::LessThan, OperationCode::LogicalILessThan}, | 272 | {PredCondition::LessThan, OperationCode::LogicalILessThan}, |
| 260 | {PredCondition::Equal, OperationCode::LogicalIEqual}, | 273 | {PredCondition::Equal, OperationCode::LogicalIEqual}, |
| 261 | {PredCondition::LessEqual, OperationCode::LogicalILessEqual}, | 274 | {PredCondition::LessEqual, OperationCode::LogicalILessEqual}, |
| @@ -283,40 +296,32 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si | |||
| 283 | return predicate; | 296 | return predicate; |
| 284 | } | 297 | } |
| 285 | 298 | ||
| 286 | Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, | 299 | Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, |
| 287 | const MetaHalfArithmetic& meta, Node op_a, Node op_b) { | 300 | Node op_b) { |
| 288 | 301 | const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { | |
| 289 | UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || | ||
| 290 | condition == PredCondition::NotEqualWithNan || | ||
| 291 | condition == PredCondition::LessEqualWithNan || | ||
| 292 | condition == PredCondition::GreaterThanWithNan || | ||
| 293 | condition == PredCondition::GreaterEqualWithNan, | ||
| 294 | "Unimplemented NaN comparison for half floats"); | ||
| 295 | |||
| 296 | static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { | ||
| 297 | {PredCondition::LessThan, OperationCode::Logical2HLessThan}, | 302 | {PredCondition::LessThan, OperationCode::Logical2HLessThan}, |
| 298 | {PredCondition::Equal, OperationCode::Logical2HEqual}, | 303 | {PredCondition::Equal, OperationCode::Logical2HEqual}, |
| 299 | {PredCondition::LessEqual, OperationCode::Logical2HLessEqual}, | 304 | {PredCondition::LessEqual, OperationCode::Logical2HLessEqual}, |
| 300 | {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan}, | 305 | {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan}, |
| 301 | {PredCondition::NotEqual, OperationCode::Logical2HNotEqual}, | 306 | {PredCondition::NotEqual, OperationCode::Logical2HNotEqual}, |
| 302 | {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual}, | 307 | {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual}, |
| 303 | {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThan}, | 308 | {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan}, |
| 304 | {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqual}, | 309 | {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan}, |
| 305 | {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqual}, | 310 | {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan}, |
| 306 | {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThan}, | 311 | {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan}, |
| 307 | {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqual}}; | 312 | {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}}; |
| 308 | 313 | ||
| 309 | const auto comparison{PredicateComparisonTable.find(condition)}; | 314 | const auto comparison{PredicateComparisonTable.find(condition)}; |
| 310 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), | 315 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), |
| 311 | "Unknown predicate comparison operation"); | 316 | "Unknown predicate comparison operation"); |
| 312 | 317 | ||
| 313 | const Node predicate = Operation(comparison->second, meta, op_a, op_b); | 318 | const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); |
| 314 | 319 | ||
| 315 | return predicate; | 320 | return predicate; |
| 316 | } | 321 | } |
| 317 | 322 | ||
| 318 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { | 323 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { |
| 319 | static const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = { | 324 | const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = { |
| 320 | {PredOperation::And, OperationCode::LogicalAnd}, | 325 | {PredOperation::And, OperationCode::LogicalAnd}, |
| 321 | {PredOperation::Or, OperationCode::LogicalOr}, | 326 | {PredOperation::Or, OperationCode::LogicalOr}, |
| 322 | {PredOperation::Xor, OperationCode::LogicalXor}, | 327 | {PredOperation::Xor, OperationCode::LogicalXor}, |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 57af8b10f..81278fb33 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -109,11 +109,13 @@ enum class OperationCode { | |||
| 109 | UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint | 109 | UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint |
| 110 | UBitCount, /// (MetaArithmetic, uint) -> uint | 110 | UBitCount, /// (MetaArithmetic, uint) -> uint |
| 111 | 111 | ||
| 112 | HAdd, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 | 112 | HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 |
| 113 | HMul, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 | 113 | HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 |
| 114 | HFma, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 | 114 | HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 |
| 115 | HAbsolute, /// (f16vec2 a) -> f16vec2 | 115 | HAbsolute, /// (f16vec2 a) -> f16vec2 |
| 116 | HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 | 116 | HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 |
| 117 | HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 | ||
| 118 | HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 | ||
| 117 | HMergeF32, /// (f16vec2 src) -> float | 119 | HMergeF32, /// (f16vec2 src) -> float |
| 118 | HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 | 120 | HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 |
| 119 | HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 | 121 | HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 |
| @@ -150,12 +152,18 @@ enum class OperationCode { | |||
| 150 | LogicalUNotEqual, /// (uint a, uint b) -> bool | 152 | LogicalUNotEqual, /// (uint a, uint b) -> bool |
| 151 | LogicalUGreaterEqual, /// (uint a, uint b) -> bool | 153 | LogicalUGreaterEqual, /// (uint a, uint b) -> bool |
| 152 | 154 | ||
| 153 | Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | 155 | Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 |
| 154 | Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | 156 | Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 |
| 155 | Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | 157 | Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 |
| 156 | Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | 158 | Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 |
| 157 | Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | 159 | Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 |
| 158 | Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | 160 | Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 |
| 161 | Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 162 | Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 163 | Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 164 | Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 165 | Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 166 | Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 159 | 167 | ||
| 160 | Texture, /// (MetaTexture, float[N] coords) -> float4 | 168 | Texture, /// (MetaTexture, float[N] coords) -> float4 |
| 161 | TextureLod, /// (MetaTexture, float[N] coords) -> float4 | 169 | TextureLod, /// (MetaTexture, float[N] coords) -> float4 |
| @@ -308,13 +316,6 @@ struct MetaArithmetic { | |||
| 308 | bool precise{}; | 316 | bool precise{}; |
| 309 | }; | 317 | }; |
| 310 | 318 | ||
| 311 | struct MetaHalfArithmetic { | ||
| 312 | bool precise{}; | ||
| 313 | std::array<Tegra::Shader::HalfType, 3> types = {Tegra::Shader::HalfType::H0_H1, | ||
| 314 | Tegra::Shader::HalfType::H0_H1, | ||
| 315 | Tegra::Shader::HalfType::H0_H1}; | ||
| 316 | }; | ||
| 317 | |||
| 318 | struct MetaTexture { | 319 | struct MetaTexture { |
| 319 | const Sampler& sampler; | 320 | const Sampler& sampler; |
| 320 | Node array{}; | 321 | Node array{}; |
| @@ -326,11 +327,10 @@ struct MetaTexture { | |||
| 326 | u32 element{}; | 327 | u32 element{}; |
| 327 | }; | 328 | }; |
| 328 | 329 | ||
| 329 | constexpr MetaArithmetic PRECISE = {true}; | 330 | inline constexpr MetaArithmetic PRECISE = {true}; |
| 330 | constexpr MetaArithmetic NO_PRECISE = {false}; | 331 | inline constexpr MetaArithmetic NO_PRECISE = {false}; |
| 331 | constexpr MetaHalfArithmetic HALF_NO_PRECISE = {false}; | ||
| 332 | 332 | ||
| 333 | using Meta = std::variant<MetaArithmetic, MetaHalfArithmetic, MetaTexture>; | 333 | using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>; |
| 334 | 334 | ||
| 335 | /// Holds any kind of operation that can be done in the IR | 335 | /// Holds any kind of operation that can be done in the IR |
| 336 | class OperationNode final { | 336 | class OperationNode final { |
| @@ -734,10 +734,14 @@ private: | |||
| 734 | 734 | ||
| 735 | /// Unpacks a half immediate from an instruction | 735 | /// Unpacks a half immediate from an instruction |
| 736 | Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); | 736 | Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); |
| 737 | /// Unpacks a binary value into a half float pair with a type format | ||
| 738 | Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type); | ||
| 737 | /// Merges a half pair into another value | 739 | /// Merges a half pair into another value |
| 738 | Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); | 740 | Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); |
| 739 | /// Conditionally absolute/negated half float pair. Absolute is applied first | 741 | /// Conditionally absolute/negated half float pair. Absolute is applied first |
| 740 | Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); | 742 | Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); |
| 743 | /// Conditionally saturates a half float pair | ||
| 744 | Node GetSaturatedHalfFloat(Node value, bool saturate = true); | ||
| 741 | 745 | ||
| 742 | /// Returns a predicate comparing two floats | 746 | /// Returns a predicate comparing two floats |
| 743 | Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); | 747 | Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); |
| @@ -745,8 +749,7 @@ private: | |||
| 745 | Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, | 749 | Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, |
| 746 | Node op_a, Node op_b); | 750 | Node op_a, Node op_b); |
| 747 | /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared | 751 | /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared |
| 748 | Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, | 752 | Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); |
| 749 | const MetaHalfArithmetic& meta, Node op_a, Node op_b); | ||
| 750 | 753 | ||
| 751 | /// Returns a predicate combiner operation | 754 | /// Returns a predicate combiner operation |
| 752 | OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); | 755 | OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); |