diff options
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 18 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 18 | ||||
| -rw-r--r-- | src/video_core/shader/decode/conversion.cpp | 30 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 25 |
5 files changed, 75 insertions, 18 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 8520a0143..cc307f8a4 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -1018,8 +1018,6 @@ union Instruction { | |||
| 1018 | } f2i; | 1018 | } f2i; |
| 1019 | 1019 | ||
| 1020 | union { | 1020 | union { |
| 1021 | BitField<8, 2, Register::Size> src_size; | ||
| 1022 | BitField<10, 2, Register::Size> dst_size; | ||
| 1023 | BitField<39, 4, u64> rounding; | 1021 | BitField<39, 4, u64> rounding; |
| 1024 | // H0, H1 extract for F16 missing | 1022 | // H0, H1 extract for F16 missing |
| 1025 | BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value | 1023 | BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e19d502bc..7e9251626 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -1122,6 +1122,16 @@ private: | |||
| 1122 | Type::Float); | 1122 | Type::Float); |
| 1123 | } | 1123 | } |
| 1124 | 1124 | ||
| 1125 | std::string FCastHalf0(Operation operation) { | ||
| 1126 | const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat); | ||
| 1127 | return fmt::format("({})[0]", op_a); | ||
| 1128 | } | ||
| 1129 | |||
| 1130 | std::string FCastHalf1(Operation operation) { | ||
| 1131 | const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat); | ||
| 1132 | return fmt::format("({})[1]", op_a); | ||
| 1133 | } | ||
| 1134 | |||
| 1125 | template <Type type> | 1135 | template <Type type> |
| 1126 | std::string Min(Operation operation) { | 1136 | std::string Min(Operation operation) { |
| 1127 | return GenerateBinaryCall(operation, "min", type, type, type); | 1137 | return GenerateBinaryCall(operation, "min", type, type, type); |
| @@ -1278,6 +1288,11 @@ private: | |||
| 1278 | return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); | 1288 | return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); |
| 1279 | } | 1289 | } |
| 1280 | 1290 | ||
| 1291 | std::string HCastFloat(Operation operation) { | ||
| 1292 | const std::string op_a = VisitOperand(operation, 0, Type::Float); | ||
| 1293 | return fmt::format("fromHalf2(vec2({}, 0.0f))", op_a); | ||
| 1294 | } | ||
| 1295 | |||
| 1281 | std::string HUnpack(Operation operation) { | 1296 | std::string HUnpack(Operation operation) { |
| 1282 | const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)}; | 1297 | const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)}; |
| 1283 | const auto value = [&]() -> std::string { | 1298 | const auto value = [&]() -> std::string { |
| @@ -1718,6 +1733,8 @@ private: | |||
| 1718 | &GLSLDecompiler::Negate<Type::Float>, | 1733 | &GLSLDecompiler::Negate<Type::Float>, |
| 1719 | &GLSLDecompiler::Absolute<Type::Float>, | 1734 | &GLSLDecompiler::Absolute<Type::Float>, |
| 1720 | &GLSLDecompiler::FClamp, | 1735 | &GLSLDecompiler::FClamp, |
| 1736 | &GLSLDecompiler::FCastHalf0, | ||
| 1737 | &GLSLDecompiler::FCastHalf1, | ||
| 1721 | &GLSLDecompiler::Min<Type::Float>, | 1738 | &GLSLDecompiler::Min<Type::Float>, |
| 1722 | &GLSLDecompiler::Max<Type::Float>, | 1739 | &GLSLDecompiler::Max<Type::Float>, |
| 1723 | &GLSLDecompiler::FCos, | 1740 | &GLSLDecompiler::FCos, |
| @@ -1778,6 +1795,7 @@ private: | |||
| 1778 | &GLSLDecompiler::Absolute<Type::HalfFloat>, | 1795 | &GLSLDecompiler::Absolute<Type::HalfFloat>, |
| 1779 | &GLSLDecompiler::HNegate, | 1796 | &GLSLDecompiler::HNegate, |
| 1780 | &GLSLDecompiler::HClamp, | 1797 | &GLSLDecompiler::HClamp, |
| 1798 | &GLSLDecompiler::HCastFloat, | ||
| 1781 | &GLSLDecompiler::HUnpack, | 1799 | &GLSLDecompiler::HUnpack, |
| 1782 | &GLSLDecompiler::HMergeF32, | 1800 | &GLSLDecompiler::HMergeF32, |
| 1783 | &GLSLDecompiler::HMergeH0, | 1801 | &GLSLDecompiler::HMergeH0, |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index d267712c9..24a591797 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -735,6 +735,16 @@ private: | |||
| 735 | return {}; | 735 | return {}; |
| 736 | } | 736 | } |
| 737 | 737 | ||
| 738 | Id FCastHalf0(Operation operation) { | ||
| 739 | UNIMPLEMENTED(); | ||
| 740 | return {}; | ||
| 741 | } | ||
| 742 | |||
| 743 | Id FCastHalf1(Operation operation) { | ||
| 744 | UNIMPLEMENTED(); | ||
| 745 | return {}; | ||
| 746 | } | ||
| 747 | |||
| 738 | Id HNegate(Operation operation) { | 748 | Id HNegate(Operation operation) { |
| 739 | UNIMPLEMENTED(); | 749 | UNIMPLEMENTED(); |
| 740 | return {}; | 750 | return {}; |
| @@ -745,6 +755,11 @@ private: | |||
| 745 | return {}; | 755 | return {}; |
| 746 | } | 756 | } |
| 747 | 757 | ||
| 758 | Id HCastFloat(Operation operation) { | ||
| 759 | UNIMPLEMENTED(); | ||
| 760 | return {}; | ||
| 761 | } | ||
| 762 | |||
| 748 | Id HUnpack(Operation operation) { | 763 | Id HUnpack(Operation operation) { |
| 749 | UNIMPLEMENTED(); | 764 | UNIMPLEMENTED(); |
| 750 | return {}; | 765 | return {}; |
| @@ -1210,6 +1225,8 @@ private: | |||
| 1210 | &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, | 1225 | &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, |
| 1211 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, | 1226 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, |
| 1212 | &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, | 1227 | &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, |
| 1228 | &SPIRVDecompiler::FCastHalf0, | ||
| 1229 | &SPIRVDecompiler::FCastHalf1, | ||
| 1213 | &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, | 1230 | &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, |
| 1214 | &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, | 1231 | &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, |
| 1215 | &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, | 1232 | &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, |
| @@ -1270,6 +1287,7 @@ private: | |||
| 1270 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, | 1287 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, |
| 1271 | &SPIRVDecompiler::HNegate, | 1288 | &SPIRVDecompiler::HNegate, |
| 1272 | &SPIRVDecompiler::HClamp, | 1289 | &SPIRVDecompiler::HClamp, |
| 1290 | &SPIRVDecompiler::HCastFloat, | ||
| 1273 | &SPIRVDecompiler::HUnpack, | 1291 | &SPIRVDecompiler::HUnpack, |
| 1274 | &SPIRVDecompiler::HMergeF32, | 1292 | &SPIRVDecompiler::HMergeF32, |
| 1275 | &SPIRVDecompiler::HMergeH0, | 1293 | &SPIRVDecompiler::HMergeH0, |
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 4221f0c58..8973fbefa 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp | |||
| @@ -57,7 +57,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 57 | case OpCode::Id::I2F_R: | 57 | case OpCode::Id::I2F_R: |
| 58 | case OpCode::Id::I2F_C: | 58 | case OpCode::Id::I2F_C: |
| 59 | case OpCode::Id::I2F_IMM: { | 59 | case OpCode::Id::I2F_IMM: { |
| 60 | UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); | 60 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); |
| 61 | UNIMPLEMENTED_IF(instr.conversion.selector); | 61 | UNIMPLEMENTED_IF(instr.conversion.selector); |
| 62 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 62 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 63 | "Condition codes generation in I2F is not implemented"); | 63 | "Condition codes generation in I2F is not implemented"); |
| @@ -82,14 +82,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 82 | value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); | 82 | value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); |
| 83 | 83 | ||
| 84 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | 84 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); |
| 85 | |||
| 86 | if (instr.conversion.dst_size == Register::Size::Short) { | ||
| 87 | value = Operation(OperationCode::HCastFloat, PRECISE, value); | ||
| 88 | } | ||
| 89 | |||
| 85 | SetRegister(bb, instr.gpr0, value); | 90 | SetRegister(bb, instr.gpr0, value); |
| 86 | break; | 91 | break; |
| 87 | } | 92 | } |
| 88 | case OpCode::Id::F2F_R: | 93 | case OpCode::Id::F2F_R: |
| 89 | case OpCode::Id::F2F_C: | 94 | case OpCode::Id::F2F_C: |
| 90 | case OpCode::Id::F2F_IMM: { | 95 | case OpCode::Id::F2F_IMM: { |
| 91 | UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word); | 96 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); |
| 92 | UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word); | 97 | UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); |
| 93 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 98 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 94 | "Condition codes generation in F2F is not implemented"); | 99 | "Condition codes generation in F2F is not implemented"); |
| 95 | 100 | ||
| @@ -107,6 +112,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 107 | } | 112 | } |
| 108 | }(); | 113 | }(); |
| 109 | 114 | ||
| 115 | if (instr.conversion.src_size == Register::Size::Short) { | ||
| 116 | // TODO: figure where extract is set in the encoding | ||
| 117 | value = Operation(OperationCode::FCastHalf0, PRECISE, value); | ||
| 118 | } | ||
| 119 | |||
| 110 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | 120 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); |
| 111 | 121 | ||
| 112 | value = [&]() { | 122 | value = [&]() { |
| @@ -124,19 +134,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 124 | default: | 134 | default: |
| 125 | UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", | 135 | UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", |
| 126 | static_cast<u32>(instr.conversion.f2f.rounding.Value())); | 136 | static_cast<u32>(instr.conversion.f2f.rounding.Value())); |
| 127 | return Immediate(0); | 137 | return value; |
| 128 | } | 138 | } |
| 129 | }(); | 139 | }(); |
| 130 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | 140 | value = GetSaturatedFloat(value, instr.alu.saturate_d); |
| 131 | 141 | ||
| 132 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | 142 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); |
| 143 | |||
| 144 | if (instr.conversion.dst_size == Register::Size::Short) { | ||
| 145 | value = Operation(OperationCode::HCastFloat, PRECISE, value); | ||
| 146 | } | ||
| 147 | |||
| 133 | SetRegister(bb, instr.gpr0, value); | 148 | SetRegister(bb, instr.gpr0, value); |
| 134 | break; | 149 | break; |
| 135 | } | 150 | } |
| 136 | case OpCode::Id::F2I_R: | 151 | case OpCode::Id::F2I_R: |
| 137 | case OpCode::Id::F2I_C: | 152 | case OpCode::Id::F2I_C: |
| 138 | case OpCode::Id::F2I_IMM: { | 153 | case OpCode::Id::F2I_IMM: { |
| 139 | UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); | 154 | UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); |
| 140 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 155 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 141 | "Condition codes generation in F2I is not implemented"); | 156 | "Condition codes generation in F2I is not implemented"); |
| 142 | Node value = [&]() { | 157 | Node value = [&]() { |
| @@ -153,6 +168,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 153 | } | 168 | } |
| 154 | }(); | 169 | }(); |
| 155 | 170 | ||
| 171 | if (instr.conversion.src_size == Register::Size::Short) { | ||
| 172 | // TODO: figure where extract is set in the encoding | ||
| 173 | value = Operation(OperationCode::FCastHalf0, PRECISE, value); | ||
| 174 | } | ||
| 175 | |||
| 156 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | 176 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); |
| 157 | 177 | ||
| 158 | value = [&]() { | 178 | value = [&]() { |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 715184d67..5f0852364 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -30,6 +30,8 @@ enum class OperationCode { | |||
| 30 | FNegate, /// (MetaArithmetic, float a) -> float | 30 | FNegate, /// (MetaArithmetic, float a) -> float |
| 31 | FAbsolute, /// (MetaArithmetic, float a) -> float | 31 | FAbsolute, /// (MetaArithmetic, float a) -> float |
| 32 | FClamp, /// (MetaArithmetic, float value, float min, float max) -> float | 32 | FClamp, /// (MetaArithmetic, float value, float min, float max) -> float |
| 33 | FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float | ||
| 34 | FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float | ||
| 33 | FMin, /// (MetaArithmetic, float a, float b) -> float | 35 | FMin, /// (MetaArithmetic, float a, float b) -> float |
| 34 | FMax, /// (MetaArithmetic, float a, float b) -> float | 36 | FMax, /// (MetaArithmetic, float a, float b) -> float |
| 35 | FCos, /// (MetaArithmetic, float a) -> float | 37 | FCos, /// (MetaArithmetic, float a) -> float |
| @@ -83,17 +85,18 @@ enum class OperationCode { | |||
| 83 | UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint | 85 | UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint |
| 84 | UBitCount, /// (MetaArithmetic, uint) -> uint | 86 | UBitCount, /// (MetaArithmetic, uint) -> uint |
| 85 | 87 | ||
| 86 | HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 | 88 | HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 |
| 87 | HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 | 89 | HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 |
| 88 | HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 | 90 | HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 |
| 89 | HAbsolute, /// (f16vec2 a) -> f16vec2 | 91 | HAbsolute, /// (f16vec2 a) -> f16vec2 |
| 90 | HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 | 92 | HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 |
| 91 | HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 | 93 | HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 |
| 92 | HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 | 94 | HCastFloat, /// (MetaArithmetic, float a) -> f16vec2 |
| 93 | HMergeF32, /// (f16vec2 src) -> float | 95 | HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 |
| 94 | HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 | 96 | HMergeF32, /// (f16vec2 src) -> float |
| 95 | HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 | 97 | HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 |
| 96 | HPack2, /// (float a, float b) -> f16vec2 | 98 | HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 |
| 99 | HPack2, /// (float a, float b) -> f16vec2 | ||
| 97 | 100 | ||
| 98 | LogicalAssign, /// (bool& dst, bool src) -> void | 101 | LogicalAssign, /// (bool& dst, bool src) -> void |
| 99 | LogicalAnd, /// (bool a, bool b) -> bool | 102 | LogicalAnd, /// (bool a, bool b) -> bool |