diff options
| author | 2018-06-11 23:19:37 -0400 | |
|---|---|---|
| committer | 2018-06-11 23:19:37 -0400 | |
| commit | 2dc8b5c2241f6a18f636bbb7a1ff6403c76e7eab (patch) | |
| tree | b6f3b5e2cdaedf6ef94be6437f67fc07054484cd /src | |
| parent | Merge pull request #555 from Subv/gpu_sysregs (diff) | |
| parent | gl_shader_decompiler: Implement saturate for float instructions. (diff) | |
| download | yuzu-2dc8b5c2241f6a18f636bbb7a1ff6403c76e7eab.tar.gz yuzu-2dc8b5c2241f6a18f636bbb7a1ff6403c76e7eab.tar.xz yuzu-2dc8b5c2241f6a18f636bbb7a1ff6403c76e7eab.zip | |
Merge pull request #552 from bunnei/sat-fmul
gl_shader_decompiler: Implement saturate for float instructions.
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 68 |
2 files changed, 32 insertions, 39 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index ec8dbd370..c158ffed2 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
| @@ -213,7 +213,6 @@ union Instruction { | |||
| 213 | BitField<28, 8, Register> gpr28; | 213 | BitField<28, 8, Register> gpr28; |
| 214 | BitField<39, 8, Register> gpr39; | 214 | BitField<39, 8, Register> gpr39; |
| 215 | BitField<48, 16, u64> opcode; | 215 | BitField<48, 16, u64> opcode; |
| 216 | BitField<50, 1, u64> saturate_a; | ||
| 217 | 216 | ||
| 218 | union { | 217 | union { |
| 219 | BitField<20, 19, u64> imm20_19; | 218 | BitField<20, 19, u64> imm20_19; |
| @@ -222,7 +221,7 @@ union Instruction { | |||
| 222 | BitField<46, 1, u64> abs_a; | 221 | BitField<46, 1, u64> abs_a; |
| 223 | BitField<48, 1, u64> negate_a; | 222 | BitField<48, 1, u64> negate_a; |
| 224 | BitField<49, 1, u64> abs_b; | 223 | BitField<49, 1, u64> abs_b; |
| 225 | BitField<50, 1, u64> abs_d; | 224 | BitField<50, 1, u64> saturate_d; |
| 226 | BitField<56, 1, u64> negate_imm; | 225 | BitField<56, 1, u64> negate_imm; |
| 227 | 226 | ||
| 228 | union { | 227 | union { |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 67726e7c6..b94b79384 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
| @@ -299,13 +299,15 @@ public: | |||
| 299 | * @param value The code representing the value to assign. | 299 | * @param value The code representing the value to assign. |
| 300 | * @param dest_num_components Number of components in the destination. | 300 | * @param dest_num_components Number of components in the destination. |
| 301 | * @param value_num_components Number of components in the value. | 301 | * @param value_num_components Number of components in the value. |
| 302 | * @param is_abs Optional, when True, applies absolute value to output. | 302 | * @param is_saturated Optional, when True, saturates the provided value. |
| 303 | * @param dest_elem Optional, the destination element to use for the operation. | 303 | * @param dest_elem Optional, the destination element to use for the operation. |
| 304 | */ | 304 | */ |
| 305 | void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value, | 305 | void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value, |
| 306 | u64 dest_num_components, u64 value_num_components, bool is_abs = false, | 306 | u64 dest_num_components, u64 value_num_components, |
| 307 | u64 dest_elem = 0) { | 307 | bool is_saturated = false, u64 dest_elem = 0) { |
| 308 | SetRegister(reg, elem, value, dest_num_components, value_num_components, is_abs, dest_elem); | 308 | |
| 309 | SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value, | ||
| 310 | dest_num_components, value_num_components, dest_elem); | ||
| 309 | } | 311 | } |
| 310 | 312 | ||
| 311 | /** | 313 | /** |
| @@ -315,18 +317,21 @@ public: | |||
| 315 | * @param value The code representing the value to assign. | 317 | * @param value The code representing the value to assign. |
| 316 | * @param dest_num_components Number of components in the destination. | 318 | * @param dest_num_components Number of components in the destination. |
| 317 | * @param value_num_components Number of components in the value. | 319 | * @param value_num_components Number of components in the value. |
| 318 | * @param is_abs Optional, when True, applies absolute value to output. | 320 | * @param is_saturated Optional, when True, saturates the provided value. |
| 319 | * @param dest_elem Optional, the destination element to use for the operation. | 321 | * @param dest_elem Optional, the destination element to use for the operation. |
| 320 | */ | 322 | */ |
| 321 | void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem, | 323 | void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem, |
| 322 | const std::string& value, u64 dest_num_components, | 324 | const std::string& value, u64 dest_num_components, |
| 323 | u64 value_num_components, bool is_abs = false, u64 dest_elem = 0) { | 325 | u64 value_num_components, bool is_saturated = false, |
| 326 | u64 dest_elem = 0) { | ||
| 327 | ASSERT_MSG(!is_saturated, "Unimplemented"); | ||
| 328 | |||
| 324 | const std::string func = GetGLSLConversionFunc( | 329 | const std::string func = GetGLSLConversionFunc( |
| 325 | is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger, | 330 | is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger, |
| 326 | GLSLRegister::Type::Float); | 331 | GLSLRegister::Type::Float); |
| 327 | 332 | ||
| 328 | SetRegister(reg, elem, func + '(' + value + ')', dest_num_components, value_num_components, | 333 | SetRegister(reg, elem, func + '(' + value + ')', dest_num_components, value_num_components, |
| 329 | is_abs, dest_elem); | 334 | dest_elem); |
| 330 | } | 335 | } |
| 331 | 336 | ||
| 332 | /** | 337 | /** |
| @@ -500,12 +505,10 @@ private: | |||
| 500 | * @param value The code representing the value to assign. | 505 | * @param value The code representing the value to assign. |
| 501 | * @param dest_num_components Number of components in the destination. | 506 | * @param dest_num_components Number of components in the destination. |
| 502 | * @param value_num_components Number of components in the value. | 507 | * @param value_num_components Number of components in the value. |
| 503 | * @param is_abs Optional, when True, applies absolute value to output. | ||
| 504 | * @param dest_elem Optional, the destination element to use for the operation. | 508 | * @param dest_elem Optional, the destination element to use for the operation. |
| 505 | */ | 509 | */ |
| 506 | void SetRegister(const Register& reg, u64 elem, const std::string& value, | 510 | void SetRegister(const Register& reg, u64 elem, const std::string& value, |
| 507 | u64 dest_num_components, u64 value_num_components, bool is_abs, | 511 | u64 dest_num_components, u64 value_num_components, u64 dest_elem) { |
| 508 | u64 dest_elem) { | ||
| 509 | std::string dest = GetRegister(reg, dest_elem); | 512 | std::string dest = GetRegister(reg, dest_elem); |
| 510 | if (dest_num_components > 1) { | 513 | if (dest_num_components > 1) { |
| 511 | dest += GetSwizzle(elem); | 514 | dest += GetSwizzle(elem); |
| @@ -516,8 +519,6 @@ private: | |||
| 516 | src += GetSwizzle(elem); | 519 | src += GetSwizzle(elem); |
| 517 | } | 520 | } |
| 518 | 521 | ||
| 519 | src = is_abs ? "abs(" + src + ')' : src; | ||
| 520 | |||
| 521 | shader.AddLine(dest + " = " + src + ';'); | 522 | shader.AddLine(dest + " = " + src + ';'); |
| 522 | } | 523 | } |
| 523 | 524 | ||
| @@ -808,9 +809,8 @@ private: | |||
| 808 | case OpCode::Id::FMUL_C: | 809 | case OpCode::Id::FMUL_C: |
| 809 | case OpCode::Id::FMUL_R: | 810 | case OpCode::Id::FMUL_R: |
| 810 | case OpCode::Id::FMUL_IMM: { | 811 | case OpCode::Id::FMUL_IMM: { |
| 811 | ASSERT_MSG(!instr.saturate_a, "Unimplemented"); | 812 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, |
| 812 | 813 | instr.alu.saturate_d); | |
| 813 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, instr.alu.abs_d); | ||
| 814 | break; | 814 | break; |
| 815 | } | 815 | } |
| 816 | case OpCode::Id::FMUL32_IMM: { | 816 | case OpCode::Id::FMUL32_IMM: { |
| @@ -823,41 +823,39 @@ private: | |||
| 823 | case OpCode::Id::FADD_C: | 823 | case OpCode::Id::FADD_C: |
| 824 | case OpCode::Id::FADD_R: | 824 | case OpCode::Id::FADD_R: |
| 825 | case OpCode::Id::FADD_IMM: { | 825 | case OpCode::Id::FADD_IMM: { |
| 826 | ASSERT_MSG(!instr.saturate_a, "Unimplemented"); | 826 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, |
| 827 | 827 | instr.alu.saturate_d); | |
| 828 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, instr.alu.abs_d); | ||
| 829 | break; | 828 | break; |
| 830 | } | 829 | } |
| 831 | case OpCode::Id::MUFU: { | 830 | case OpCode::Id::MUFU: { |
| 832 | ASSERT_MSG(!instr.saturate_a, "Unimplemented"); | ||
| 833 | |||
| 834 | switch (instr.sub_op) { | 831 | switch (instr.sub_op) { |
| 835 | case SubOp::Cos: | 832 | case SubOp::Cos: |
| 836 | regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, | 833 | regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, |
| 837 | instr.alu.abs_d); | 834 | instr.alu.saturate_d); |
| 838 | break; | 835 | break; |
| 839 | case SubOp::Sin: | 836 | case SubOp::Sin: |
| 840 | regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, | 837 | regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, |
| 841 | instr.alu.abs_d); | 838 | instr.alu.saturate_d); |
| 842 | break; | 839 | break; |
| 843 | case SubOp::Ex2: | 840 | case SubOp::Ex2: |
| 844 | regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, | 841 | regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, |
| 845 | instr.alu.abs_d); | 842 | instr.alu.saturate_d); |
| 846 | break; | 843 | break; |
| 847 | case SubOp::Lg2: | 844 | case SubOp::Lg2: |
| 848 | regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, | 845 | regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, |
| 849 | instr.alu.abs_d); | 846 | instr.alu.saturate_d); |
| 850 | break; | 847 | break; |
| 851 | case SubOp::Rcp: | 848 | case SubOp::Rcp: |
| 852 | regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, instr.alu.abs_d); | 849 | regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, |
| 850 | instr.alu.saturate_d); | ||
| 853 | break; | 851 | break; |
| 854 | case SubOp::Rsq: | 852 | case SubOp::Rsq: |
| 855 | regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, | 853 | regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, |
| 856 | instr.alu.abs_d); | 854 | instr.alu.saturate_d); |
| 857 | break; | 855 | break; |
| 858 | case SubOp::Min: | 856 | case SubOp::Min: |
| 859 | regs.SetRegisterToFloat(instr.gpr0, 0, "min(" + op_a + "," + op_b + ')', 1, 1, | 857 | regs.SetRegisterToFloat(instr.gpr0, 0, "min(" + op_a + "," + op_b + ')', 1, 1, |
| 860 | instr.alu.abs_d); | 858 | instr.alu.saturate_d); |
| 861 | break; | 859 | break; |
| 862 | default: | 860 | default: |
| 863 | NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}", | 861 | NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}", |
| @@ -1028,8 +1026,8 @@ private: | |||
| 1028 | case OpCode::Id::IADD_C: | 1026 | case OpCode::Id::IADD_C: |
| 1029 | case OpCode::Id::IADD_R: | 1027 | case OpCode::Id::IADD_R: |
| 1030 | case OpCode::Id::IADD_IMM: { | 1028 | case OpCode::Id::IADD_IMM: { |
| 1031 | ASSERT_MSG(!instr.saturate_a, "Unimplemented"); | 1029 | regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1, |
| 1032 | regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1); | 1030 | instr.alu.saturate_d); |
| 1033 | break; | 1031 | break; |
| 1034 | } | 1032 | } |
| 1035 | case OpCode::Id::ISCADD_C: | 1033 | case OpCode::Id::ISCADD_C: |
| @@ -1051,8 +1049,6 @@ private: | |||
| 1051 | break; | 1049 | break; |
| 1052 | } | 1050 | } |
| 1053 | case OpCode::Type::Ffma: { | 1051 | case OpCode::Type::Ffma: { |
| 1054 | ASSERT_MSG(!instr.saturate_a, "Unimplemented"); | ||
| 1055 | |||
| 1056 | std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); | 1052 | std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); |
| 1057 | std::string op_b = instr.ffma.negate_b ? "-" : ""; | 1053 | std::string op_b = instr.ffma.negate_b ? "-" : ""; |
| 1058 | std::string op_c = instr.ffma.negate_c ? "-" : ""; | 1054 | std::string op_c = instr.ffma.negate_c ? "-" : ""; |
| @@ -1086,13 +1082,13 @@ private: | |||
| 1086 | } | 1082 | } |
| 1087 | } | 1083 | } |
| 1088 | 1084 | ||
| 1089 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1); | 1085 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1, |
| 1086 | instr.alu.saturate_d); | ||
| 1090 | break; | 1087 | break; |
| 1091 | } | 1088 | } |
| 1092 | case OpCode::Type::Conversion: { | 1089 | case OpCode::Type::Conversion: { |
| 1093 | ASSERT_MSG(instr.conversion.size == Register::Size::Word, "Unimplemented"); | 1090 | ASSERT_MSG(instr.conversion.size == Register::Size::Word, "Unimplemented"); |
| 1094 | ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented"); | 1091 | ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented"); |
| 1095 | ASSERT_MSG(!instr.saturate_a, "Unimplemented"); | ||
| 1096 | 1092 | ||
| 1097 | switch (opcode->GetId()) { | 1093 | switch (opcode->GetId()) { |
| 1098 | case OpCode::Id::I2I_R: { | 1094 | case OpCode::Id::I2I_R: { |
| @@ -1106,7 +1102,7 @@ private: | |||
| 1106 | } | 1102 | } |
| 1107 | 1103 | ||
| 1108 | regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, | 1104 | regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, |
| 1109 | 1); | 1105 | 1, instr.alu.saturate_d); |
| 1110 | break; | 1106 | break; |
| 1111 | } | 1107 | } |
| 1112 | case OpCode::Id::I2F_R: { | 1108 | case OpCode::Id::I2F_R: { |
| @@ -1122,8 +1118,6 @@ private: | |||
| 1122 | break; | 1118 | break; |
| 1123 | } | 1119 | } |
| 1124 | case OpCode::Id::F2F_R: { | 1120 | case OpCode::Id::F2F_R: { |
| 1125 | ASSERT_MSG(!instr.saturate_a, "Unimplemented"); | ||
| 1126 | |||
| 1127 | std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); | 1121 | std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); |
| 1128 | 1122 | ||
| 1129 | switch (instr.conversion.f2f.rounding) { | 1123 | switch (instr.conversion.f2f.rounding) { |
| @@ -1149,7 +1143,7 @@ private: | |||
| 1149 | op_a = "abs(" + op_a + ')'; | 1143 | op_a = "abs(" + op_a + ')'; |
| 1150 | } | 1144 | } |
| 1151 | 1145 | ||
| 1152 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); | 1146 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d); |
| 1153 | break; | 1147 | break; |
| 1154 | } | 1148 | } |
| 1155 | case OpCode::Id::F2I_R: { | 1149 | case OpCode::Id::F2I_R: { |