summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: bunnei, 2018-06-11 23:19:37 -0400
committer: GitHub, 2018-06-11 23:19:37 -0400
commit: 2dc8b5c2241f6a18f636bbb7a1ff6403c76e7eab (patch)
tree: b6f3b5e2cdaedf6ef94be6437f67fc07054484cd /src
parent: Merge pull request #555 from Subv/gpu_sysregs (diff)
parent: gl_shader_decompiler: Implement saturate for float instructions. (diff)
download: yuzu-2dc8b5c2241f6a18f636bbb7a1ff6403c76e7eab.tar.gz
yuzu-2dc8b5c2241f6a18f636bbb7a1ff6403c76e7eab.tar.xz
yuzu-2dc8b5c2241f6a18f636bbb7a1ff6403c76e7eab.zip
Merge pull request #552 from bunnei/sat-fmul
gl_shader_decompiler: Implement saturate for float instructions.
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/engines/shader_bytecode.h | 3
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 68
2 files changed, 32 insertions, 39 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index ec8dbd370..c158ffed2 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -213,7 +213,6 @@ union Instruction {
213 BitField<28, 8, Register> gpr28; 213 BitField<28, 8, Register> gpr28;
214 BitField<39, 8, Register> gpr39; 214 BitField<39, 8, Register> gpr39;
215 BitField<48, 16, u64> opcode; 215 BitField<48, 16, u64> opcode;
216 BitField<50, 1, u64> saturate_a;
217 216
218 union { 217 union {
219 BitField<20, 19, u64> imm20_19; 218 BitField<20, 19, u64> imm20_19;
@@ -222,7 +221,7 @@ union Instruction {
222 BitField<46, 1, u64> abs_a; 221 BitField<46, 1, u64> abs_a;
223 BitField<48, 1, u64> negate_a; 222 BitField<48, 1, u64> negate_a;
224 BitField<49, 1, u64> abs_b; 223 BitField<49, 1, u64> abs_b;
225 BitField<50, 1, u64> abs_d; 224 BitField<50, 1, u64> saturate_d;
226 BitField<56, 1, u64> negate_imm; 225 BitField<56, 1, u64> negate_imm;
227 226
228 union { 227 union {
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 67726e7c6..b94b79384 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -299,13 +299,15 @@ public:
299 * @param value The code representing the value to assign. 299 * @param value The code representing the value to assign.
300 * @param dest_num_components Number of components in the destination. 300 * @param dest_num_components Number of components in the destination.
301 * @param value_num_components Number of components in the value. 301 * @param value_num_components Number of components in the value.
302 * @param is_abs Optional, when True, applies absolute value to output. 302 * @param is_saturated Optional, when True, saturates the provided value.
303 * @param dest_elem Optional, the destination element to use for the operation. 303 * @param dest_elem Optional, the destination element to use for the operation.
304 */ 304 */
305 void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value, 305 void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value,
306 u64 dest_num_components, u64 value_num_components, bool is_abs = false, 306 u64 dest_num_components, u64 value_num_components,
307 u64 dest_elem = 0) { 307 bool is_saturated = false, u64 dest_elem = 0) {
308 SetRegister(reg, elem, value, dest_num_components, value_num_components, is_abs, dest_elem); 308
309 SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value,
310 dest_num_components, value_num_components, dest_elem);
309 } 311 }
310 312
311 /** 313 /**
@@ -315,18 +317,21 @@ public:
315 * @param value The code representing the value to assign. 317 * @param value The code representing the value to assign.
316 * @param dest_num_components Number of components in the destination. 318 * @param dest_num_components Number of components in the destination.
317 * @param value_num_components Number of components in the value. 319 * @param value_num_components Number of components in the value.
318 * @param is_abs Optional, when True, applies absolute value to output. 320 * @param is_saturated Optional, when True, saturates the provided value.
319 * @param dest_elem Optional, the destination element to use for the operation. 321 * @param dest_elem Optional, the destination element to use for the operation.
320 */ 322 */
321 void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem, 323 void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem,
322 const std::string& value, u64 dest_num_components, 324 const std::string& value, u64 dest_num_components,
323 u64 value_num_components, bool is_abs = false, u64 dest_elem = 0) { 325 u64 value_num_components, bool is_saturated = false,
326 u64 dest_elem = 0) {
327 ASSERT_MSG(!is_saturated, "Unimplemented");
328
324 const std::string func = GetGLSLConversionFunc( 329 const std::string func = GetGLSLConversionFunc(
325 is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger, 330 is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger,
326 GLSLRegister::Type::Float); 331 GLSLRegister::Type::Float);
327 332
328 SetRegister(reg, elem, func + '(' + value + ')', dest_num_components, value_num_components, 333 SetRegister(reg, elem, func + '(' + value + ')', dest_num_components, value_num_components,
329 is_abs, dest_elem); 334 dest_elem);
330 } 335 }
331 336
332 /** 337 /**
@@ -500,12 +505,10 @@ private:
500 * @param value The code representing the value to assign. 505 * @param value The code representing the value to assign.
501 * @param dest_num_components Number of components in the destination. 506 * @param dest_num_components Number of components in the destination.
502 * @param value_num_components Number of components in the value. 507 * @param value_num_components Number of components in the value.
503 * @param is_abs Optional, when True, applies absolute value to output.
504 * @param dest_elem Optional, the destination element to use for the operation. 508 * @param dest_elem Optional, the destination element to use for the operation.
505 */ 509 */
506 void SetRegister(const Register& reg, u64 elem, const std::string& value, 510 void SetRegister(const Register& reg, u64 elem, const std::string& value,
507 u64 dest_num_components, u64 value_num_components, bool is_abs, 511 u64 dest_num_components, u64 value_num_components, u64 dest_elem) {
508 u64 dest_elem) {
509 std::string dest = GetRegister(reg, dest_elem); 512 std::string dest = GetRegister(reg, dest_elem);
510 if (dest_num_components > 1) { 513 if (dest_num_components > 1) {
511 dest += GetSwizzle(elem); 514 dest += GetSwizzle(elem);
@@ -516,8 +519,6 @@ private:
516 src += GetSwizzle(elem); 519 src += GetSwizzle(elem);
517 } 520 }
518 521
519 src = is_abs ? "abs(" + src + ')' : src;
520
521 shader.AddLine(dest + " = " + src + ';'); 522 shader.AddLine(dest + " = " + src + ';');
522 } 523 }
523 524
@@ -808,9 +809,8 @@ private:
808 case OpCode::Id::FMUL_C: 809 case OpCode::Id::FMUL_C:
809 case OpCode::Id::FMUL_R: 810 case OpCode::Id::FMUL_R:
810 case OpCode::Id::FMUL_IMM: { 811 case OpCode::Id::FMUL_IMM: {
811 ASSERT_MSG(!instr.saturate_a, "Unimplemented"); 812 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
812 813 instr.alu.saturate_d);
813 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, instr.alu.abs_d);
814 break; 814 break;
815 } 815 }
816 case OpCode::Id::FMUL32_IMM: { 816 case OpCode::Id::FMUL32_IMM: {
@@ -823,41 +823,39 @@ private:
823 case OpCode::Id::FADD_C: 823 case OpCode::Id::FADD_C:
824 case OpCode::Id::FADD_R: 824 case OpCode::Id::FADD_R:
825 case OpCode::Id::FADD_IMM: { 825 case OpCode::Id::FADD_IMM: {
826 ASSERT_MSG(!instr.saturate_a, "Unimplemented"); 826 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1,
827 827 instr.alu.saturate_d);
828 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, instr.alu.abs_d);
829 break; 828 break;
830 } 829 }
831 case OpCode::Id::MUFU: { 830 case OpCode::Id::MUFU: {
832 ASSERT_MSG(!instr.saturate_a, "Unimplemented");
833
834 switch (instr.sub_op) { 831 switch (instr.sub_op) {
835 case SubOp::Cos: 832 case SubOp::Cos:
836 regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, 833 regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1,
837 instr.alu.abs_d); 834 instr.alu.saturate_d);
838 break; 835 break;
839 case SubOp::Sin: 836 case SubOp::Sin:
840 regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, 837 regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1,
841 instr.alu.abs_d); 838 instr.alu.saturate_d);
842 break; 839 break;
843 case SubOp::Ex2: 840 case SubOp::Ex2:
844 regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, 841 regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1,
845 instr.alu.abs_d); 842 instr.alu.saturate_d);
846 break; 843 break;
847 case SubOp::Lg2: 844 case SubOp::Lg2:
848 regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, 845 regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1,
849 instr.alu.abs_d); 846 instr.alu.saturate_d);
850 break; 847 break;
851 case SubOp::Rcp: 848 case SubOp::Rcp:
852 regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, instr.alu.abs_d); 849 regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1,
850 instr.alu.saturate_d);
853 break; 851 break;
854 case SubOp::Rsq: 852 case SubOp::Rsq:
855 regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, 853 regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1,
856 instr.alu.abs_d); 854 instr.alu.saturate_d);
857 break; 855 break;
858 case SubOp::Min: 856 case SubOp::Min:
859 regs.SetRegisterToFloat(instr.gpr0, 0, "min(" + op_a + "," + op_b + ')', 1, 1, 857 regs.SetRegisterToFloat(instr.gpr0, 0, "min(" + op_a + "," + op_b + ')', 1, 1,
860 instr.alu.abs_d); 858 instr.alu.saturate_d);
861 break; 859 break;
862 default: 860 default:
863 NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}", 861 NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}",
@@ -1028,8 +1026,8 @@ private:
1028 case OpCode::Id::IADD_C: 1026 case OpCode::Id::IADD_C:
1029 case OpCode::Id::IADD_R: 1027 case OpCode::Id::IADD_R:
1030 case OpCode::Id::IADD_IMM: { 1028 case OpCode::Id::IADD_IMM: {
1031 ASSERT_MSG(!instr.saturate_a, "Unimplemented"); 1029 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
1032 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1); 1030 instr.alu.saturate_d);
1033 break; 1031 break;
1034 } 1032 }
1035 case OpCode::Id::ISCADD_C: 1033 case OpCode::Id::ISCADD_C:
@@ -1051,8 +1049,6 @@ private:
1051 break; 1049 break;
1052 } 1050 }
1053 case OpCode::Type::Ffma: { 1051 case OpCode::Type::Ffma: {
1054 ASSERT_MSG(!instr.saturate_a, "Unimplemented");
1055
1056 std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); 1052 std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
1057 std::string op_b = instr.ffma.negate_b ? "-" : ""; 1053 std::string op_b = instr.ffma.negate_b ? "-" : "";
1058 std::string op_c = instr.ffma.negate_c ? "-" : ""; 1054 std::string op_c = instr.ffma.negate_c ? "-" : "";
@@ -1086,13 +1082,13 @@ private:
1086 } 1082 }
1087 } 1083 }
1088 1084
1089 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1); 1085 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1,
1086 instr.alu.saturate_d);
1090 break; 1087 break;
1091 } 1088 }
1092 case OpCode::Type::Conversion: { 1089 case OpCode::Type::Conversion: {
1093 ASSERT_MSG(instr.conversion.size == Register::Size::Word, "Unimplemented"); 1090 ASSERT_MSG(instr.conversion.size == Register::Size::Word, "Unimplemented");
1094 ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented"); 1091 ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented");
1095 ASSERT_MSG(!instr.saturate_a, "Unimplemented");
1096 1092
1097 switch (opcode->GetId()) { 1093 switch (opcode->GetId()) {
1098 case OpCode::Id::I2I_R: { 1094 case OpCode::Id::I2I_R: {
@@ -1106,7 +1102,7 @@ private:
1106 } 1102 }
1107 1103
1108 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, 1104 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
1109 1); 1105 1, instr.alu.saturate_d);
1110 break; 1106 break;
1111 } 1107 }
1112 case OpCode::Id::I2F_R: { 1108 case OpCode::Id::I2F_R: {
@@ -1122,8 +1118,6 @@ private:
1122 break; 1118 break;
1123 } 1119 }
1124 case OpCode::Id::F2F_R: { 1120 case OpCode::Id::F2F_R: {
1125 ASSERT_MSG(!instr.saturate_a, "Unimplemented");
1126
1127 std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); 1121 std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
1128 1122
1129 switch (instr.conversion.f2f.rounding) { 1123 switch (instr.conversion.f2f.rounding) {
@@ -1149,7 +1143,7 @@ private:
1149 op_a = "abs(" + op_a + ')'; 1143 op_a = "abs(" + op_a + ')';
1150 } 1144 }
1151 1145
1152 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); 1146 regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d);
1153 break; 1147 break;
1154 } 1148 }
1155 case OpCode::Id::F2I_R: { 1149 case OpCode::Id::F2I_R: {