summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: bunnei, 2018-10-28 13:06:21 -0400
committer: GitHub, 2018-10-28 13:06:21 -0400
commit: aa1cf608ed20984c410fc215d9f73937abe76ddc (patch)
tree: 1167c5b03eb2526f2704195a27d50f11430f583c /src
parent: Merge pull request #1606 from FearlessTobi/revert-1581-macosx-target-version (diff)
parent: Refactor precise usage and add FMNMX, MUFU, FMUL32 and FADD32I (diff)
download: yuzu-aa1cf608ed20984c410fc215d9f73937abe76ddc.tar.gz
yuzu-aa1cf608ed20984c410fc215d9f73937abe76ddc.tar.xz
yuzu-aa1cf608ed20984c410fc215d9f73937abe76ddc.zip
Merge pull request #1601 from FernandoS27/shader-precision
Improved Shader accuracy on Vertex and Geometry Shaders.
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 55
1 files changed, 35 insertions, 20 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index dec291a7d..dcf6941b0 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -341,10 +341,10 @@ public:
341 */ 341 */
342 void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value, 342 void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value,
343 u64 dest_num_components, u64 value_num_components, 343 u64 dest_num_components, u64 value_num_components,
344 bool is_saturated = false, u64 dest_elem = 0) { 344 bool is_saturated = false, u64 dest_elem = 0, bool precise = false) {
345 345
346 SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value, 346 SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value,
347 dest_num_components, value_num_components, dest_elem); 347 dest_num_components, value_num_components, dest_elem, precise);
348 } 348 }
349 349
350 /** 350 /**
@@ -368,7 +368,7 @@ public:
368 const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; 368 const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
369 369
370 SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')', 370 SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')',
371 dest_num_components, value_num_components, dest_elem); 371 dest_num_components, value_num_components, dest_elem, false);
372 372
373 if (sets_cc) { 373 if (sets_cc) {
374 const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )"; 374 const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )";
@@ -416,7 +416,7 @@ public:
416 } 416 }
417 }(); 417 }();
418 418
419 SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem); 419 SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem, false);
420 } 420 }
421 421
422 /** 422 /**
@@ -757,7 +757,8 @@ private:
757 * @param dest_elem Optional, the destination element to use for the operation. 757 * @param dest_elem Optional, the destination element to use for the operation.
758 */ 758 */
759 void SetRegister(const Register& reg, u64 elem, const std::string& value, 759 void SetRegister(const Register& reg, u64 elem, const std::string& value,
760 u64 dest_num_components, u64 value_num_components, u64 dest_elem) { 760 u64 dest_num_components, u64 value_num_components, u64 dest_elem,
761 bool precise) {
761 if (reg == Register::ZeroIndex) { 762 if (reg == Register::ZeroIndex) {
762 LOG_CRITICAL(HW_GPU, "Cannot set Register::ZeroIndex"); 763 LOG_CRITICAL(HW_GPU, "Cannot set Register::ZeroIndex");
763 UNREACHABLE(); 764 UNREACHABLE();
@@ -774,7 +775,18 @@ private:
774 src += GetSwizzle(elem); 775 src += GetSwizzle(elem);
775 } 776 }
776 777
777 shader.AddLine(dest + " = " + src + ';'); 778 if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) {
779 shader.AddLine('{');
780 ++shader.scope;
781 // This avoids optimizations of constant propagation and keeps the code as the original
782 // Sadly using the precise keyword causes "linking" errors on fragment shaders.
783 shader.AddLine("precise float tmp = " + src + ';');
784 shader.AddLine(dest + " = tmp;");
785 --shader.scope;
786 shader.AddLine('}');
787 } else {
788 shader.AddLine(dest + " = " + src + ';');
789 }
778 } 790 }
779 791
780 /// Build the GLSL register list. 792 /// Build the GLSL register list.
@@ -1510,8 +1522,9 @@ private:
1510 ASSERT_MSG(instr.fmul.cc == 0, "FMUL cc is not implemented"); 1522 ASSERT_MSG(instr.fmul.cc == 0, "FMUL cc is not implemented");
1511 1523
1512 op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b); 1524 op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b);
1525
1513 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, 1526 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
1514 instr.alu.saturate_d); 1527 instr.alu.saturate_d, 0, true);
1515 break; 1528 break;
1516 } 1529 }
1517 case OpCode::Id::FADD_C: 1530 case OpCode::Id::FADD_C:
@@ -1519,8 +1532,9 @@ private:
1519 case OpCode::Id::FADD_IMM: { 1532 case OpCode::Id::FADD_IMM: {
1520 op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); 1533 op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
1521 op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); 1534 op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
1535
1522 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, 1536 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1,
1523 instr.alu.saturate_d); 1537 instr.alu.saturate_d, 0, true);
1524 break; 1538 break;
1525 } 1539 }
1526 case OpCode::Id::MUFU: { 1540 case OpCode::Id::MUFU: {
@@ -1528,31 +1542,31 @@ private:
1528 switch (instr.sub_op) { 1542 switch (instr.sub_op) {
1529 case SubOp::Cos: 1543 case SubOp::Cos:
1530 regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, 1544 regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1,
1531 instr.alu.saturate_d); 1545 instr.alu.saturate_d, 0, true);
1532 break; 1546 break;
1533 case SubOp::Sin: 1547 case SubOp::Sin:
1534 regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, 1548 regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1,
1535 instr.alu.saturate_d); 1549 instr.alu.saturate_d, 0, true);
1536 break; 1550 break;
1537 case SubOp::Ex2: 1551 case SubOp::Ex2:
1538 regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, 1552 regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1,
1539 instr.alu.saturate_d); 1553 instr.alu.saturate_d, 0, true);
1540 break; 1554 break;
1541 case SubOp::Lg2: 1555 case SubOp::Lg2:
1542 regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, 1556 regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1,
1543 instr.alu.saturate_d); 1557 instr.alu.saturate_d, 0, true);
1544 break; 1558 break;
1545 case SubOp::Rcp: 1559 case SubOp::Rcp:
1546 regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, 1560 regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1,
1547 instr.alu.saturate_d); 1561 instr.alu.saturate_d, 0, true);
1548 break; 1562 break;
1549 case SubOp::Rsq: 1563 case SubOp::Rsq:
1550 regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, 1564 regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1,
1551 instr.alu.saturate_d); 1565 instr.alu.saturate_d, 0, true);
1552 break; 1566 break;
1553 case SubOp::Sqrt: 1567 case SubOp::Sqrt:
1554 regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1, 1568 regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1,
1555 instr.alu.saturate_d); 1569 instr.alu.saturate_d, 0, true);
1556 break; 1570 break;
1557 default: 1571 default:
1558 LOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}", 1572 LOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}",
@@ -1573,7 +1587,7 @@ private:
1573 regs.SetRegisterToFloat(instr.gpr0, 0, 1587 regs.SetRegisterToFloat(instr.gpr0, 0,
1574 '(' + condition + ") ? min(" + parameters + ") : max(" + 1588 '(' + condition + ") ? min(" + parameters + ") : max(" +
1575 parameters + ')', 1589 parameters + ')',
1576 1, 1); 1590 1, 1, false, 0, true);
1577 break; 1591 break;
1578 } 1592 }
1579 case OpCode::Id::RRO_C: 1593 case OpCode::Id::RRO_C:
@@ -1602,7 +1616,7 @@ private:
1602 regs.SetRegisterToFloat(instr.gpr0, 0, 1616 regs.SetRegisterToFloat(instr.gpr0, 0,
1603 regs.GetRegisterAsFloat(instr.gpr8) + " * " + 1617 regs.GetRegisterAsFloat(instr.gpr8) + " * " +
1604 GetImmediate32(instr), 1618 GetImmediate32(instr),
1605 1, 1, instr.fmul32.saturate); 1619 1, 1, instr.fmul32.saturate, 0, true);
1606 break; 1620 break;
1607 } 1621 }
1608 case OpCode::Id::FADD32I: { 1622 case OpCode::Id::FADD32I: {
@@ -1625,7 +1639,7 @@ private:
1625 op_b = "-(" + op_b + ')'; 1639 op_b = "-(" + op_b + ')';
1626 } 1640 }
1627 1641
1628 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1); 1642 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, 0, true);
1629 break; 1643 break;
1630 } 1644 }
1631 } 1645 }
@@ -2087,8 +2101,9 @@ private:
2087 } 2101 }
2088 } 2102 }
2089 2103
2090 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1, 2104 regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')',
2091 instr.alu.saturate_d); 2105 1, 1, instr.alu.saturate_d, 0, true);
2106
2092 break; 2107 break;
2093 } 2108 }
2094 case OpCode::Type::Hfma2: { 2109 case OpCode::Type::Hfma2: {