summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar FernandoS272018-10-28 11:38:40 -0400
committerGravatar FernandoS272018-10-28 11:38:40 -0400
commite5ca097e32d9ec03b1ba4b5e44e3b6553e3addd4 (patch)
tree4d67b25f67f13f69f4be495a8f01dfdd89418f7c
parentImproved Shader accuracy on Vertex and Geometry Shaders with FFMA, FMUL and FADD (diff)
downloadyuzu-e5ca097e32d9ec03b1ba4b5e44e3b6553e3addd4.tar.gz
yuzu-e5ca097e32d9ec03b1ba4b5e44e3b6553e3addd4.tar.xz
yuzu-e5ca097e32d9ec03b1ba4b5e44e3b6553e3addd4.zip
Refactor precise usage and add FMNMX, MUFU, FMUL32 and FADD32I
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp102
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp9
2 files changed, 37 insertions, 74 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 002ae90a7..dcf6941b0 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -341,10 +341,10 @@ public:
341 */ 341 */
342 void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value, 342 void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value,
343 u64 dest_num_components, u64 value_num_components, 343 u64 dest_num_components, u64 value_num_components,
344 bool is_saturated = false, u64 dest_elem = 0) { 344 bool is_saturated = false, u64 dest_elem = 0, bool precise = false) {
345 345
346 SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value, 346 SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value,
347 dest_num_components, value_num_components, dest_elem); 347 dest_num_components, value_num_components, dest_elem, precise);
348 } 348 }
349 349
350 /** 350 /**
@@ -368,7 +368,7 @@ public:
368 const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; 368 const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
369 369
370 SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')', 370 SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')',
371 dest_num_components, value_num_components, dest_elem); 371 dest_num_components, value_num_components, dest_elem, false);
372 372
373 if (sets_cc) { 373 if (sets_cc) {
374 const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )"; 374 const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )";
@@ -416,7 +416,7 @@ public:
416 } 416 }
417 }(); 417 }();
418 418
419 SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem); 419 SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem, false);
420 } 420 }
421 421
422 /** 422 /**
@@ -757,7 +757,8 @@ private:
757 * @param dest_elem Optional, the destination element to use for the operation. 757 * @param dest_elem Optional, the destination element to use for the operation.
758 */ 758 */
759 void SetRegister(const Register& reg, u64 elem, const std::string& value, 759 void SetRegister(const Register& reg, u64 elem, const std::string& value,
760 u64 dest_num_components, u64 value_num_components, u64 dest_elem) { 760 u64 dest_num_components, u64 value_num_components, u64 dest_elem,
761 bool precise) {
761 if (reg == Register::ZeroIndex) { 762 if (reg == Register::ZeroIndex) {
762 LOG_CRITICAL(HW_GPU, "Cannot set Register::ZeroIndex"); 763 LOG_CRITICAL(HW_GPU, "Cannot set Register::ZeroIndex");
763 UNREACHABLE(); 764 UNREACHABLE();
@@ -774,7 +775,18 @@ private:
774 src += GetSwizzle(elem); 775 src += GetSwizzle(elem);
775 } 776 }
776 777
777 shader.AddLine(dest + " = " + src + ';'); 778 if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) {
779 shader.AddLine('{');
780 ++shader.scope;
781 // This avoids optimizations of constant propagation and keeps the code as the original
782 // Sadly using the precise keyword causes "linking" errors on fragment shaders.
783 shader.AddLine("precise float tmp = " + src + ';');
784 shader.AddLine(dest + " = tmp;");
785 --shader.scope;
786 shader.AddLine('}');
787 } else {
788 shader.AddLine(dest + " = " + src + ';');
789 }
778 } 790 }
779 791
780 /// Build the GLSL register list. 792 /// Build the GLSL register list.
@@ -1511,24 +1523,8 @@ private:
1511 1523
1512 op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b); 1524 op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b);
1513 1525
1514 shader.AddLine('{'); 1526 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
1515 ++shader.scope; 1527 instr.alu.saturate_d, 0, true);
1516
1517 // This avoids optimizations of constant propagation and keeps the code as the original
1518 // Sadly using the precise keyword causes "linking" errors on fragment shaders.
1519 if (stage == Maxwell3D::Regs::ShaderStage::Fragment) {
1520 shader.AddLine("float tmp = " + op_a + " * " + op_b + ';');
1521 } else {
1522 shader.AddLine("precise float tmp = " + op_a + " * " + op_b + ';');
1523 }
1524
1525
1526 regs.SetRegisterToFloat(instr.gpr0, 0, "tmp", 1, 1,
1527 instr.alu.saturate_d);
1528
1529
1530 --shader.scope;
1531 shader.AddLine('}');
1532 break; 1528 break;
1533 } 1529 }
1534 case OpCode::Id::FADD_C: 1530 case OpCode::Id::FADD_C:
@@ -1537,24 +1533,8 @@ private:
1537 op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); 1533 op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
1538 op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); 1534 op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
1539 1535
1540 shader.AddLine('{'); 1536 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1,
1541 ++shader.scope; 1537 instr.alu.saturate_d, 0, true);
1542
1543 // This avoids optimizations of constant propagation and keeps the code as the original
1544 // Sadly using the precise keyword causes "linking" errors on fragment shaders.
1545 if (stage == Maxwell3D::Regs::ShaderStage::Fragment) {
1546 shader.AddLine("float tmp = " + op_a + " + " + op_b + ';');
1547 } else {
1548 shader.AddLine("precise float tmp = " + op_a + " + " + op_b + ';');
1549 }
1550 regs.SetRegisterToFloat(instr.gpr0, 0, "tmp", 1, 1,
1551 instr.alu.saturate_d);
1552
1553
1554 --shader.scope;
1555 shader.AddLine('}');
1556
1557
1558 break; 1538 break;
1559 } 1539 }
1560 case OpCode::Id::MUFU: { 1540 case OpCode::Id::MUFU: {
@@ -1562,31 +1542,31 @@ private:
1562 switch (instr.sub_op) { 1542 switch (instr.sub_op) {
1563 case SubOp::Cos: 1543 case SubOp::Cos:
1564 regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, 1544 regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1,
1565 instr.alu.saturate_d); 1545 instr.alu.saturate_d, 0, true);
1566 break; 1546 break;
1567 case SubOp::Sin: 1547 case SubOp::Sin:
1568 regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, 1548 regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1,
1569 instr.alu.saturate_d); 1549 instr.alu.saturate_d, 0, true);
1570 break; 1550 break;
1571 case SubOp::Ex2: 1551 case SubOp::Ex2:
1572 regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, 1552 regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1,
1573 instr.alu.saturate_d); 1553 instr.alu.saturate_d, 0, true);
1574 break; 1554 break;
1575 case SubOp::Lg2: 1555 case SubOp::Lg2:
1576 regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, 1556 regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1,
1577 instr.alu.saturate_d); 1557 instr.alu.saturate_d, 0, true);
1578 break; 1558 break;
1579 case SubOp::Rcp: 1559 case SubOp::Rcp:
1580 regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, 1560 regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1,
1581 instr.alu.saturate_d); 1561 instr.alu.saturate_d, 0, true);
1582 break; 1562 break;
1583 case SubOp::Rsq: 1563 case SubOp::Rsq:
1584 regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, 1564 regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1,
1585 instr.alu.saturate_d); 1565 instr.alu.saturate_d, 0, true);
1586 break; 1566 break;
1587 case SubOp::Sqrt: 1567 case SubOp::Sqrt:
1588 regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1, 1568 regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1,
1589 instr.alu.saturate_d); 1569 instr.alu.saturate_d, 0, true);
1590 break; 1570 break;
1591 default: 1571 default:
1592 LOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}", 1572 LOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}",
@@ -1607,7 +1587,7 @@ private:
1607 regs.SetRegisterToFloat(instr.gpr0, 0, 1587 regs.SetRegisterToFloat(instr.gpr0, 0,
1608 '(' + condition + ") ? min(" + parameters + ") : max(" + 1588 '(' + condition + ") ? min(" + parameters + ") : max(" +
1609 parameters + ')', 1589 parameters + ')',
1610 1, 1); 1590 1, 1, false, 0, true);
1611 break; 1591 break;
1612 } 1592 }
1613 case OpCode::Id::RRO_C: 1593 case OpCode::Id::RRO_C:
@@ -1636,7 +1616,7 @@ private:
1636 regs.SetRegisterToFloat(instr.gpr0, 0, 1616 regs.SetRegisterToFloat(instr.gpr0, 0,
1637 regs.GetRegisterAsFloat(instr.gpr8) + " * " + 1617 regs.GetRegisterAsFloat(instr.gpr8) + " * " +
1638 GetImmediate32(instr), 1618 GetImmediate32(instr),
1639 1, 1, instr.fmul32.saturate); 1619 1, 1, instr.fmul32.saturate, 0, true);
1640 break; 1620 break;
1641 } 1621 }
1642 case OpCode::Id::FADD32I: { 1622 case OpCode::Id::FADD32I: {
@@ -1659,7 +1639,7 @@ private:
1659 op_b = "-(" + op_b + ')'; 1639 op_b = "-(" + op_b + ')';
1660 } 1640 }
1661 1641
1662 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1); 1642 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, 0, true);
1663 break; 1643 break;
1664 } 1644 }
1665 } 1645 }
@@ -2121,23 +2101,9 @@ private:
2121 } 2101 }
2122 } 2102 }
2123 2103
2124 shader.AddLine('{'); 2104 regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')',
2125 ++shader.scope; 2105 1, 1, instr.alu.saturate_d, 0, true);
2126 2106
2127 // This avoids optimizations of constant propagation and keeps the code as the original
2128 // Sadly using the precise keyword causes "linking" errors on fragment shaders.
2129 if (stage == Maxwell3D::Regs::ShaderStage::Fragment) {
2130 shader.AddLine("float tmp = fma(" + op_a + ", " + op_b + ", " + op_c + ");");
2131 } else {
2132 shader.AddLine("precise float tmp = fma(" + op_a + ", " + op_b + ", " + op_c + ");");
2133 }
2134
2135 regs.SetRegisterToFloat(instr.gpr0, 0, "tmp", 1, 1,
2136 instr.alu.saturate_d);
2137
2138
2139 --shader.scope;
2140 shader.AddLine('}');
2141 break; 2107 break;
2142 } 2108 }
2143 case OpCode::Type::Hfma2: { 2109 case OpCode::Type::Hfma2: {
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 08dd8dc6c..dfb562706 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -15,8 +15,7 @@ static constexpr u32 PROGRAM_OFFSET{10};
15 15
16ProgramResult GenerateVertexShader(const ShaderSetup& setup) { 16ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
17 std::string out = "#version 430 core\n"; 17 std::string out = "#version 430 core\n";
18 out += "#extension GL_ARB_separate_shader_objects : enable\n"; 18 out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
19 out += "#extension GL_ARB_gpu_shader5 : enable\n\n";
20 out += Decompiler::GetCommonDeclarations(); 19 out += Decompiler::GetCommonDeclarations();
21 20
22 out += R"( 21 out += R"(
@@ -84,8 +83,7 @@ void main() {
84 83
85ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { 84ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
86 std::string out = "#version 430 core\n"; 85 std::string out = "#version 430 core\n";
87 out += "#extension GL_ARB_separate_shader_objects : enable\n"; 86 out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
88 out += "#extension GL_ARB_gpu_shader5 : enable\n\n";
89 out += Decompiler::GetCommonDeclarations(); 87 out += Decompiler::GetCommonDeclarations();
90 out += "bool exec_geometry();\n"; 88 out += "bool exec_geometry();\n";
91 89
@@ -119,8 +117,7 @@ void main() {
119 117
120ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { 118ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
121 std::string out = "#version 430 core\n"; 119 std::string out = "#version 430 core\n";
122 out += "#extension GL_ARB_separate_shader_objects : enable\n"; 120 out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
123 out += "#extension GL_ARB_gpu_shader5 : enable\n\n";
124 out += Decompiler::GetCommonDeclarations(); 121 out += Decompiler::GetCommonDeclarations();
125 out += "bool exec_fragment();\n"; 122 out += "bool exec_fragment();\n";
126 123