diff options
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 102 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 9 |
2 files changed, 37 insertions, 74 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 002ae90a7..dcf6941b0 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -341,10 +341,10 @@ public: | |||
| 341 | */ | 341 | */ |
| 342 | void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value, | 342 | void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value, |
| 343 | u64 dest_num_components, u64 value_num_components, | 343 | u64 dest_num_components, u64 value_num_components, |
| 344 | bool is_saturated = false, u64 dest_elem = 0) { | 344 | bool is_saturated = false, u64 dest_elem = 0, bool precise = false) { |
| 345 | 345 | ||
| 346 | SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value, | 346 | SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value, |
| 347 | dest_num_components, value_num_components, dest_elem); | 347 | dest_num_components, value_num_components, dest_elem, precise); |
| 348 | } | 348 | } |
| 349 | 349 | ||
| 350 | /** | 350 | /** |
| @@ -368,7 +368,7 @@ public: | |||
| 368 | const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; | 368 | const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; |
| 369 | 369 | ||
| 370 | SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')', | 370 | SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')', |
| 371 | dest_num_components, value_num_components, dest_elem); | 371 | dest_num_components, value_num_components, dest_elem, false); |
| 372 | 372 | ||
| 373 | if (sets_cc) { | 373 | if (sets_cc) { |
| 374 | const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )"; | 374 | const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )"; |
| @@ -416,7 +416,7 @@ public: | |||
| 416 | } | 416 | } |
| 417 | }(); | 417 | }(); |
| 418 | 418 | ||
| 419 | SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem); | 419 | SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem, false); |
| 420 | } | 420 | } |
| 421 | 421 | ||
| 422 | /** | 422 | /** |
| @@ -757,7 +757,8 @@ private: | |||
| 757 | * @param dest_elem Optional, the destination element to use for the operation. | 757 | * @param dest_elem Optional, the destination element to use for the operation. |
| 758 | */ | 758 | */ |
| 759 | void SetRegister(const Register& reg, u64 elem, const std::string& value, | 759 | void SetRegister(const Register& reg, u64 elem, const std::string& value, |
| 760 | u64 dest_num_components, u64 value_num_components, u64 dest_elem) { | 760 | u64 dest_num_components, u64 value_num_components, u64 dest_elem, |
| 761 | bool precise) { | ||
| 761 | if (reg == Register::ZeroIndex) { | 762 | if (reg == Register::ZeroIndex) { |
| 762 | LOG_CRITICAL(HW_GPU, "Cannot set Register::ZeroIndex"); | 763 | LOG_CRITICAL(HW_GPU, "Cannot set Register::ZeroIndex"); |
| 763 | UNREACHABLE(); | 764 | UNREACHABLE(); |
| @@ -774,7 +775,18 @@ private: | |||
| 774 | src += GetSwizzle(elem); | 775 | src += GetSwizzle(elem); |
| 775 | } | 776 | } |
| 776 | 777 | ||
| 777 | shader.AddLine(dest + " = " + src + ';'); | 778 | if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) { |
| 779 | shader.AddLine('{'); | ||
| 780 | ++shader.scope; | ||
| 781 | // This avoids optimizations of constant propagation and keeps the code as the original | ||
| 782 | // Sadly using the precise keyword causes "linking" errors on fragment shaders. | ||
| 783 | shader.AddLine("precise float tmp = " + src + ';'); | ||
| 784 | shader.AddLine(dest + " = tmp;"); | ||
| 785 | --shader.scope; | ||
| 786 | shader.AddLine('}'); | ||
| 787 | } else { | ||
| 788 | shader.AddLine(dest + " = " + src + ';'); | ||
| 789 | } | ||
| 778 | } | 790 | } |
| 779 | 791 | ||
| 780 | /// Build the GLSL register list. | 792 | /// Build the GLSL register list. |
| @@ -1511,24 +1523,8 @@ private: | |||
| 1511 | 1523 | ||
| 1512 | op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b); | 1524 | op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b); |
| 1513 | 1525 | ||
| 1514 | shader.AddLine('{'); | 1526 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, |
| 1515 | ++shader.scope; | 1527 | instr.alu.saturate_d, 0, true); |
| 1516 | |||
| 1517 | // This avoids optimizations of constant propagation and keeps the code as the original | ||
| 1518 | // Sadly using the precise keyword causes "linking" errors on fragment shaders. | ||
| 1519 | if (stage == Maxwell3D::Regs::ShaderStage::Fragment) { | ||
| 1520 | shader.AddLine("float tmp = " + op_a + " * " + op_b + ';'); | ||
| 1521 | } else { | ||
| 1522 | shader.AddLine("precise float tmp = " + op_a + " * " + op_b + ';'); | ||
| 1523 | } | ||
| 1524 | |||
| 1525 | |||
| 1526 | regs.SetRegisterToFloat(instr.gpr0, 0, "tmp", 1, 1, | ||
| 1527 | instr.alu.saturate_d); | ||
| 1528 | |||
| 1529 | |||
| 1530 | --shader.scope; | ||
| 1531 | shader.AddLine('}'); | ||
| 1532 | break; | 1528 | break; |
| 1533 | } | 1529 | } |
| 1534 | case OpCode::Id::FADD_C: | 1530 | case OpCode::Id::FADD_C: |
| @@ -1537,24 +1533,8 @@ private: | |||
| 1537 | op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); | 1533 | op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); |
| 1538 | op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); | 1534 | op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); |
| 1539 | 1535 | ||
| 1540 | shader.AddLine('{'); | 1536 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, |
| 1541 | ++shader.scope; | 1537 | instr.alu.saturate_d, 0, true); |
| 1542 | |||
| 1543 | // This avoids optimizations of constant propagation and keeps the code as the original | ||
| 1544 | // Sadly using the precise keyword causes "linking" errors on fragment shaders. | ||
| 1545 | if (stage == Maxwell3D::Regs::ShaderStage::Fragment) { | ||
| 1546 | shader.AddLine("float tmp = " + op_a + " + " + op_b + ';'); | ||
| 1547 | } else { | ||
| 1548 | shader.AddLine("precise float tmp = " + op_a + " + " + op_b + ';'); | ||
| 1549 | } | ||
| 1550 | regs.SetRegisterToFloat(instr.gpr0, 0, "tmp", 1, 1, | ||
| 1551 | instr.alu.saturate_d); | ||
| 1552 | |||
| 1553 | |||
| 1554 | --shader.scope; | ||
| 1555 | shader.AddLine('}'); | ||
| 1556 | |||
| 1557 | |||
| 1558 | break; | 1538 | break; |
| 1559 | } | 1539 | } |
| 1560 | case OpCode::Id::MUFU: { | 1540 | case OpCode::Id::MUFU: { |
| @@ -1562,31 +1542,31 @@ private: | |||
| 1562 | switch (instr.sub_op) { | 1542 | switch (instr.sub_op) { |
| 1563 | case SubOp::Cos: | 1543 | case SubOp::Cos: |
| 1564 | regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, | 1544 | regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, |
| 1565 | instr.alu.saturate_d); | 1545 | instr.alu.saturate_d, 0, true); |
| 1566 | break; | 1546 | break; |
| 1567 | case SubOp::Sin: | 1547 | case SubOp::Sin: |
| 1568 | regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, | 1548 | regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, |
| 1569 | instr.alu.saturate_d); | 1549 | instr.alu.saturate_d, 0, true); |
| 1570 | break; | 1550 | break; |
| 1571 | case SubOp::Ex2: | 1551 | case SubOp::Ex2: |
| 1572 | regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, | 1552 | regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, |
| 1573 | instr.alu.saturate_d); | 1553 | instr.alu.saturate_d, 0, true); |
| 1574 | break; | 1554 | break; |
| 1575 | case SubOp::Lg2: | 1555 | case SubOp::Lg2: |
| 1576 | regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, | 1556 | regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, |
| 1577 | instr.alu.saturate_d); | 1557 | instr.alu.saturate_d, 0, true); |
| 1578 | break; | 1558 | break; |
| 1579 | case SubOp::Rcp: | 1559 | case SubOp::Rcp: |
| 1580 | regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, | 1560 | regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, |
| 1581 | instr.alu.saturate_d); | 1561 | instr.alu.saturate_d, 0, true); |
| 1582 | break; | 1562 | break; |
| 1583 | case SubOp::Rsq: | 1563 | case SubOp::Rsq: |
| 1584 | regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, | 1564 | regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, |
| 1585 | instr.alu.saturate_d); | 1565 | instr.alu.saturate_d, 0, true); |
| 1586 | break; | 1566 | break; |
| 1587 | case SubOp::Sqrt: | 1567 | case SubOp::Sqrt: |
| 1588 | regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1, | 1568 | regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1, |
| 1589 | instr.alu.saturate_d); | 1569 | instr.alu.saturate_d, 0, true); |
| 1590 | break; | 1570 | break; |
| 1591 | default: | 1571 | default: |
| 1592 | LOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}", | 1572 | LOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}", |
| @@ -1607,7 +1587,7 @@ private: | |||
| 1607 | regs.SetRegisterToFloat(instr.gpr0, 0, | 1587 | regs.SetRegisterToFloat(instr.gpr0, 0, |
| 1608 | '(' + condition + ") ? min(" + parameters + ") : max(" + | 1588 | '(' + condition + ") ? min(" + parameters + ") : max(" + |
| 1609 | parameters + ')', | 1589 | parameters + ')', |
| 1610 | 1, 1); | 1590 | 1, 1, false, 0, true); |
| 1611 | break; | 1591 | break; |
| 1612 | } | 1592 | } |
| 1613 | case OpCode::Id::RRO_C: | 1593 | case OpCode::Id::RRO_C: |
| @@ -1636,7 +1616,7 @@ private: | |||
| 1636 | regs.SetRegisterToFloat(instr.gpr0, 0, | 1616 | regs.SetRegisterToFloat(instr.gpr0, 0, |
| 1637 | regs.GetRegisterAsFloat(instr.gpr8) + " * " + | 1617 | regs.GetRegisterAsFloat(instr.gpr8) + " * " + |
| 1638 | GetImmediate32(instr), | 1618 | GetImmediate32(instr), |
| 1639 | 1, 1, instr.fmul32.saturate); | 1619 | 1, 1, instr.fmul32.saturate, 0, true); |
| 1640 | break; | 1620 | break; |
| 1641 | } | 1621 | } |
| 1642 | case OpCode::Id::FADD32I: { | 1622 | case OpCode::Id::FADD32I: { |
| @@ -1659,7 +1639,7 @@ private: | |||
| 1659 | op_b = "-(" + op_b + ')'; | 1639 | op_b = "-(" + op_b + ')'; |
| 1660 | } | 1640 | } |
| 1661 | 1641 | ||
| 1662 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1); | 1642 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, 0, true); |
| 1663 | break; | 1643 | break; |
| 1664 | } | 1644 | } |
| 1665 | } | 1645 | } |
| @@ -2121,23 +2101,9 @@ private: | |||
| 2121 | } | 2101 | } |
| 2122 | } | 2102 | } |
| 2123 | 2103 | ||
| 2124 | shader.AddLine('{'); | 2104 | regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')', |
| 2125 | ++shader.scope; | 2105 | 1, 1, instr.alu.saturate_d, 0, true); |
| 2126 | 2106 | ||
| 2127 | // This avoids optimizations of constant propagation and keeps the code as the original | ||
| 2128 | // Sadly using the precise keyword causes "linking" errors on fragment shaders. | ||
| 2129 | if (stage == Maxwell3D::Regs::ShaderStage::Fragment) { | ||
| 2130 | shader.AddLine("float tmp = fma(" + op_a + ", " + op_b + ", " + op_c + ");"); | ||
| 2131 | } else { | ||
| 2132 | shader.AddLine("precise float tmp = fma(" + op_a + ", " + op_b + ", " + op_c + ");"); | ||
| 2133 | } | ||
| 2134 | |||
| 2135 | regs.SetRegisterToFloat(instr.gpr0, 0, "tmp", 1, 1, | ||
| 2136 | instr.alu.saturate_d); | ||
| 2137 | |||
| 2138 | |||
| 2139 | --shader.scope; | ||
| 2140 | shader.AddLine('}'); | ||
| 2141 | break; | 2107 | break; |
| 2142 | } | 2108 | } |
| 2143 | case OpCode::Type::Hfma2: { | 2109 | case OpCode::Type::Hfma2: { |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 08dd8dc6c..dfb562706 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -15,8 +15,7 @@ static constexpr u32 PROGRAM_OFFSET{10}; | |||
| 15 | 15 | ||
| 16 | ProgramResult GenerateVertexShader(const ShaderSetup& setup) { | 16 | ProgramResult GenerateVertexShader(const ShaderSetup& setup) { |
| 17 | std::string out = "#version 430 core\n"; | 17 | std::string out = "#version 430 core\n"; |
| 18 | out += "#extension GL_ARB_separate_shader_objects : enable\n"; | 18 | out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; |
| 19 | out += "#extension GL_ARB_gpu_shader5 : enable\n\n"; | ||
| 20 | out += Decompiler::GetCommonDeclarations(); | 19 | out += Decompiler::GetCommonDeclarations(); |
| 21 | 20 | ||
| 22 | out += R"( | 21 | out += R"( |
| @@ -84,8 +83,7 @@ void main() { | |||
| 84 | 83 | ||
| 85 | ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { | 84 | ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { |
| 86 | std::string out = "#version 430 core\n"; | 85 | std::string out = "#version 430 core\n"; |
| 87 | out += "#extension GL_ARB_separate_shader_objects : enable\n"; | 86 | out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; |
| 88 | out += "#extension GL_ARB_gpu_shader5 : enable\n\n"; | ||
| 89 | out += Decompiler::GetCommonDeclarations(); | 87 | out += Decompiler::GetCommonDeclarations(); |
| 90 | out += "bool exec_geometry();\n"; | 88 | out += "bool exec_geometry();\n"; |
| 91 | 89 | ||
| @@ -119,8 +117,7 @@ void main() { | |||
| 119 | 117 | ||
| 120 | ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { | 118 | ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { |
| 121 | std::string out = "#version 430 core\n"; | 119 | std::string out = "#version 430 core\n"; |
| 122 | out += "#extension GL_ARB_separate_shader_objects : enable\n"; | 120 | out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; |
| 123 | out += "#extension GL_ARB_gpu_shader5 : enable\n\n"; | ||
| 124 | out += Decompiler::GetCommonDeclarations(); | 121 | out += Decompiler::GetCommonDeclarations(); |
| 125 | out += "bool exec_fragment();\n"; | 122 | out += "bool exec_fragment();\n"; |
| 126 | 123 | ||