diff options
Diffstat (limited to '')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 55 |
1 files changed, 35 insertions, 20 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index dec291a7d..dcf6941b0 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -341,10 +341,10 @@ public: | |||
| 341 | */ | 341 | */ |
| 342 | void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value, | 342 | void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value, |
| 343 | u64 dest_num_components, u64 value_num_components, | 343 | u64 dest_num_components, u64 value_num_components, |
| 344 | bool is_saturated = false, u64 dest_elem = 0) { | 344 | bool is_saturated = false, u64 dest_elem = 0, bool precise = false) { |
| 345 | 345 | ||
| 346 | SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value, | 346 | SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value, |
| 347 | dest_num_components, value_num_components, dest_elem); | 347 | dest_num_components, value_num_components, dest_elem, precise); |
| 348 | } | 348 | } |
| 349 | 349 | ||
| 350 | /** | 350 | /** |
| @@ -368,7 +368,7 @@ public: | |||
| 368 | const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; | 368 | const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; |
| 369 | 369 | ||
| 370 | SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')', | 370 | SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')', |
| 371 | dest_num_components, value_num_components, dest_elem); | 371 | dest_num_components, value_num_components, dest_elem, false); |
| 372 | 372 | ||
| 373 | if (sets_cc) { | 373 | if (sets_cc) { |
| 374 | const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )"; | 374 | const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )"; |
| @@ -416,7 +416,7 @@ public: | |||
| 416 | } | 416 | } |
| 417 | }(); | 417 | }(); |
| 418 | 418 | ||
| 419 | SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem); | 419 | SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem, false); |
| 420 | } | 420 | } |
| 421 | 421 | ||
| 422 | /** | 422 | /** |
| @@ -757,7 +757,8 @@ private: | |||
| 757 | * @param dest_elem Optional, the destination element to use for the operation. | 757 | * @param dest_elem Optional, the destination element to use for the operation. |
| 758 | */ | 758 | */ |
| 759 | void SetRegister(const Register& reg, u64 elem, const std::string& value, | 759 | void SetRegister(const Register& reg, u64 elem, const std::string& value, |
| 760 | u64 dest_num_components, u64 value_num_components, u64 dest_elem) { | 760 | u64 dest_num_components, u64 value_num_components, u64 dest_elem, |
| 761 | bool precise) { | ||
| 761 | if (reg == Register::ZeroIndex) { | 762 | if (reg == Register::ZeroIndex) { |
| 762 | LOG_CRITICAL(HW_GPU, "Cannot set Register::ZeroIndex"); | 763 | LOG_CRITICAL(HW_GPU, "Cannot set Register::ZeroIndex"); |
| 763 | UNREACHABLE(); | 764 | UNREACHABLE(); |
| @@ -774,7 +775,18 @@ private: | |||
| 774 | src += GetSwizzle(elem); | 775 | src += GetSwizzle(elem); |
| 775 | } | 776 | } |
| 776 | 777 | ||
| 777 | shader.AddLine(dest + " = " + src + ';'); | 778 | if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) { |
| 779 | shader.AddLine('{'); | ||
| 780 | ++shader.scope; | ||
| 781 | // This avoids optimizations of constant propagation and keeps the code as the original | ||
| 782 | // Sadly using the precise keyword causes "linking" errors on fragment shaders. | ||
| 783 | shader.AddLine("precise float tmp = " + src + ';'); | ||
| 784 | shader.AddLine(dest + " = tmp;"); | ||
| 785 | --shader.scope; | ||
| 786 | shader.AddLine('}'); | ||
| 787 | } else { | ||
| 788 | shader.AddLine(dest + " = " + src + ';'); | ||
| 789 | } | ||
| 778 | } | 790 | } |
| 779 | 791 | ||
| 780 | /// Build the GLSL register list. | 792 | /// Build the GLSL register list. |
| @@ -1510,8 +1522,9 @@ private: | |||
| 1510 | ASSERT_MSG(instr.fmul.cc == 0, "FMUL cc is not implemented"); | 1522 | ASSERT_MSG(instr.fmul.cc == 0, "FMUL cc is not implemented"); |
| 1511 | 1523 | ||
| 1512 | op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b); | 1524 | op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b); |
| 1525 | |||
| 1513 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, | 1526 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, |
| 1514 | instr.alu.saturate_d); | 1527 | instr.alu.saturate_d, 0, true); |
| 1515 | break; | 1528 | break; |
| 1516 | } | 1529 | } |
| 1517 | case OpCode::Id::FADD_C: | 1530 | case OpCode::Id::FADD_C: |
| @@ -1519,8 +1532,9 @@ private: | |||
| 1519 | case OpCode::Id::FADD_IMM: { | 1532 | case OpCode::Id::FADD_IMM: { |
| 1520 | op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); | 1533 | op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); |
| 1521 | op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); | 1534 | op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); |
| 1535 | |||
| 1522 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, | 1536 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, |
| 1523 | instr.alu.saturate_d); | 1537 | instr.alu.saturate_d, 0, true); |
| 1524 | break; | 1538 | break; |
| 1525 | } | 1539 | } |
| 1526 | case OpCode::Id::MUFU: { | 1540 | case OpCode::Id::MUFU: { |
| @@ -1528,31 +1542,31 @@ private: | |||
| 1528 | switch (instr.sub_op) { | 1542 | switch (instr.sub_op) { |
| 1529 | case SubOp::Cos: | 1543 | case SubOp::Cos: |
| 1530 | regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, | 1544 | regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, |
| 1531 | instr.alu.saturate_d); | 1545 | instr.alu.saturate_d, 0, true); |
| 1532 | break; | 1546 | break; |
| 1533 | case SubOp::Sin: | 1547 | case SubOp::Sin: |
| 1534 | regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, | 1548 | regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, |
| 1535 | instr.alu.saturate_d); | 1549 | instr.alu.saturate_d, 0, true); |
| 1536 | break; | 1550 | break; |
| 1537 | case SubOp::Ex2: | 1551 | case SubOp::Ex2: |
| 1538 | regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, | 1552 | regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, |
| 1539 | instr.alu.saturate_d); | 1553 | instr.alu.saturate_d, 0, true); |
| 1540 | break; | 1554 | break; |
| 1541 | case SubOp::Lg2: | 1555 | case SubOp::Lg2: |
| 1542 | regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, | 1556 | regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, |
| 1543 | instr.alu.saturate_d); | 1557 | instr.alu.saturate_d, 0, true); |
| 1544 | break; | 1558 | break; |
| 1545 | case SubOp::Rcp: | 1559 | case SubOp::Rcp: |
| 1546 | regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, | 1560 | regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, |
| 1547 | instr.alu.saturate_d); | 1561 | instr.alu.saturate_d, 0, true); |
| 1548 | break; | 1562 | break; |
| 1549 | case SubOp::Rsq: | 1563 | case SubOp::Rsq: |
| 1550 | regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, | 1564 | regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, |
| 1551 | instr.alu.saturate_d); | 1565 | instr.alu.saturate_d, 0, true); |
| 1552 | break; | 1566 | break; |
| 1553 | case SubOp::Sqrt: | 1567 | case SubOp::Sqrt: |
| 1554 | regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1, | 1568 | regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1, |
| 1555 | instr.alu.saturate_d); | 1569 | instr.alu.saturate_d, 0, true); |
| 1556 | break; | 1570 | break; |
| 1557 | default: | 1571 | default: |
| 1558 | LOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}", | 1572 | LOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}", |
| @@ -1573,7 +1587,7 @@ private: | |||
| 1573 | regs.SetRegisterToFloat(instr.gpr0, 0, | 1587 | regs.SetRegisterToFloat(instr.gpr0, 0, |
| 1574 | '(' + condition + ") ? min(" + parameters + ") : max(" + | 1588 | '(' + condition + ") ? min(" + parameters + ") : max(" + |
| 1575 | parameters + ')', | 1589 | parameters + ')', |
| 1576 | 1, 1); | 1590 | 1, 1, false, 0, true); |
| 1577 | break; | 1591 | break; |
| 1578 | } | 1592 | } |
| 1579 | case OpCode::Id::RRO_C: | 1593 | case OpCode::Id::RRO_C: |
| @@ -1602,7 +1616,7 @@ private: | |||
| 1602 | regs.SetRegisterToFloat(instr.gpr0, 0, | 1616 | regs.SetRegisterToFloat(instr.gpr0, 0, |
| 1603 | regs.GetRegisterAsFloat(instr.gpr8) + " * " + | 1617 | regs.GetRegisterAsFloat(instr.gpr8) + " * " + |
| 1604 | GetImmediate32(instr), | 1618 | GetImmediate32(instr), |
| 1605 | 1, 1, instr.fmul32.saturate); | 1619 | 1, 1, instr.fmul32.saturate, 0, true); |
| 1606 | break; | 1620 | break; |
| 1607 | } | 1621 | } |
| 1608 | case OpCode::Id::FADD32I: { | 1622 | case OpCode::Id::FADD32I: { |
| @@ -1625,7 +1639,7 @@ private: | |||
| 1625 | op_b = "-(" + op_b + ')'; | 1639 | op_b = "-(" + op_b + ')'; |
| 1626 | } | 1640 | } |
| 1627 | 1641 | ||
| 1628 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1); | 1642 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, 0, true); |
| 1629 | break; | 1643 | break; |
| 1630 | } | 1644 | } |
| 1631 | } | 1645 | } |
| @@ -2087,8 +2101,9 @@ private: | |||
| 2087 | } | 2101 | } |
| 2088 | } | 2102 | } |
| 2089 | 2103 | ||
| 2090 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1, | 2104 | regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')', |
| 2091 | instr.alu.saturate_d); | 2105 | 1, 1, instr.alu.saturate_d, 0, true); |
| 2106 | |||
| 2092 | break; | 2107 | break; |
| 2093 | } | 2108 | } |
| 2094 | case OpCode::Type::Hfma2: { | 2109 | case OpCode::Type::Hfma2: { |