diff options
Diffstat (limited to 'src/shader_recompiler/ir_opt/constant_propagation_pass.cpp')
| -rw-r--r-- | src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 227 |
1 files changed, 226 insertions, 1 deletions
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 4d81e9336..f46e55122 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "shader_recompiler/environment.h" | 10 | #include "shader_recompiler/environment.h" |
| 11 | #include "shader_recompiler/exception.h" | 11 | #include "shader_recompiler/exception.h" |
| 12 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | 12 | #include "shader_recompiler/frontend/ir/ir_emitter.h" |
| 13 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 13 | #include "shader_recompiler/frontend/ir/value.h" | 14 | #include "shader_recompiler/frontend/ir/value.h" |
| 14 | #include "shader_recompiler/ir_opt/passes.h" | 15 | #include "shader_recompiler/ir_opt/passes.h" |
| 15 | 16 | ||
| @@ -410,7 +411,49 @@ void FoldSelect(IR::Inst& inst) { | |||
| 410 | } | 411 | } |
| 411 | } | 412 | } |
| 412 | 413 | ||
| 414 | void FoldFPAdd32(IR::Inst& inst) { | ||
| 415 | if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a + b; })) { | ||
| 416 | return; | ||
| 417 | } | ||
| 418 | const IR::Value lhs_value{inst.Arg(0)}; | ||
| 419 | const IR::Value rhs_value{inst.Arg(1)}; | ||
| 420 | const auto check_neutral = [](const IR::Value& one_operand) { | ||
| 421 | return one_operand.IsImmediate() && std::abs(one_operand.F32()) == 0.0f; | ||
| 422 | }; | ||
| 423 | if (check_neutral(lhs_value)) { | ||
| 424 | inst.ReplaceUsesWith(rhs_value); | ||
| 425 | } | ||
| 426 | if (check_neutral(rhs_value)) { | ||
| 427 | inst.ReplaceUsesWith(lhs_value); | ||
| 428 | } | ||
| 429 | } | ||
| 430 | |||
| 431 | bool FoldDerivateYFromCorrection(IR::Inst& inst) { | ||
| 432 | const IR::Value lhs_value{inst.Arg(0)}; | ||
| 433 | const IR::Value rhs_value{inst.Arg(1)}; | ||
| 434 | IR::Inst* const lhs_op{lhs_value.InstRecursive()}; | ||
| 435 | IR::Inst* const rhs_op{rhs_value.InstRecursive()}; | ||
| 436 | if (lhs_op->GetOpcode() == IR::Opcode::YDirection) { | ||
| 437 | if (rhs_op->GetOpcode() != IR::Opcode::DPdyFine) { | ||
| 438 | return false; | ||
| 439 | } | ||
| 440 | inst.ReplaceUsesWith(rhs_value); | ||
| 441 | return true; | ||
| 442 | } | ||
| 443 | if (rhs_op->GetOpcode() != IR::Opcode::YDirection) { | ||
| 444 | return false; | ||
| 445 | } | ||
| 446 | if (lhs_op->GetOpcode() != IR::Opcode::DPdyFine) { | ||
| 447 | return false; | ||
| 448 | } | ||
| 449 | inst.ReplaceUsesWith(lhs_value); | ||
| 450 | return true; | ||
| 451 | } | ||
| 452 | |||
| 413 | void FoldFPMul32(IR::Inst& inst) { | 453 | void FoldFPMul32(IR::Inst& inst) { |
| 454 | if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a * b; })) { | ||
| 455 | return; | ||
| 456 | } | ||
| 414 | const auto control{inst.Flags<IR::FpControl>()}; | 457 | const auto control{inst.Flags<IR::FpControl>()}; |
| 415 | if (control.no_contraction) { | 458 | if (control.no_contraction) { |
| 416 | return; | 459 | return; |
| @@ -421,6 +464,9 @@ void FoldFPMul32(IR::Inst& inst) { | |||
| 421 | if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { | 464 | if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { |
| 422 | return; | 465 | return; |
| 423 | } | 466 | } |
| 467 | if (FoldDerivateYFromCorrection(inst)) { | ||
| 468 | return; | ||
| 469 | } | ||
| 424 | IR::Inst* const lhs_op{lhs_value.InstRecursive()}; | 470 | IR::Inst* const lhs_op{lhs_value.InstRecursive()}; |
| 425 | IR::Inst* const rhs_op{rhs_value.InstRecursive()}; | 471 | IR::Inst* const rhs_op{rhs_value.InstRecursive()}; |
| 426 | if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || | 472 | if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || |
| @@ -622,7 +668,12 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { | |||
| 622 | } | 668 | } |
| 623 | const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)}; | 669 | const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)}; |
| 624 | if (value_2 != value_3) { | 670 | if (value_2 != value_3) { |
| 625 | return; | 671 | if (!value_2.IsImmediate() || !value_3.IsImmediate()) { |
| 672 | return; | ||
| 673 | } | ||
| 674 | if (Common::BitCast<u32>(value_2.F32()) != value_3.U32()) { | ||
| 675 | return; | ||
| 676 | } | ||
| 626 | } | 677 | } |
| 627 | const IR::Value index{inst2->Arg(1)}; | 678 | const IR::Value index{inst2->Arg(1)}; |
| 628 | const IR::Value clamp{inst2->Arg(2)}; | 679 | const IR::Value clamp{inst2->Arg(2)}; |
| @@ -648,6 +699,169 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { | |||
| 648 | } | 699 | } |
| 649 | } | 700 | } |
| 650 | 701 | ||
| 702 | bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) { | ||
| 703 | if (coord.IsImmediate()) { | ||
| 704 | return false; | ||
| 705 | } | ||
| 706 | const auto check_through_shuffle = [](IR::Value input, IR::Value& result) { | ||
| 707 | const IR::Value value_1{GetThroughCast(input.Resolve(), IR::Opcode::BitCastF32U32)}; | ||
| 708 | IR::Inst* const inst2{value_1.InstRecursive()}; | ||
| 709 | if (inst2->GetOpcode() != IR::Opcode::ShuffleIndex) { | ||
| 710 | return false; | ||
| 711 | } | ||
| 712 | const IR::Value index{inst2->Arg(1).Resolve()}; | ||
| 713 | const IR::Value clamp{inst2->Arg(2).Resolve()}; | ||
| 714 | const IR::Value segmentation_mask{inst2->Arg(3).Resolve()}; | ||
| 715 | if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) { | ||
| 716 | return false; | ||
| 717 | } | ||
| 718 | if (index.U32() != 3 && clamp.U32() != 3) { | ||
| 719 | return false; | ||
| 720 | } | ||
| 721 | result = GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32); | ||
| 722 | return true; | ||
| 723 | }; | ||
| 724 | IR::Inst* const inst = coord.InstRecursive(); | ||
| 725 | if (inst->GetOpcode() != IR::Opcode::FSwizzleAdd) { | ||
| 726 | return false; | ||
| 727 | } | ||
| 728 | std::array<IR::Value, 3> temporary_values; | ||
| 729 | IR::Value value_1 = inst->Arg(0).Resolve(); | ||
| 730 | IR::Value value_2 = inst->Arg(1).Resolve(); | ||
| 731 | IR::Value value_3 = inst->Arg(2).Resolve(); | ||
| 732 | std::array<u32, 4> swizzles_mask_a{}; | ||
| 733 | std::array<u32, 4> swizzles_mask_b{}; | ||
| 734 | const auto resolve_mask = [](std::array<u32, 4>& mask_results, IR::Value mask) { | ||
| 735 | u32 value = mask.U32(); | ||
| 736 | for (size_t i = 0; i < 4; i++) { | ||
| 737 | mask_results[i] = (value >> (i * 2)) & 0x3; | ||
| 738 | } | ||
| 739 | }; | ||
| 740 | resolve_mask(swizzles_mask_a, value_3); | ||
| 741 | size_t coordinate_index = 0; | ||
| 742 | const auto resolve_pending = [&](IR::Value resolve_v) { | ||
| 743 | IR::Inst* const inst_r = resolve_v.InstRecursive(); | ||
| 744 | if (inst_r->GetOpcode() != IR::Opcode::FSwizzleAdd) { | ||
| 745 | return false; | ||
| 746 | } | ||
| 747 | if (!check_through_shuffle(inst_r->Arg(0).Resolve(), temporary_values[1])) { | ||
| 748 | return false; | ||
| 749 | } | ||
| 750 | if (!check_through_shuffle(inst_r->Arg(1).Resolve(), temporary_values[2])) { | ||
| 751 | return false; | ||
| 752 | } | ||
| 753 | resolve_mask(swizzles_mask_b, inst_r->Arg(2).Resolve()); | ||
| 754 | return true; | ||
| 755 | }; | ||
| 756 | if (value_1.IsImmediate() || value_2.IsImmediate()) { | ||
| 757 | return false; | ||
| 758 | } | ||
| 759 | bool should_continue = false; | ||
| 760 | if (resolve_pending(value_1)) { | ||
| 761 | should_continue = check_through_shuffle(value_2, temporary_values[0]); | ||
| 762 | coordinate_index = 0; | ||
| 763 | } | ||
| 764 | if (resolve_pending(value_2)) { | ||
| 765 | should_continue = check_through_shuffle(value_1, temporary_values[0]); | ||
| 766 | coordinate_index = 2; | ||
| 767 | } | ||
| 768 | if (!should_continue) { | ||
| 769 | return false; | ||
| 770 | } | ||
| 771 | // figure which is which | ||
| 772 | size_t zero_mask_a = 0; | ||
| 773 | size_t zero_mask_b = 0; | ||
| 774 | for (size_t i = 0; i < 4; i++) { | ||
| 775 | if (swizzles_mask_a[i] == 2 || swizzles_mask_b[i] == 2) { | ||
| 776 | // last operand can be inversed, we cannot determine a result. | ||
| 777 | return false; | ||
| 778 | } | ||
| 779 | zero_mask_a |= static_cast<size_t>(swizzles_mask_a[i] == 3 ? 1 : 0) << i; | ||
| 780 | zero_mask_b |= static_cast<size_t>(swizzles_mask_b[i] == 3 ? 1 : 0) << i; | ||
| 781 | } | ||
| 782 | static constexpr size_t ddx_pattern = 0b1010; | ||
| 783 | static constexpr size_t ddx_pattern_inv = ~ddx_pattern & 0b00001111; | ||
| 784 | if (std::popcount(zero_mask_a) != 2) { | ||
| 785 | return false; | ||
| 786 | } | ||
| 787 | if (std::popcount(zero_mask_b) != 2) { | ||
| 788 | return false; | ||
| 789 | } | ||
| 790 | if (zero_mask_a == zero_mask_b) { | ||
| 791 | return false; | ||
| 792 | } | ||
| 793 | results[0] = temporary_values[coordinate_index]; | ||
| 794 | |||
| 795 | if (coordinate_index == 0) { | ||
| 796 | if (zero_mask_b == ddx_pattern || zero_mask_b == ddx_pattern_inv) { | ||
| 797 | results[1] = temporary_values[1]; | ||
| 798 | results[2] = temporary_values[2]; | ||
| 799 | return true; | ||
| 800 | } | ||
| 801 | results[2] = temporary_values[1]; | ||
| 802 | results[1] = temporary_values[2]; | ||
| 803 | } else { | ||
| 804 | const auto assign_result = [&results](IR::Value temporary_value, size_t mask) { | ||
| 805 | if (mask == ddx_pattern || mask == ddx_pattern_inv) { | ||
| 806 | results[1] = temporary_value; | ||
| 807 | return; | ||
| 808 | } | ||
| 809 | results[2] = temporary_value; | ||
| 810 | }; | ||
| 811 | assign_result(temporary_values[1], zero_mask_b); | ||
| 812 | assign_result(temporary_values[0], zero_mask_a); | ||
| 813 | } | ||
| 814 | |||
| 815 | return true; | ||
| 816 | } | ||
| 817 | |||
| 818 | void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) { | ||
| 819 | IR::TextureInstInfo info = inst.Flags<IR::TextureInstInfo>(); | ||
| 820 | auto orig_opcode = inst.GetOpcode(); | ||
| 821 | if (info.ndv_is_active == 0) { | ||
| 822 | return; | ||
| 823 | } | ||
| 824 | if (info.type != TextureType::Color3D) { | ||
| 825 | return; | ||
| 826 | } | ||
| 827 | const IR::Value handle{inst.Arg(0)}; | ||
| 828 | const IR::Value coords{inst.Arg(1)}; | ||
| 829 | const IR::Value bias_lc{inst.Arg(2)}; | ||
| 830 | const IR::Value offset{inst.Arg(3)}; | ||
| 831 | if (!offset.IsImmediate()) { | ||
| 832 | return; | ||
| 833 | } | ||
| 834 | IR::Inst* const inst2 = coords.InstRecursive(); | ||
| 835 | std::array<std::array<IR::Value, 3>, 3> results_matrix; | ||
| 836 | for (size_t i = 0; i < 3; i++) { | ||
| 837 | if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) { | ||
| 838 | return; | ||
| 839 | } | ||
| 840 | } | ||
| 841 | IR::F32 lod_clamp{}; | ||
| 842 | if (info.has_lod_clamp != 0) { | ||
| 843 | if (!bias_lc.IsImmediate()) { | ||
| 844 | lod_clamp = IR::F32{bias_lc.InstRecursive()->Arg(1).Resolve()}; | ||
| 845 | } else { | ||
| 846 | lod_clamp = IR::F32{bias_lc}; | ||
| 847 | } | ||
| 848 | } | ||
| 849 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 850 | IR::Value new_coords = | ||
| 851 | ir.CompositeConstruct(results_matrix[0][0], results_matrix[1][0], results_matrix[2][0]); | ||
| 852 | IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2], | ||
| 853 | results_matrix[1][1], results_matrix[1][2]); | ||
| 854 | IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]); | ||
| 855 | info.num_derivates.Assign(3); | ||
| 856 | IR::Value new_gradient_instruction = | ||
| 857 | ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info); | ||
| 858 | IR::Inst* const new_inst = new_gradient_instruction.InstRecursive(); | ||
| 859 | if (orig_opcode == IR::Opcode::ImageSampleImplicitLod) { | ||
| 860 | new_inst->ReplaceOpcode(IR::Opcode::ImageGradient); | ||
| 861 | } | ||
| 862 | inst.ReplaceUsesWith(new_gradient_instruction); | ||
| 863 | } | ||
| 864 | |||
| 651 | void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { | 865 | void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { |
| 652 | const IR::Value bank{inst.Arg(0)}; | 866 | const IR::Value bank{inst.Arg(0)}; |
| 653 | const IR::Value offset{inst.Arg(1)}; | 867 | const IR::Value offset{inst.Arg(1)}; |
| @@ -743,6 +957,12 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) { | |||
| 743 | case IR::Opcode::SelectF32: | 957 | case IR::Opcode::SelectF32: |
| 744 | case IR::Opcode::SelectF64: | 958 | case IR::Opcode::SelectF64: |
| 745 | return FoldSelect(inst); | 959 | return FoldSelect(inst); |
| 960 | case IR::Opcode::FPNeg32: | ||
| 961 | FoldWhenAllImmediates(inst, [](f32 a) { return -a; }); | ||
| 962 | return; | ||
| 963 | case IR::Opcode::FPAdd32: | ||
| 964 | FoldFPAdd32(inst); | ||
| 965 | return; | ||
| 746 | case IR::Opcode::FPMul32: | 966 | case IR::Opcode::FPMul32: |
| 747 | return FoldFPMul32(inst); | 967 | return FoldFPMul32(inst); |
| 748 | case IR::Opcode::LogicalAnd: | 968 | case IR::Opcode::LogicalAnd: |
| @@ -858,6 +1078,11 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) { | |||
| 858 | FoldDriverConstBuffer(env, block, inst, 1); | 1078 | FoldDriverConstBuffer(env, block, inst, 1); |
| 859 | } | 1079 | } |
| 860 | break; | 1080 | break; |
| 1081 | case IR::Opcode::BindlessImageSampleImplicitLod: | ||
| 1082 | case IR::Opcode::BoundImageSampleImplicitLod: | ||
| 1083 | case IR::Opcode::ImageSampleImplicitLod: | ||
| 1084 | FoldImageSampleImplicitLod(block, inst); | ||
| 1085 | break; | ||
| 861 | default: | 1086 | default: |
| 862 | break; | 1087 | break; |
| 863 | } | 1088 | } |