summary | refs | log | tree | commit | diff
path: root/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/shader_recompiler/ir_opt/constant_propagation_pass.cpp')
-rw-r--r-- src/shader_recompiler/ir_opt/constant_propagation_pass.cpp 227
1 files changed, 226 insertions, 1 deletions
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index 4d81e9336..f46e55122 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -10,6 +10,7 @@
10#include "shader_recompiler/environment.h" 10#include "shader_recompiler/environment.h"
11#include "shader_recompiler/exception.h" 11#include "shader_recompiler/exception.h"
12#include "shader_recompiler/frontend/ir/ir_emitter.h" 12#include "shader_recompiler/frontend/ir/ir_emitter.h"
13#include "shader_recompiler/frontend/ir/modifiers.h"
13#include "shader_recompiler/frontend/ir/value.h" 14#include "shader_recompiler/frontend/ir/value.h"
14#include "shader_recompiler/ir_opt/passes.h" 15#include "shader_recompiler/ir_opt/passes.h"
15 16
@@ -410,7 +411,49 @@ void FoldSelect(IR::Inst& inst) {
410 } 411 }
411} 412}
412 413
414void FoldFPAdd32(IR::Inst& inst) {
415 if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a + b; })) {
416 return;
417 }
418 const IR::Value lhs_value{inst.Arg(0)};
419 const IR::Value rhs_value{inst.Arg(1)};
420 const auto check_neutral = [](const IR::Value& one_operand) {
421 return one_operand.IsImmediate() && std::abs(one_operand.F32()) == 0.0f;
422 };
423 if (check_neutral(lhs_value)) {
424 inst.ReplaceUsesWith(rhs_value);
425 }
426 if (check_neutral(rhs_value)) {
427 inst.ReplaceUsesWith(lhs_value);
428 }
429}
430
431bool FoldDerivateYFromCorrection(IR::Inst& inst) {
432 const IR::Value lhs_value{inst.Arg(0)};
433 const IR::Value rhs_value{inst.Arg(1)};
434 IR::Inst* const lhs_op{lhs_value.InstRecursive()};
435 IR::Inst* const rhs_op{rhs_value.InstRecursive()};
436 if (lhs_op->GetOpcode() == IR::Opcode::YDirection) {
437 if (rhs_op->GetOpcode() != IR::Opcode::DPdyFine) {
438 return false;
439 }
440 inst.ReplaceUsesWith(rhs_value);
441 return true;
442 }
443 if (rhs_op->GetOpcode() != IR::Opcode::YDirection) {
444 return false;
445 }
446 if (lhs_op->GetOpcode() != IR::Opcode::DPdyFine) {
447 return false;
448 }
449 inst.ReplaceUsesWith(lhs_value);
450 return true;
451}
452
413void FoldFPMul32(IR::Inst& inst) { 453void FoldFPMul32(IR::Inst& inst) {
454 if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a * b; })) {
455 return;
456 }
414 const auto control{inst.Flags<IR::FpControl>()}; 457 const auto control{inst.Flags<IR::FpControl>()};
415 if (control.no_contraction) { 458 if (control.no_contraction) {
416 return; 459 return;
@@ -421,6 +464,9 @@ void FoldFPMul32(IR::Inst& inst) {
421 if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { 464 if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
422 return; 465 return;
423 } 466 }
467 if (FoldDerivateYFromCorrection(inst)) {
468 return;
469 }
424 IR::Inst* const lhs_op{lhs_value.InstRecursive()}; 470 IR::Inst* const lhs_op{lhs_value.InstRecursive()};
425 IR::Inst* const rhs_op{rhs_value.InstRecursive()}; 471 IR::Inst* const rhs_op{rhs_value.InstRecursive()};
426 if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || 472 if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 ||
@@ -622,7 +668,12 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
622 } 668 }
623 const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)}; 669 const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)};
624 if (value_2 != value_3) { 670 if (value_2 != value_3) {
625 return; 671 if (!value_2.IsImmediate() || !value_3.IsImmediate()) {
672 return;
673 }
674 if (Common::BitCast<u32>(value_2.F32()) != value_3.U32()) {
675 return;
676 }
626 } 677 }
627 const IR::Value index{inst2->Arg(1)}; 678 const IR::Value index{inst2->Arg(1)};
628 const IR::Value clamp{inst2->Arg(2)}; 679 const IR::Value clamp{inst2->Arg(2)};
@@ -648,6 +699,169 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
648 } 699 }
649} 700}
650 701
702bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) {
703 if (coord.IsImmediate()) {
704 return false;
705 }
706 const auto check_through_shuffle = [](IR::Value input, IR::Value& result) {
707 const IR::Value value_1{GetThroughCast(input.Resolve(), IR::Opcode::BitCastF32U32)};
708 IR::Inst* const inst2{value_1.InstRecursive()};
709 if (inst2->GetOpcode() != IR::Opcode::ShuffleIndex) {
710 return false;
711 }
712 const IR::Value index{inst2->Arg(1).Resolve()};
713 const IR::Value clamp{inst2->Arg(2).Resolve()};
714 const IR::Value segmentation_mask{inst2->Arg(3).Resolve()};
715 if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) {
716 return false;
717 }
718 if (index.U32() != 3 && clamp.U32() != 3) {
719 return false;
720 }
721 result = GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32);
722 return true;
723 };
724 IR::Inst* const inst = coord.InstRecursive();
725 if (inst->GetOpcode() != IR::Opcode::FSwizzleAdd) {
726 return false;
727 }
728 std::array<IR::Value, 3> temporary_values;
729 IR::Value value_1 = inst->Arg(0).Resolve();
730 IR::Value value_2 = inst->Arg(1).Resolve();
731 IR::Value value_3 = inst->Arg(2).Resolve();
732 std::array<u32, 4> swizzles_mask_a{};
733 std::array<u32, 4> swizzles_mask_b{};
734 const auto resolve_mask = [](std::array<u32, 4>& mask_results, IR::Value mask) {
735 u32 value = mask.U32();
736 for (size_t i = 0; i < 4; i++) {
737 mask_results[i] = (value >> (i * 2)) & 0x3;
738 }
739 };
740 resolve_mask(swizzles_mask_a, value_3);
741 size_t coordinate_index = 0;
742 const auto resolve_pending = [&](IR::Value resolve_v) {
743 IR::Inst* const inst_r = resolve_v.InstRecursive();
744 if (inst_r->GetOpcode() != IR::Opcode::FSwizzleAdd) {
745 return false;
746 }
747 if (!check_through_shuffle(inst_r->Arg(0).Resolve(), temporary_values[1])) {
748 return false;
749 }
750 if (!check_through_shuffle(inst_r->Arg(1).Resolve(), temporary_values[2])) {
751 return false;
752 }
753 resolve_mask(swizzles_mask_b, inst_r->Arg(2).Resolve());
754 return true;
755 };
756 if (value_1.IsImmediate() || value_2.IsImmediate()) {
757 return false;
758 }
759 bool should_continue = false;
760 if (resolve_pending(value_1)) {
761 should_continue = check_through_shuffle(value_2, temporary_values[0]);
762 coordinate_index = 0;
763 }
764 if (resolve_pending(value_2)) {
765 should_continue = check_through_shuffle(value_1, temporary_values[0]);
766 coordinate_index = 2;
767 }
768 if (!should_continue) {
769 return false;
770 }
771 // figure which is which
772 size_t zero_mask_a = 0;
773 size_t zero_mask_b = 0;
774 for (size_t i = 0; i < 4; i++) {
775 if (swizzles_mask_a[i] == 2 || swizzles_mask_b[i] == 2) {
776 // last operand can be inversed, we cannot determine a result.
777 return false;
778 }
779 zero_mask_a |= static_cast<size_t>(swizzles_mask_a[i] == 3 ? 1 : 0) << i;
780 zero_mask_b |= static_cast<size_t>(swizzles_mask_b[i] == 3 ? 1 : 0) << i;
781 }
782 static constexpr size_t ddx_pattern = 0b1010;
783 static constexpr size_t ddx_pattern_inv = ~ddx_pattern & 0b00001111;
784 if (std::popcount(zero_mask_a) != 2) {
785 return false;
786 }
787 if (std::popcount(zero_mask_b) != 2) {
788 return false;
789 }
790 if (zero_mask_a == zero_mask_b) {
791 return false;
792 }
793 results[0] = temporary_values[coordinate_index];
794
795 if (coordinate_index == 0) {
796 if (zero_mask_b == ddx_pattern || zero_mask_b == ddx_pattern_inv) {
797 results[1] = temporary_values[1];
798 results[2] = temporary_values[2];
799 return true;
800 }
801 results[2] = temporary_values[1];
802 results[1] = temporary_values[2];
803 } else {
804 const auto assign_result = [&results](IR::Value temporary_value, size_t mask) {
805 if (mask == ddx_pattern || mask == ddx_pattern_inv) {
806 results[1] = temporary_value;
807 return;
808 }
809 results[2] = temporary_value;
810 };
811 assign_result(temporary_values[1], zero_mask_b);
812 assign_result(temporary_values[0], zero_mask_a);
813 }
814
815 return true;
816}
817
818void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
819 IR::TextureInstInfo info = inst.Flags<IR::TextureInstInfo>();
820 auto orig_opcode = inst.GetOpcode();
821 if (info.ndv_is_active == 0) {
822 return;
823 }
824 if (info.type != TextureType::Color3D) {
825 return;
826 }
827 const IR::Value handle{inst.Arg(0)};
828 const IR::Value coords{inst.Arg(1)};
829 const IR::Value bias_lc{inst.Arg(2)};
830 const IR::Value offset{inst.Arg(3)};
831 if (!offset.IsImmediate()) {
832 return;
833 }
834 IR::Inst* const inst2 = coords.InstRecursive();
835 std::array<std::array<IR::Value, 3>, 3> results_matrix;
836 for (size_t i = 0; i < 3; i++) {
837 if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) {
838 return;
839 }
840 }
841 IR::F32 lod_clamp{};
842 if (info.has_lod_clamp != 0) {
843 if (!bias_lc.IsImmediate()) {
844 lod_clamp = IR::F32{bias_lc.InstRecursive()->Arg(1).Resolve()};
845 } else {
846 lod_clamp = IR::F32{bias_lc};
847 }
848 }
849 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
850 IR::Value new_coords =
851 ir.CompositeConstruct(results_matrix[0][0], results_matrix[1][0], results_matrix[2][0]);
852 IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2],
853 results_matrix[1][1], results_matrix[1][2]);
854 IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]);
855 info.num_derivates.Assign(3);
856 IR::Value new_gradient_instruction =
857 ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info);
858 IR::Inst* const new_inst = new_gradient_instruction.InstRecursive();
859 if (orig_opcode == IR::Opcode::ImageSampleImplicitLod) {
860 new_inst->ReplaceOpcode(IR::Opcode::ImageGradient);
861 }
862 inst.ReplaceUsesWith(new_gradient_instruction);
863}
864
651void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { 865void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) {
652 const IR::Value bank{inst.Arg(0)}; 866 const IR::Value bank{inst.Arg(0)};
653 const IR::Value offset{inst.Arg(1)}; 867 const IR::Value offset{inst.Arg(1)};
@@ -743,6 +957,12 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
743 case IR::Opcode::SelectF32: 957 case IR::Opcode::SelectF32:
744 case IR::Opcode::SelectF64: 958 case IR::Opcode::SelectF64:
745 return FoldSelect(inst); 959 return FoldSelect(inst);
960 case IR::Opcode::FPNeg32:
961 FoldWhenAllImmediates(inst, [](f32 a) { return -a; });
962 return;
963 case IR::Opcode::FPAdd32:
964 FoldFPAdd32(inst);
965 return;
746 case IR::Opcode::FPMul32: 966 case IR::Opcode::FPMul32:
747 return FoldFPMul32(inst); 967 return FoldFPMul32(inst);
748 case IR::Opcode::LogicalAnd: 968 case IR::Opcode::LogicalAnd:
@@ -858,6 +1078,11 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
858 FoldDriverConstBuffer(env, block, inst, 1); 1078 FoldDriverConstBuffer(env, block, inst, 1);
859 } 1079 }
860 break; 1080 break;
1081 case IR::Opcode::BindlessImageSampleImplicitLod:
1082 case IR::Opcode::BoundImageSampleImplicitLod:
1083 case IR::Opcode::ImageSampleImplicitLod:
1084 FoldImageSampleImplicitLod(block, inst);
1085 break;
861 default: 1086 default:
862 break; 1087 break;
863 } 1088 }