summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Fernando Sahmkow2023-08-18 18:30:31 +0200
committerGravatar Liam2023-08-18 22:17:02 -0400
commitc03f0b3c893f2bc2ae4f1e1825c5ac1453c36710 (patch)
tree3f7f14fad6574878a7c0f94fbe16d574b61cb95a /src
parentMerge pull request #11278 from Kelebek1/dma_sync (diff)
downloadyuzu-c03f0b3c893f2bc2ae4f1e1825c5ac1453c36710.tar.gz
yuzu-c03f0b3c893f2bc2ae4f1e1825c5ac1453c36710.tar.xz
yuzu-c03f0b3c893f2bc2ae4f1e1825c5ac1453c36710.zip
Shader Recompiler: implement textureGrad 3D emulation constant propagation
Diffstat (limited to 'src')
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_image.cpp23
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_image.cpp8
-rw-r--r--src/shader_recompiler/frontend/ir/modifiers.h1
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp2
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp6
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp4
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp1
-rw-r--r--src/shader_recompiler/ir_opt/constant_propagation_pass.cpp227
8 files changed, 261 insertions, 11 deletions
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
index 85ee27333..d0e308124 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -558,12 +558,15 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
558 const IR::Value& coord, const IR::Value& derivatives, 558 const IR::Value& coord, const IR::Value& derivatives,
559 const IR::Value& offset, const IR::Value& lod_clamp) { 559 const IR::Value& offset, const IR::Value& lod_clamp) {
560 const auto info{inst.Flags<IR::TextureInstInfo>()}; 560 const auto info{inst.Flags<IR::TextureInstInfo>()};
561 ScopedRegister dpdx, dpdy; 561 ScopedRegister dpdx, dpdy, coords;
562 const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; 562 const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
563 if (multi_component) { 563 if (multi_component) {
564 // Allocate this early to avoid aliasing other registers 564 // Allocate this early to avoid aliasing other registers
565 dpdx = ScopedRegister{ctx.reg_alloc}; 565 dpdx = ScopedRegister{ctx.reg_alloc};
566 dpdy = ScopedRegister{ctx.reg_alloc}; 566 dpdy = ScopedRegister{ctx.reg_alloc};
567 if (info.num_derivates >= 3) {
568 coords = ScopedRegister{ctx.reg_alloc};
569 }
567 } 570 }
568 const auto sparse_inst{PrepareSparse(inst)}; 571 const auto sparse_inst{PrepareSparse(inst)};
569 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; 572 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
@@ -580,15 +583,27 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
580 "MOV.F {}.y,{}.w;", 583 "MOV.F {}.y,{}.w;",
581 dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec, 584 dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec,
582 dpdy.reg, derivatives_vec); 585 dpdy.reg, derivatives_vec);
586 Register final_coord;
587 if (info.num_derivates >= 3) {
588 ctx.Add("MOV.F {}.z,{}.x;"
589 "MOV.F {}.z,{}.y;",
590 dpdx.reg, coord_vec, dpdy.reg, coord_vec);
591 ctx.Add("MOV.F {}.x,0;"
592 "MOV.F {}.y,0;",
593 "MOV.F {}.z,0;", coords.reg, coords.reg, coords.reg);
594 final_coord = coords.reg;
595 } else {
596 final_coord = coord_vec;
597 }
583 if (info.has_lod_clamp) { 598 if (info.has_lod_clamp) {
584 const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)}; 599 const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)};
585 ctx.Add("MOV.F {}.w,{};" 600 ctx.Add("MOV.F {}.w,{};"
586 "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};", 601 "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};",
587 dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, 602 dpdy.reg, lod_clamp_value, sparse_mod, ret, final_coord, dpdx.reg, dpdy.reg,
588 texture, type, offset_vec); 603 texture, type, offset_vec);
589 } else { 604 } else {
590 ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, 605 ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, final_coord, dpdx.reg,
591 texture, type, offset_vec); 606 dpdy.reg, texture, type, offset_vec);
592 } 607 }
593 } else { 608 } else {
594 ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec, 609 ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec,
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
index 418505475..3ad668a47 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -548,7 +548,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
548 if (sparse_inst) { 548 if (sparse_inst) {
549 throw NotImplementedException("EmitImageGradient Sparse"); 549 throw NotImplementedException("EmitImageGradient Sparse");
550 } 550 }
551 if (!offset.IsEmpty()) { 551 if (!offset.IsEmpty() && info.num_derivates <= 2) {
552 throw NotImplementedException("EmitImageGradient offset"); 552 throw NotImplementedException("EmitImageGradient offset");
553 } 553 }
554 const auto texture{Texture(ctx, info, index)}; 554 const auto texture{Texture(ctx, info, index)};
@@ -556,6 +556,12 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
556 const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; 556 const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
557 const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; 557 const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)};
558 if (multi_component) { 558 if (multi_component) {
559 if (info.num_derivates >= 3) {
560 const auto offset_vec{ctx.var_alloc.Consume(offset)};
561 ctx.Add("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yz, {}.y));", texel, texture,
562 coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec);
563 return;
564 }
559 ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords, 565 ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords,
560 derivatives_vec, derivatives_vec); 566 derivatives_vec, derivatives_vec);
561 } else { 567 } else {
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
index 69035d462..1e9e8c8f5 100644
--- a/src/shader_recompiler/frontend/ir/modifiers.h
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -42,6 +42,7 @@ union TextureInstInfo {
42 BitField<23, 2, u32> gather_component; 42 BitField<23, 2, u32> gather_component;
43 BitField<25, 2, u32> num_derivates; 43 BitField<25, 2, u32> num_derivates;
44 BitField<27, 3, ImageFormat> image_format; 44 BitField<27, 3, ImageFormat> image_format;
45 BitField<30, 1, u32> ndv_is_active;
45}; 46};
46static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); 47static_assert(sizeof(TextureInstInfo) <= sizeof(u32));
47 48
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
index ef4ffa54b..f00e20023 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
@@ -19,7 +19,7 @@ void TranslatorVisitor::FSWZADD(u64 insn) {
19 } const fswzadd{insn}; 19 } const fswzadd{insn};
20 20
21 if (fswzadd.ndv != 0) { 21 if (fswzadd.ndv != 0) {
22 throw NotImplementedException("FSWZADD NDV"); 22 LOG_WARNING(Shader, "(STUBBED) FSWZADD - NDV mode");
23 } 23 }
24 24
25 const IR::F32 src_a{GetFloatReg8(insn)}; 25 const IR::F32 src_a{GetFloatReg8(insn)};
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
index 82aec3b73..1ddfeab06 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
@@ -16,8 +16,10 @@ void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = fa
16 BitField<12, 4, u64> mov32i_mask; 16 BitField<12, 4, u64> mov32i_mask;
17 } const mov{insn}; 17 } const mov{insn};
18 18
19 if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) { 19 u64 mask = is_mov32i ? mov.mov32i_mask : mov.mask;
20 throw NotImplementedException("Non-full move mask"); 20 if (mask != 0xf && mask != 0x1) {
21 LOG_WARNING(Shader, "(STUBBED) Masked Mov");
22 return;
21 } 23 }
22 v.X(mov.dest_reg, src); 24 v.X(mov.dest_reg, src);
23} 25}
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 2f930f1ea..6203003b3 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -209,7 +209,7 @@ void TranslatorVisitor::R2B(u64) {
209} 209}
210 210
211void TranslatorVisitor::RAM(u64) { 211void TranslatorVisitor::RAM(u64) {
212 ThrowNotImplemented(Opcode::RAM); 212 LOG_WARNING(Shader, "(STUBBED) RAM Instruction");
213} 213}
214 214
215void TranslatorVisitor::RET(u64) { 215void TranslatorVisitor::RET(u64) {
@@ -221,7 +221,7 @@ void TranslatorVisitor::RTT(u64) {
221} 221}
222 222
223void TranslatorVisitor::SAM(u64) { 223void TranslatorVisitor::SAM(u64) {
224 ThrowNotImplemented(Opcode::SAM); 224 LOG_WARNING(Shader, "(STUBBED) SAM Instruction");
225} 225}
226 226
227void TranslatorVisitor::SETCRSPTR(u64) { 227void TranslatorVisitor::SETCRSPTR(u64) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
index 2459fc30d..7a9b7fff8 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
@@ -172,6 +172,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
172 info.is_depth.Assign(tex.dc != 0 ? 1 : 0); 172 info.is_depth.Assign(tex.dc != 0 ? 1 : 0);
173 info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); 173 info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0);
174 info.has_lod_clamp.Assign(lc ? 1 : 0); 174 info.has_lod_clamp.Assign(lc ? 1 : 0);
175 info.ndv_is_active.Assign(tex.ndv != 0 ? 1 : 0);
175 176
176 const IR::Value sample{[&]() -> IR::Value { 177 const IR::Value sample{[&]() -> IR::Value {
177 if (tex.dc == 0) { 178 if (tex.dc == 0) {
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index 4d81e9336..f46e55122 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -10,6 +10,7 @@
10#include "shader_recompiler/environment.h" 10#include "shader_recompiler/environment.h"
11#include "shader_recompiler/exception.h" 11#include "shader_recompiler/exception.h"
12#include "shader_recompiler/frontend/ir/ir_emitter.h" 12#include "shader_recompiler/frontend/ir/ir_emitter.h"
13#include "shader_recompiler/frontend/ir/modifiers.h"
13#include "shader_recompiler/frontend/ir/value.h" 14#include "shader_recompiler/frontend/ir/value.h"
14#include "shader_recompiler/ir_opt/passes.h" 15#include "shader_recompiler/ir_opt/passes.h"
15 16
@@ -410,7 +411,49 @@ void FoldSelect(IR::Inst& inst) {
410 } 411 }
411} 412}
412 413
414void FoldFPAdd32(IR::Inst& inst) {
415 if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a + b; })) {
416 return;
417 }
418 const IR::Value lhs_value{inst.Arg(0)};
419 const IR::Value rhs_value{inst.Arg(1)};
420 const auto check_neutral = [](const IR::Value& one_operand) {
421 return one_operand.IsImmediate() && std::abs(one_operand.F32()) == 0.0f;
422 };
423 if (check_neutral(lhs_value)) {
424 inst.ReplaceUsesWith(rhs_value);
425 }
426 if (check_neutral(rhs_value)) {
427 inst.ReplaceUsesWith(lhs_value);
428 }
429}
430
431bool FoldDerivateYFromCorrection(IR::Inst& inst) {
432 const IR::Value lhs_value{inst.Arg(0)};
433 const IR::Value rhs_value{inst.Arg(1)};
434 IR::Inst* const lhs_op{lhs_value.InstRecursive()};
435 IR::Inst* const rhs_op{rhs_value.InstRecursive()};
436 if (lhs_op->GetOpcode() == IR::Opcode::YDirection) {
437 if (rhs_op->GetOpcode() != IR::Opcode::DPdyFine) {
438 return false;
439 }
440 inst.ReplaceUsesWith(rhs_value);
441 return true;
442 }
443 if (rhs_op->GetOpcode() != IR::Opcode::YDirection) {
444 return false;
445 }
446 if (lhs_op->GetOpcode() != IR::Opcode::DPdyFine) {
447 return false;
448 }
449 inst.ReplaceUsesWith(lhs_value);
450 return true;
451}
452
413void FoldFPMul32(IR::Inst& inst) { 453void FoldFPMul32(IR::Inst& inst) {
454 if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a * b; })) {
455 return;
456 }
414 const auto control{inst.Flags<IR::FpControl>()}; 457 const auto control{inst.Flags<IR::FpControl>()};
415 if (control.no_contraction) { 458 if (control.no_contraction) {
416 return; 459 return;
@@ -421,6 +464,9 @@ void FoldFPMul32(IR::Inst& inst) {
421 if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { 464 if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
422 return; 465 return;
423 } 466 }
467 if (FoldDerivateYFromCorrection(inst)) {
468 return;
469 }
424 IR::Inst* const lhs_op{lhs_value.InstRecursive()}; 470 IR::Inst* const lhs_op{lhs_value.InstRecursive()};
425 IR::Inst* const rhs_op{rhs_value.InstRecursive()}; 471 IR::Inst* const rhs_op{rhs_value.InstRecursive()};
426 if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || 472 if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 ||
@@ -622,7 +668,12 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
622 } 668 }
623 const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)}; 669 const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)};
624 if (value_2 != value_3) { 670 if (value_2 != value_3) {
625 return; 671 if (!value_2.IsImmediate() || !value_3.IsImmediate()) {
672 return;
673 }
674 if (Common::BitCast<u32>(value_2.F32()) != value_3.U32()) {
675 return;
676 }
626 } 677 }
627 const IR::Value index{inst2->Arg(1)}; 678 const IR::Value index{inst2->Arg(1)};
628 const IR::Value clamp{inst2->Arg(2)}; 679 const IR::Value clamp{inst2->Arg(2)};
@@ -648,6 +699,169 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
648 } 699 }
649} 700}
650 701
702bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) {
703 if (coord.IsImmediate()) {
704 return false;
705 }
706 const auto check_through_shuffle = [](IR::Value input, IR::Value& result) {
707 const IR::Value value_1{GetThroughCast(input.Resolve(), IR::Opcode::BitCastF32U32)};
708 IR::Inst* const inst2{value_1.InstRecursive()};
709 if (inst2->GetOpcode() != IR::Opcode::ShuffleIndex) {
710 return false;
711 }
712 const IR::Value index{inst2->Arg(1).Resolve()};
713 const IR::Value clamp{inst2->Arg(2).Resolve()};
714 const IR::Value segmentation_mask{inst2->Arg(3).Resolve()};
715 if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) {
716 return false;
717 }
718 if (index.U32() != 3 && clamp.U32() != 3) {
719 return false;
720 }
721 result = GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32);
722 return true;
723 };
724 IR::Inst* const inst = coord.InstRecursive();
725 if (inst->GetOpcode() != IR::Opcode::FSwizzleAdd) {
726 return false;
727 }
728 std::array<IR::Value, 3> temporary_values;
729 IR::Value value_1 = inst->Arg(0).Resolve();
730 IR::Value value_2 = inst->Arg(1).Resolve();
731 IR::Value value_3 = inst->Arg(2).Resolve();
732 std::array<u32, 4> swizzles_mask_a{};
733 std::array<u32, 4> swizzles_mask_b{};
734 const auto resolve_mask = [](std::array<u32, 4>& mask_results, IR::Value mask) {
735 u32 value = mask.U32();
736 for (size_t i = 0; i < 4; i++) {
737 mask_results[i] = (value >> (i * 2)) & 0x3;
738 }
739 };
740 resolve_mask(swizzles_mask_a, value_3);
741 size_t coordinate_index = 0;
742 const auto resolve_pending = [&](IR::Value resolve_v) {
743 IR::Inst* const inst_r = resolve_v.InstRecursive();
744 if (inst_r->GetOpcode() != IR::Opcode::FSwizzleAdd) {
745 return false;
746 }
747 if (!check_through_shuffle(inst_r->Arg(0).Resolve(), temporary_values[1])) {
748 return false;
749 }
750 if (!check_through_shuffle(inst_r->Arg(1).Resolve(), temporary_values[2])) {
751 return false;
752 }
753 resolve_mask(swizzles_mask_b, inst_r->Arg(2).Resolve());
754 return true;
755 };
756 if (value_1.IsImmediate() || value_2.IsImmediate()) {
757 return false;
758 }
759 bool should_continue = false;
760 if (resolve_pending(value_1)) {
761 should_continue = check_through_shuffle(value_2, temporary_values[0]);
762 coordinate_index = 0;
763 }
764 if (resolve_pending(value_2)) {
765 should_continue = check_through_shuffle(value_1, temporary_values[0]);
766 coordinate_index = 2;
767 }
768 if (!should_continue) {
769 return false;
770 }
771 // figure which is which
772 size_t zero_mask_a = 0;
773 size_t zero_mask_b = 0;
774 for (size_t i = 0; i < 4; i++) {
775 if (swizzles_mask_a[i] == 2 || swizzles_mask_b[i] == 2) {
776 // last operand can be inversed, we cannot determine a result.
777 return false;
778 }
779 zero_mask_a |= static_cast<size_t>(swizzles_mask_a[i] == 3 ? 1 : 0) << i;
780 zero_mask_b |= static_cast<size_t>(swizzles_mask_b[i] == 3 ? 1 : 0) << i;
781 }
782 static constexpr size_t ddx_pattern = 0b1010;
783 static constexpr size_t ddx_pattern_inv = ~ddx_pattern & 0b00001111;
784 if (std::popcount(zero_mask_a) != 2) {
785 return false;
786 }
787 if (std::popcount(zero_mask_b) != 2) {
788 return false;
789 }
790 if (zero_mask_a == zero_mask_b) {
791 return false;
792 }
793 results[0] = temporary_values[coordinate_index];
794
795 if (coordinate_index == 0) {
796 if (zero_mask_b == ddx_pattern || zero_mask_b == ddx_pattern_inv) {
797 results[1] = temporary_values[1];
798 results[2] = temporary_values[2];
799 return true;
800 }
801 results[2] = temporary_values[1];
802 results[1] = temporary_values[2];
803 } else {
804 const auto assign_result = [&results](IR::Value temporary_value, size_t mask) {
805 if (mask == ddx_pattern || mask == ddx_pattern_inv) {
806 results[1] = temporary_value;
807 return;
808 }
809 results[2] = temporary_value;
810 };
811 assign_result(temporary_values[1], zero_mask_b);
812 assign_result(temporary_values[0], zero_mask_a);
813 }
814
815 return true;
816}
817
818void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
819 IR::TextureInstInfo info = inst.Flags<IR::TextureInstInfo>();
820 auto orig_opcode = inst.GetOpcode();
821 if (info.ndv_is_active == 0) {
822 return;
823 }
824 if (info.type != TextureType::Color3D) {
825 return;
826 }
827 const IR::Value handle{inst.Arg(0)};
828 const IR::Value coords{inst.Arg(1)};
829 const IR::Value bias_lc{inst.Arg(2)};
830 const IR::Value offset{inst.Arg(3)};
831 if (!offset.IsImmediate()) {
832 return;
833 }
834 IR::Inst* const inst2 = coords.InstRecursive();
835 std::array<std::array<IR::Value, 3>, 3> results_matrix;
836 for (size_t i = 0; i < 3; i++) {
837 if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) {
838 return;
839 }
840 }
841 IR::F32 lod_clamp{};
842 if (info.has_lod_clamp != 0) {
843 if (!bias_lc.IsImmediate()) {
844 lod_clamp = IR::F32{bias_lc.InstRecursive()->Arg(1).Resolve()};
845 } else {
846 lod_clamp = IR::F32{bias_lc};
847 }
848 }
849 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
850 IR::Value new_coords =
851 ir.CompositeConstruct(results_matrix[0][0], results_matrix[1][0], results_matrix[2][0]);
852 IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2],
853 results_matrix[1][1], results_matrix[1][2]);
854 IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]);
855 info.num_derivates.Assign(3);
856 IR::Value new_gradient_instruction =
857 ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info);
858 IR::Inst* const new_inst = new_gradient_instruction.InstRecursive();
859 if (orig_opcode == IR::Opcode::ImageSampleImplicitLod) {
860 new_inst->ReplaceOpcode(IR::Opcode::ImageGradient);
861 }
862 inst.ReplaceUsesWith(new_gradient_instruction);
863}
864
651void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { 865void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) {
652 const IR::Value bank{inst.Arg(0)}; 866 const IR::Value bank{inst.Arg(0)};
653 const IR::Value offset{inst.Arg(1)}; 867 const IR::Value offset{inst.Arg(1)};
@@ -743,6 +957,12 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
743 case IR::Opcode::SelectF32: 957 case IR::Opcode::SelectF32:
744 case IR::Opcode::SelectF64: 958 case IR::Opcode::SelectF64:
745 return FoldSelect(inst); 959 return FoldSelect(inst);
960 case IR::Opcode::FPNeg32:
961 FoldWhenAllImmediates(inst, [](f32 a) { return -a; });
962 return;
963 case IR::Opcode::FPAdd32:
964 FoldFPAdd32(inst);
965 return;
746 case IR::Opcode::FPMul32: 966 case IR::Opcode::FPMul32:
747 return FoldFPMul32(inst); 967 return FoldFPMul32(inst);
748 case IR::Opcode::LogicalAnd: 968 case IR::Opcode::LogicalAnd:
@@ -858,6 +1078,11 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
858 FoldDriverConstBuffer(env, block, inst, 1); 1078 FoldDriverConstBuffer(env, block, inst, 1);
859 } 1079 }
860 break; 1080 break;
1081 case IR::Opcode::BindlessImageSampleImplicitLod:
1082 case IR::Opcode::BoundImageSampleImplicitLod:
1083 case IR::Opcode::ImageSampleImplicitLod:
1084 FoldImageSampleImplicitLod(block, inst);
1085 break;
861 default: 1086 default:
862 break; 1087 break;
863 } 1088 }