diff options
Diffstat (limited to 'src/shader_recompiler')
8 files changed, 261 insertions, 11 deletions
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 85ee27333..d0e308124 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp | |||
| @@ -558,12 +558,15 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | |||
| 558 | const IR::Value& coord, const IR::Value& derivatives, | 558 | const IR::Value& coord, const IR::Value& derivatives, |
| 559 | const IR::Value& offset, const IR::Value& lod_clamp) { | 559 | const IR::Value& offset, const IR::Value& lod_clamp) { |
| 560 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | 560 | const auto info{inst.Flags<IR::TextureInstInfo>()}; |
| 561 | ScopedRegister dpdx, dpdy; | 561 | ScopedRegister dpdx, dpdy, coords; |
| 562 | const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; | 562 | const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; |
| 563 | if (multi_component) { | 563 | if (multi_component) { |
| 564 | // Allocate this early to avoid aliasing other registers | 564 | // Allocate this early to avoid aliasing other registers |
| 565 | dpdx = ScopedRegister{ctx.reg_alloc}; | 565 | dpdx = ScopedRegister{ctx.reg_alloc}; |
| 566 | dpdy = ScopedRegister{ctx.reg_alloc}; | 566 | dpdy = ScopedRegister{ctx.reg_alloc}; |
| 567 | if (info.num_derivates >= 3) { | ||
| 568 | coords = ScopedRegister{ctx.reg_alloc}; | ||
| 569 | } | ||
| 567 | } | 570 | } |
| 568 | const auto sparse_inst{PrepareSparse(inst)}; | 571 | const auto sparse_inst{PrepareSparse(inst)}; |
| 569 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | 572 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; |
| @@ -580,15 +583,27 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | |||
| 580 | "MOV.F {}.y,{}.w;", | 583 | "MOV.F {}.y,{}.w;", |
| 581 | dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec, | 584 | dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec, |
| 582 | dpdy.reg, derivatives_vec); | 585 | dpdy.reg, derivatives_vec); |
| 586 | Register final_coord; | ||
| 587 | if (info.num_derivates >= 3) { | ||
| 588 | ctx.Add("MOV.F {}.z,{}.x;" | ||
| 589 | "MOV.F {}.z,{}.y;", | ||
| 590 | dpdx.reg, coord_vec, dpdy.reg, coord_vec); | ||
| 591 | ctx.Add("MOV.F {}.x,0;" | ||
| 592 | "MOV.F {}.y,0;", | ||
| 593 | "MOV.F {}.z,0;", coords.reg, coords.reg, coords.reg); | ||
| 594 | final_coord = coords.reg; | ||
| 595 | } else { | ||
| 596 | final_coord = coord_vec; | ||
| 597 | } | ||
| 583 | if (info.has_lod_clamp) { | 598 | if (info.has_lod_clamp) { |
| 584 | const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)}; | 599 | const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)}; |
| 585 | ctx.Add("MOV.F {}.w,{};" | 600 | ctx.Add("MOV.F {}.w,{};" |
| 586 | "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};", | 601 | "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};", |
| 587 | dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, | 602 | dpdy.reg, lod_clamp_value, sparse_mod, ret, final_coord, dpdx.reg, dpdy.reg, |
| 588 | texture, type, offset_vec); | 603 | texture, type, offset_vec); |
| 589 | } else { | 604 | } else { |
| 590 | ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, | 605 | ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, final_coord, dpdx.reg, |
| 591 | texture, type, offset_vec); | 606 | dpdy.reg, texture, type, offset_vec); |
| 592 | } | 607 | } |
| 593 | } else { | 608 | } else { |
| 594 | ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec, | 609 | ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec, |
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 418505475..3ad668a47 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp | |||
| @@ -548,7 +548,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | |||
| 548 | if (sparse_inst) { | 548 | if (sparse_inst) { |
| 549 | throw NotImplementedException("EmitImageGradient Sparse"); | 549 | throw NotImplementedException("EmitImageGradient Sparse"); |
| 550 | } | 550 | } |
| 551 | if (!offset.IsEmpty()) { | 551 | if (!offset.IsEmpty() && info.num_derivates <= 2) { |
| 552 | throw NotImplementedException("EmitImageGradient offset"); | 552 | throw NotImplementedException("EmitImageGradient offset"); |
| 553 | } | 553 | } |
| 554 | const auto texture{Texture(ctx, info, index)}; | 554 | const auto texture{Texture(ctx, info, index)}; |
| @@ -556,6 +556,12 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | |||
| 556 | const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; | 556 | const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; |
| 557 | const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; | 557 | const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; |
| 558 | if (multi_component) { | 558 | if (multi_component) { |
| 559 | if (info.num_derivates >= 3) { | ||
| 560 | const auto offset_vec{ctx.var_alloc.Consume(offset)}; | ||
| 561 | ctx.Add("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yz, {}.y));", texel, texture, | ||
| 562 | coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec); | ||
| 563 | return; | ||
| 564 | } | ||
| 559 | ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords, | 565 | ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords, |
| 560 | derivatives_vec, derivatives_vec); | 566 | derivatives_vec, derivatives_vec); |
| 561 | } else { | 567 | } else { |
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 69035d462..1e9e8c8f5 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h | |||
| @@ -42,6 +42,7 @@ union TextureInstInfo { | |||
| 42 | BitField<23, 2, u32> gather_component; | 42 | BitField<23, 2, u32> gather_component; |
| 43 | BitField<25, 2, u32> num_derivates; | 43 | BitField<25, 2, u32> num_derivates; |
| 44 | BitField<27, 3, ImageFormat> image_format; | 44 | BitField<27, 3, ImageFormat> image_format; |
| 45 | BitField<30, 1, u32> ndv_is_active; | ||
| 45 | }; | 46 | }; |
| 46 | static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); | 47 | static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); |
| 47 | 48 | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp index ef4ffa54b..f00e20023 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp | |||
| @@ -19,7 +19,7 @@ void TranslatorVisitor::FSWZADD(u64 insn) { | |||
| 19 | } const fswzadd{insn}; | 19 | } const fswzadd{insn}; |
| 20 | 20 | ||
| 21 | if (fswzadd.ndv != 0) { | 21 | if (fswzadd.ndv != 0) { |
| 22 | throw NotImplementedException("FSWZADD NDV"); | 22 | LOG_WARNING(Shader, "(STUBBED) FSWZADD - NDV mode"); |
| 23 | } | 23 | } |
| 24 | 24 | ||
| 25 | const IR::F32 src_a{GetFloatReg8(insn)}; | 25 | const IR::F32 src_a{GetFloatReg8(insn)}; |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp index 82aec3b73..1ddfeab06 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp | |||
| @@ -16,8 +16,10 @@ void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = fa | |||
| 16 | BitField<12, 4, u64> mov32i_mask; | 16 | BitField<12, 4, u64> mov32i_mask; |
| 17 | } const mov{insn}; | 17 | } const mov{insn}; |
| 18 | 18 | ||
| 19 | if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) { | 19 | u64 mask = is_mov32i ? mov.mov32i_mask : mov.mask; |
| 20 | throw NotImplementedException("Non-full move mask"); | 20 | if (mask != 0xf && mask != 0x1) { |
| 21 | LOG_WARNING(Shader, "(STUBBED) Masked Mov"); | ||
| 22 | return; | ||
| 21 | } | 23 | } |
| 22 | v.X(mov.dest_reg, src); | 24 | v.X(mov.dest_reg, src); |
| 23 | } | 25 | } |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 2f930f1ea..6203003b3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -209,7 +209,7 @@ void TranslatorVisitor::R2B(u64) { | |||
| 209 | } | 209 | } |
| 210 | 210 | ||
| 211 | void TranslatorVisitor::RAM(u64) { | 211 | void TranslatorVisitor::RAM(u64) { |
| 212 | ThrowNotImplemented(Opcode::RAM); | 212 | LOG_WARNING(Shader, "(STUBBED) RAM Instruction"); |
| 213 | } | 213 | } |
| 214 | 214 | ||
| 215 | void TranslatorVisitor::RET(u64) { | 215 | void TranslatorVisitor::RET(u64) { |
| @@ -221,7 +221,7 @@ void TranslatorVisitor::RTT(u64) { | |||
| 221 | } | 221 | } |
| 222 | 222 | ||
| 223 | void TranslatorVisitor::SAM(u64) { | 223 | void TranslatorVisitor::SAM(u64) { |
| 224 | ThrowNotImplemented(Opcode::SAM); | 224 | LOG_WARNING(Shader, "(STUBBED) SAM Instruction"); |
| 225 | } | 225 | } |
| 226 | 226 | ||
| 227 | void TranslatorVisitor::SETCRSPTR(u64) { | 227 | void TranslatorVisitor::SETCRSPTR(u64) { |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 2459fc30d..7a9b7fff8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp | |||
| @@ -172,6 +172,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, | |||
| 172 | info.is_depth.Assign(tex.dc != 0 ? 1 : 0); | 172 | info.is_depth.Assign(tex.dc != 0 ? 1 : 0); |
| 173 | info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); | 173 | info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); |
| 174 | info.has_lod_clamp.Assign(lc ? 1 : 0); | 174 | info.has_lod_clamp.Assign(lc ? 1 : 0); |
| 175 | info.ndv_is_active.Assign(tex.ndv != 0 ? 1 : 0); | ||
| 175 | 176 | ||
| 176 | const IR::Value sample{[&]() -> IR::Value { | 177 | const IR::Value sample{[&]() -> IR::Value { |
| 177 | if (tex.dc == 0) { | 178 | if (tex.dc == 0) { |
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 4d81e9336..f46e55122 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "shader_recompiler/environment.h" | 10 | #include "shader_recompiler/environment.h" |
| 11 | #include "shader_recompiler/exception.h" | 11 | #include "shader_recompiler/exception.h" |
| 12 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | 12 | #include "shader_recompiler/frontend/ir/ir_emitter.h" |
| 13 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 13 | #include "shader_recompiler/frontend/ir/value.h" | 14 | #include "shader_recompiler/frontend/ir/value.h" |
| 14 | #include "shader_recompiler/ir_opt/passes.h" | 15 | #include "shader_recompiler/ir_opt/passes.h" |
| 15 | 16 | ||
| @@ -410,7 +411,49 @@ void FoldSelect(IR::Inst& inst) { | |||
| 410 | } | 411 | } |
| 411 | } | 412 | } |
| 412 | 413 | ||
| 414 | void FoldFPAdd32(IR::Inst& inst) { | ||
| 415 | if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a + b; })) { | ||
| 416 | return; | ||
| 417 | } | ||
| 418 | const IR::Value lhs_value{inst.Arg(0)}; | ||
| 419 | const IR::Value rhs_value{inst.Arg(1)}; | ||
| 420 | const auto check_neutral = [](const IR::Value& one_operand) { | ||
| 421 | return one_operand.IsImmediate() && std::abs(one_operand.F32()) == 0.0f; | ||
| 422 | }; | ||
| 423 | if (check_neutral(lhs_value)) { | ||
| 424 | inst.ReplaceUsesWith(rhs_value); | ||
| 425 | } | ||
| 426 | if (check_neutral(rhs_value)) { | ||
| 427 | inst.ReplaceUsesWith(lhs_value); | ||
| 428 | } | ||
| 429 | } | ||
| 430 | |||
| 431 | bool FoldDerivateYFromCorrection(IR::Inst& inst) { | ||
| 432 | const IR::Value lhs_value{inst.Arg(0)}; | ||
| 433 | const IR::Value rhs_value{inst.Arg(1)}; | ||
| 434 | IR::Inst* const lhs_op{lhs_value.InstRecursive()}; | ||
| 435 | IR::Inst* const rhs_op{rhs_value.InstRecursive()}; | ||
| 436 | if (lhs_op->GetOpcode() == IR::Opcode::YDirection) { | ||
| 437 | if (rhs_op->GetOpcode() != IR::Opcode::DPdyFine) { | ||
| 438 | return false; | ||
| 439 | } | ||
| 440 | inst.ReplaceUsesWith(rhs_value); | ||
| 441 | return true; | ||
| 442 | } | ||
| 443 | if (rhs_op->GetOpcode() != IR::Opcode::YDirection) { | ||
| 444 | return false; | ||
| 445 | } | ||
| 446 | if (lhs_op->GetOpcode() != IR::Opcode::DPdyFine) { | ||
| 447 | return false; | ||
| 448 | } | ||
| 449 | inst.ReplaceUsesWith(lhs_value); | ||
| 450 | return true; | ||
| 451 | } | ||
| 452 | |||
| 413 | void FoldFPMul32(IR::Inst& inst) { | 453 | void FoldFPMul32(IR::Inst& inst) { |
| 454 | if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a * b; })) { | ||
| 455 | return; | ||
| 456 | } | ||
| 414 | const auto control{inst.Flags<IR::FpControl>()}; | 457 | const auto control{inst.Flags<IR::FpControl>()}; |
| 415 | if (control.no_contraction) { | 458 | if (control.no_contraction) { |
| 416 | return; | 459 | return; |
| @@ -421,6 +464,9 @@ void FoldFPMul32(IR::Inst& inst) { | |||
| 421 | if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { | 464 | if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { |
| 422 | return; | 465 | return; |
| 423 | } | 466 | } |
| 467 | if (FoldDerivateYFromCorrection(inst)) { | ||
| 468 | return; | ||
| 469 | } | ||
| 424 | IR::Inst* const lhs_op{lhs_value.InstRecursive()}; | 470 | IR::Inst* const lhs_op{lhs_value.InstRecursive()}; |
| 425 | IR::Inst* const rhs_op{rhs_value.InstRecursive()}; | 471 | IR::Inst* const rhs_op{rhs_value.InstRecursive()}; |
| 426 | if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || | 472 | if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || |
| @@ -622,7 +668,12 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { | |||
| 622 | } | 668 | } |
| 623 | const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)}; | 669 | const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)}; |
| 624 | if (value_2 != value_3) { | 670 | if (value_2 != value_3) { |
| 625 | return; | 671 | if (!value_2.IsImmediate() || !value_3.IsImmediate()) { |
| 672 | return; | ||
| 673 | } | ||
| 674 | if (Common::BitCast<u32>(value_2.F32()) != value_3.U32()) { | ||
| 675 | return; | ||
| 676 | } | ||
| 626 | } | 677 | } |
| 627 | const IR::Value index{inst2->Arg(1)}; | 678 | const IR::Value index{inst2->Arg(1)}; |
| 628 | const IR::Value clamp{inst2->Arg(2)}; | 679 | const IR::Value clamp{inst2->Arg(2)}; |
| @@ -648,6 +699,169 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { | |||
| 648 | } | 699 | } |
| 649 | } | 700 | } |
| 650 | 701 | ||
| 702 | bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) { | ||
| 703 | if (coord.IsImmediate()) { | ||
| 704 | return false; | ||
| 705 | } | ||
| 706 | const auto check_through_shuffle = [](IR::Value input, IR::Value& result) { | ||
| 707 | const IR::Value value_1{GetThroughCast(input.Resolve(), IR::Opcode::BitCastF32U32)}; | ||
| 708 | IR::Inst* const inst2{value_1.InstRecursive()}; | ||
| 709 | if (inst2->GetOpcode() != IR::Opcode::ShuffleIndex) { | ||
| 710 | return false; | ||
| 711 | } | ||
| 712 | const IR::Value index{inst2->Arg(1).Resolve()}; | ||
| 713 | const IR::Value clamp{inst2->Arg(2).Resolve()}; | ||
| 714 | const IR::Value segmentation_mask{inst2->Arg(3).Resolve()}; | ||
| 715 | if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) { | ||
| 716 | return false; | ||
| 717 | } | ||
| 718 | if (index.U32() != 3 && clamp.U32() != 3) { | ||
| 719 | return false; | ||
| 720 | } | ||
| 721 | result = GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32); | ||
| 722 | return true; | ||
| 723 | }; | ||
| 724 | IR::Inst* const inst = coord.InstRecursive(); | ||
| 725 | if (inst->GetOpcode() != IR::Opcode::FSwizzleAdd) { | ||
| 726 | return false; | ||
| 727 | } | ||
| 728 | std::array<IR::Value, 3> temporary_values; | ||
| 729 | IR::Value value_1 = inst->Arg(0).Resolve(); | ||
| 730 | IR::Value value_2 = inst->Arg(1).Resolve(); | ||
| 731 | IR::Value value_3 = inst->Arg(2).Resolve(); | ||
| 732 | std::array<u32, 4> swizzles_mask_a{}; | ||
| 733 | std::array<u32, 4> swizzles_mask_b{}; | ||
| 734 | const auto resolve_mask = [](std::array<u32, 4>& mask_results, IR::Value mask) { | ||
| 735 | u32 value = mask.U32(); | ||
| 736 | for (size_t i = 0; i < 4; i++) { | ||
| 737 | mask_results[i] = (value >> (i * 2)) & 0x3; | ||
| 738 | } | ||
| 739 | }; | ||
| 740 | resolve_mask(swizzles_mask_a, value_3); | ||
| 741 | size_t coordinate_index = 0; | ||
| 742 | const auto resolve_pending = [&](IR::Value resolve_v) { | ||
| 743 | IR::Inst* const inst_r = resolve_v.InstRecursive(); | ||
| 744 | if (inst_r->GetOpcode() != IR::Opcode::FSwizzleAdd) { | ||
| 745 | return false; | ||
| 746 | } | ||
| 747 | if (!check_through_shuffle(inst_r->Arg(0).Resolve(), temporary_values[1])) { | ||
| 748 | return false; | ||
| 749 | } | ||
| 750 | if (!check_through_shuffle(inst_r->Arg(1).Resolve(), temporary_values[2])) { | ||
| 751 | return false; | ||
| 752 | } | ||
| 753 | resolve_mask(swizzles_mask_b, inst_r->Arg(2).Resolve()); | ||
| 754 | return true; | ||
| 755 | }; | ||
| 756 | if (value_1.IsImmediate() || value_2.IsImmediate()) { | ||
| 757 | return false; | ||
| 758 | } | ||
| 759 | bool should_continue = false; | ||
| 760 | if (resolve_pending(value_1)) { | ||
| 761 | should_continue = check_through_shuffle(value_2, temporary_values[0]); | ||
| 762 | coordinate_index = 0; | ||
| 763 | } | ||
| 764 | if (resolve_pending(value_2)) { | ||
| 765 | should_continue = check_through_shuffle(value_1, temporary_values[0]); | ||
| 766 | coordinate_index = 2; | ||
| 767 | } | ||
| 768 | if (!should_continue) { | ||
| 769 | return false; | ||
| 770 | } | ||
| 771 | // figure which is which | ||
| 772 | size_t zero_mask_a = 0; | ||
| 773 | size_t zero_mask_b = 0; | ||
| 774 | for (size_t i = 0; i < 4; i++) { | ||
| 775 | if (swizzles_mask_a[i] == 2 || swizzles_mask_b[i] == 2) { | ||
| 776 | // last operand can be inversed, we cannot determine a result. | ||
| 777 | return false; | ||
| 778 | } | ||
| 779 | zero_mask_a |= static_cast<size_t>(swizzles_mask_a[i] == 3 ? 1 : 0) << i; | ||
| 780 | zero_mask_b |= static_cast<size_t>(swizzles_mask_b[i] == 3 ? 1 : 0) << i; | ||
| 781 | } | ||
| 782 | static constexpr size_t ddx_pattern = 0b1010; | ||
| 783 | static constexpr size_t ddx_pattern_inv = ~ddx_pattern & 0b00001111; | ||
| 784 | if (std::popcount(zero_mask_a) != 2) { | ||
| 785 | return false; | ||
| 786 | } | ||
| 787 | if (std::popcount(zero_mask_b) != 2) { | ||
| 788 | return false; | ||
| 789 | } | ||
| 790 | if (zero_mask_a == zero_mask_b) { | ||
| 791 | return false; | ||
| 792 | } | ||
| 793 | results[0] = temporary_values[coordinate_index]; | ||
| 794 | |||
| 795 | if (coordinate_index == 0) { | ||
| 796 | if (zero_mask_b == ddx_pattern || zero_mask_b == ddx_pattern_inv) { | ||
| 797 | results[1] = temporary_values[1]; | ||
| 798 | results[2] = temporary_values[2]; | ||
| 799 | return true; | ||
| 800 | } | ||
| 801 | results[2] = temporary_values[1]; | ||
| 802 | results[1] = temporary_values[2]; | ||
| 803 | } else { | ||
| 804 | const auto assign_result = [&results](IR::Value temporary_value, size_t mask) { | ||
| 805 | if (mask == ddx_pattern || mask == ddx_pattern_inv) { | ||
| 806 | results[1] = temporary_value; | ||
| 807 | return; | ||
| 808 | } | ||
| 809 | results[2] = temporary_value; | ||
| 810 | }; | ||
| 811 | assign_result(temporary_values[1], zero_mask_b); | ||
| 812 | assign_result(temporary_values[0], zero_mask_a); | ||
| 813 | } | ||
| 814 | |||
| 815 | return true; | ||
| 816 | } | ||
| 817 | |||
| 818 | void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) { | ||
| 819 | IR::TextureInstInfo info = inst.Flags<IR::TextureInstInfo>(); | ||
| 820 | auto orig_opcode = inst.GetOpcode(); | ||
| 821 | if (info.ndv_is_active == 0) { | ||
| 822 | return; | ||
| 823 | } | ||
| 824 | if (info.type != TextureType::Color3D) { | ||
| 825 | return; | ||
| 826 | } | ||
| 827 | const IR::Value handle{inst.Arg(0)}; | ||
| 828 | const IR::Value coords{inst.Arg(1)}; | ||
| 829 | const IR::Value bias_lc{inst.Arg(2)}; | ||
| 830 | const IR::Value offset{inst.Arg(3)}; | ||
| 831 | if (!offset.IsImmediate()) { | ||
| 832 | return; | ||
| 833 | } | ||
| 834 | IR::Inst* const inst2 = coords.InstRecursive(); | ||
| 835 | std::array<std::array<IR::Value, 3>, 3> results_matrix; | ||
| 836 | for (size_t i = 0; i < 3; i++) { | ||
| 837 | if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) { | ||
| 838 | return; | ||
| 839 | } | ||
| 840 | } | ||
| 841 | IR::F32 lod_clamp{}; | ||
| 842 | if (info.has_lod_clamp != 0) { | ||
| 843 | if (!bias_lc.IsImmediate()) { | ||
| 844 | lod_clamp = IR::F32{bias_lc.InstRecursive()->Arg(1).Resolve()}; | ||
| 845 | } else { | ||
| 846 | lod_clamp = IR::F32{bias_lc}; | ||
| 847 | } | ||
| 848 | } | ||
| 849 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 850 | IR::Value new_coords = | ||
| 851 | ir.CompositeConstruct(results_matrix[0][0], results_matrix[1][0], results_matrix[2][0]); | ||
| 852 | IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2], | ||
| 853 | results_matrix[1][1], results_matrix[1][2]); | ||
| 854 | IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]); | ||
| 855 | info.num_derivates.Assign(3); | ||
| 856 | IR::Value new_gradient_instruction = | ||
| 857 | ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info); | ||
| 858 | IR::Inst* const new_inst = new_gradient_instruction.InstRecursive(); | ||
| 859 | if (orig_opcode == IR::Opcode::ImageSampleImplicitLod) { | ||
| 860 | new_inst->ReplaceOpcode(IR::Opcode::ImageGradient); | ||
| 861 | } | ||
| 862 | inst.ReplaceUsesWith(new_gradient_instruction); | ||
| 863 | } | ||
| 864 | |||
| 651 | void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { | 865 | void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { |
| 652 | const IR::Value bank{inst.Arg(0)}; | 866 | const IR::Value bank{inst.Arg(0)}; |
| 653 | const IR::Value offset{inst.Arg(1)}; | 867 | const IR::Value offset{inst.Arg(1)}; |
| @@ -743,6 +957,12 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) { | |||
| 743 | case IR::Opcode::SelectF32: | 957 | case IR::Opcode::SelectF32: |
| 744 | case IR::Opcode::SelectF64: | 958 | case IR::Opcode::SelectF64: |
| 745 | return FoldSelect(inst); | 959 | return FoldSelect(inst); |
| 960 | case IR::Opcode::FPNeg32: | ||
| 961 | FoldWhenAllImmediates(inst, [](f32 a) { return -a; }); | ||
| 962 | return; | ||
| 963 | case IR::Opcode::FPAdd32: | ||
| 964 | FoldFPAdd32(inst); | ||
| 965 | return; | ||
| 746 | case IR::Opcode::FPMul32: | 966 | case IR::Opcode::FPMul32: |
| 747 | return FoldFPMul32(inst); | 967 | return FoldFPMul32(inst); |
| 748 | case IR::Opcode::LogicalAnd: | 968 | case IR::Opcode::LogicalAnd: |
| @@ -858,6 +1078,11 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) { | |||
| 858 | FoldDriverConstBuffer(env, block, inst, 1); | 1078 | FoldDriverConstBuffer(env, block, inst, 1); |
| 859 | } | 1079 | } |
| 860 | break; | 1080 | break; |
| 1081 | case IR::Opcode::BindlessImageSampleImplicitLod: | ||
| 1082 | case IR::Opcode::BoundImageSampleImplicitLod: | ||
| 1083 | case IR::Opcode::ImageSampleImplicitLod: | ||
| 1084 | FoldImageSampleImplicitLod(block, inst); | ||
| 1085 | break; | ||
| 861 | default: | 1086 | default: |
| 862 | break; | 1087 | break; |
| 863 | } | 1088 | } |