diff options
Diffstat (limited to 'src/shader_recompiler/ir_opt')
| -rw-r--r-- | src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 10 | ||||
| -rw-r--r-- | src/shader_recompiler/ir_opt/passes.h | 1 | ||||
| -rw-r--r-- | src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp | 79 |
3 files changed, 85 insertions, 5 deletions
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index f46e55122..ec12c843a 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | |||
| @@ -428,7 +428,7 @@ void FoldFPAdd32(IR::Inst& inst) { | |||
| 428 | } | 428 | } |
| 429 | } | 429 | } |
| 430 | 430 | ||
| 431 | bool FoldDerivateYFromCorrection(IR::Inst& inst) { | 431 | bool FoldDerivativeYFromCorrection(IR::Inst& inst) { |
| 432 | const IR::Value lhs_value{inst.Arg(0)}; | 432 | const IR::Value lhs_value{inst.Arg(0)}; |
| 433 | const IR::Value rhs_value{inst.Arg(1)}; | 433 | const IR::Value rhs_value{inst.Arg(1)}; |
| 434 | IR::Inst* const lhs_op{lhs_value.InstRecursive()}; | 434 | IR::Inst* const lhs_op{lhs_value.InstRecursive()}; |
| @@ -464,7 +464,7 @@ void FoldFPMul32(IR::Inst& inst) { | |||
| 464 | if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { | 464 | if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { |
| 465 | return; | 465 | return; |
| 466 | } | 466 | } |
| 467 | if (FoldDerivateYFromCorrection(inst)) { | 467 | if (FoldDerivativeYFromCorrection(inst)) { |
| 468 | return; | 468 | return; |
| 469 | } | 469 | } |
| 470 | IR::Inst* const lhs_op{lhs_value.InstRecursive()}; | 470 | IR::Inst* const lhs_op{lhs_value.InstRecursive()}; |
| @@ -699,7 +699,7 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { | |||
| 699 | } | 699 | } |
| 700 | } | 700 | } |
| 701 | 701 | ||
| 702 | bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) { | 702 | bool FindGradient3DDerivatives(std::array<IR::Value, 3>& results, IR::Value coord) { |
| 703 | if (coord.IsImmediate()) { | 703 | if (coord.IsImmediate()) { |
| 704 | return false; | 704 | return false; |
| 705 | } | 705 | } |
| @@ -834,7 +834,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) { | |||
| 834 | IR::Inst* const inst2 = coords.InstRecursive(); | 834 | IR::Inst* const inst2 = coords.InstRecursive(); |
| 835 | std::array<std::array<IR::Value, 3>, 3> results_matrix; | 835 | std::array<std::array<IR::Value, 3>, 3> results_matrix; |
| 836 | for (size_t i = 0; i < 3; i++) { | 836 | for (size_t i = 0; i < 3; i++) { |
| 837 | if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) { | 837 | if (!FindGradient3DDerivatives(results_matrix[i], inst2->Arg(i).Resolve())) { |
| 838 | return; | 838 | return; |
| 839 | } | 839 | } |
| 840 | } | 840 | } |
| @@ -852,7 +852,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) { | |||
| 852 | IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2], | 852 | IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2], |
| 853 | results_matrix[1][1], results_matrix[1][2]); | 853 | results_matrix[1][1], results_matrix[1][2]); |
| 854 | IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]); | 854 | IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]); |
| 855 | info.num_derivates.Assign(3); | 855 | info.num_derivatives.Assign(3); |
| 856 | IR::Value new_gradient_instruction = | 856 | IR::Value new_gradient_instruction = |
| 857 | ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info); | 857 | ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info); |
| 858 | IR::Inst* const new_inst = new_gradient_instruction.InstRecursive(); | 858 | IR::Inst* const new_inst = new_gradient_instruction.InstRecursive(); |
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 629d18fa1..d4d5285e5 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h | |||
| @@ -26,6 +26,7 @@ void SsaRewritePass(IR::Program& program); | |||
| 26 | void PositionPass(Environment& env, IR::Program& program); | 26 | void PositionPass(Environment& env, IR::Program& program); |
| 27 | void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info); | 27 | void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info); |
| 28 | void LayerPass(IR::Program& program, const HostTranslateInfo& host_info); | 28 | void LayerPass(IR::Program& program, const HostTranslateInfo& host_info); |
| 29 | void VendorWorkaroundPass(IR::Program& program); | ||
| 29 | void VerificationPass(const IR::Program& program); | 30 | void VerificationPass(const IR::Program& program); |
| 30 | 31 | ||
| 31 | // Dual Vertex | 32 | // Dual Vertex |
diff --git a/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp new file mode 100644 index 000000000..08c658cb8 --- /dev/null +++ b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 5 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 6 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 7 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 8 | |||
| 9 | namespace Shader::Optimization { | ||
| 10 | |||
| 11 | namespace { | ||
| 12 | void AddingByteSwapsWorkaround(IR::Block& block, IR::Inst& inst) { | ||
| 13 | /* | ||
| 14 | * Workaround for an NVIDIA bug seen in Super Mario RPG | ||
| 15 | * | ||
| 16 | * We are looking for this pattern: | ||
| 17 | * %lhs_bfe = BitFieldUExtract %factor_a, #0, #16 | ||
| 18 | * %lhs_mul = IMul32 %lhs_bfe, %factor_b // potentially optional? | ||
| 19 | * %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16 | ||
| 20 | * %rhs_bfe = BitFieldUExtract %factor_a, #16, #16 | ||
| 21 | * %result = IAdd32 %lhs_shl, %rhs_bfe | ||
| 22 | * | ||
| 23 | * And replacing the IAdd32 with a BitwiseOr32 | ||
| 24 | * %result = BitwiseOr32 %lhs_shl, %rhs_bfe | ||
| 25 | * | ||
| 26 | */ | ||
| 27 | IR::Inst* const lhs_shl{inst.Arg(0).TryInstRecursive()}; | ||
| 28 | IR::Inst* const rhs_bfe{inst.Arg(1).TryInstRecursive()}; | ||
| 29 | if (!lhs_shl || !rhs_bfe) { | ||
| 30 | return; | ||
| 31 | } | ||
| 32 | if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || | ||
| 33 | lhs_shl->Arg(1) != IR::Value{16U}) { | ||
| 34 | return; | ||
| 35 | } | ||
| 36 | if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract || rhs_bfe->Arg(1) != IR::Value{16U} || | ||
| 37 | rhs_bfe->Arg(2) != IR::Value{16U}) { | ||
| 38 | return; | ||
| 39 | } | ||
| 40 | IR::Inst* const lhs_mul{lhs_shl->Arg(0).TryInstRecursive()}; | ||
| 41 | if (!lhs_mul) { | ||
| 42 | return; | ||
| 43 | } | ||
| 44 | const bool lhs_mul_optional{lhs_mul->GetOpcode() == IR::Opcode::BitFieldUExtract}; | ||
| 45 | if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 && | ||
| 46 | lhs_mul->GetOpcode() != IR::Opcode::BitFieldUExtract) { | ||
| 47 | return; | ||
| 48 | } | ||
| 49 | IR::Inst* const lhs_bfe{lhs_mul_optional ? lhs_mul : lhs_mul->Arg(0).TryInstRecursive()}; | ||
| 50 | if (!lhs_bfe) { | ||
| 51 | return; | ||
| 52 | } | ||
| 53 | if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { | ||
| 54 | return; | ||
| 55 | } | ||
| 56 | if (lhs_bfe->Arg(1) != IR::Value{0U} || lhs_bfe->Arg(2) != IR::Value{16U}) { | ||
| 57 | return; | ||
| 58 | } | ||
| 59 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 60 | inst.ReplaceUsesWith(ir.BitwiseOr(IR::U32{inst.Arg(0)}, IR::U32{inst.Arg(1)})); | ||
| 61 | } | ||
| 62 | |||
| 63 | } // Anonymous namespace | ||
| 64 | |||
| 65 | void VendorWorkaroundPass(IR::Program& program) { | ||
| 66 | for (IR::Block* const block : program.post_order_blocks) { | ||
| 67 | for (IR::Inst& inst : block->Instructions()) { | ||
| 68 | switch (inst.GetOpcode()) { | ||
| 69 | case IR::Opcode::IAdd32: | ||
| 70 | AddingByteSwapsWorkaround(*block, inst); | ||
| 71 | break; | ||
| 72 | default: | ||
| 73 | break; | ||
| 74 | } | ||
| 75 | } | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 79 | } // namespace Shader::Optimization | ||