Diffstat (limited to 'src/shader_recompiler/ir_opt')
-rw-r--r--  src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 10
-rw-r--r--  src/shader_recompiler/ir_opt/passes.h                      |  1
-rw-r--r--  src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp    | 79
3 files changed, 85 insertions, 5 deletions
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index f46e55122..ec12c843a 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -428,7 +428,7 @@ void FoldFPAdd32(IR::Inst& inst) {
     }
 }
 
-bool FoldDerivateYFromCorrection(IR::Inst& inst) {
+bool FoldDerivativeYFromCorrection(IR::Inst& inst) {
     const IR::Value lhs_value{inst.Arg(0)};
     const IR::Value rhs_value{inst.Arg(1)};
     IR::Inst* const lhs_op{lhs_value.InstRecursive()};
@@ -464,7 +464,7 @@ void FoldFPMul32(IR::Inst& inst) {
     if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
         return;
     }
-    if (FoldDerivateYFromCorrection(inst)) {
+    if (FoldDerivativeYFromCorrection(inst)) {
         return;
     }
     IR::Inst* const lhs_op{lhs_value.InstRecursive()};
@@ -699,7 +699,7 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
     }
 }
 
-bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) {
+bool FindGradient3DDerivatives(std::array<IR::Value, 3>& results, IR::Value coord) {
     if (coord.IsImmediate()) {
         return false;
     }
@@ -834,7 +834,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
     IR::Inst* const inst2 = coords.InstRecursive();
     std::array<std::array<IR::Value, 3>, 3> results_matrix;
     for (size_t i = 0; i < 3; i++) {
-        if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) {
+        if (!FindGradient3DDerivatives(results_matrix[i], inst2->Arg(i).Resolve())) {
             return;
         }
     }
@@ -852,7 +852,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
     IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2],
                                                     results_matrix[1][1], results_matrix[1][2]);
     IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]);
-    info.num_derivates.Assign(3);
+    info.num_derivatives.Assign(3);
     IR::Value new_gradient_instruction =
         ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info);
     IR::Inst* const new_inst = new_gradient_instruction.InstRecursive();
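
The fold above rewrites an implicit-LOD 3D sample into an explicit ImageGradient once all six screen-space derivatives are found. A minimal sketch of the operand packing, assuming results_matrix[i] holds {coordinate, X derivative, Y derivative} for coordinate i (illustrative names and values, not part of the patch):

#include <array>
#include <cstdio>

int main() {
    // Illustrative values: results_matrix[i] = {coord_i, dX_i, dY_i}.
    const std::array<std::array<float, 3>, 3> results_matrix{{
        {0.25f, 0.010f, 0.020f}, // x coordinate and its derivatives
        {0.50f, 0.030f, 0.040f}, // y coordinate and its derivatives
        {0.75f, 0.050f, 0.060f}, // z coordinate and its derivatives
    }};
    // The first composite carries the X/Y derivatives of the first two
    // coordinates; the second carries those of the third, mirroring the
    // CompositeConstruct calls in the hunk above (num_derivatives == 3).
    const std::array<float, 4> derivatives_1{results_matrix[0][1], results_matrix[0][2],
                                             results_matrix[1][1], results_matrix[1][2]};
    const std::array<float, 2> derivatives_2{results_matrix[2][1], results_matrix[2][2]};
    std::printf("derivatives_1 = {%g, %g, %g, %g}\n", derivatives_1[0], derivatives_1[1],
                derivatives_1[2], derivatives_1[3]);
    std::printf("derivatives_2 = {%g, %g}\n", derivatives_2[0], derivatives_2[1]);
}

Packing two coordinates' derivative pairs into the 4-component composite keeps the ImageGradient operand count fixed; the third coordinate's pair goes in the trailing 2-component composite.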
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 629d18fa1..d4d5285e5 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -26,6 +26,7 @@ void SsaRewritePass(IR::Program& program);
 void PositionPass(Environment& env, IR::Program& program);
 void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info);
 void LayerPass(IR::Program& program, const HostTranslateInfo& host_info);
+void VendorWorkaroundPass(IR::Program& program);
 void VerificationPass(const IR::Program& program);
 
 // Dual Vertex
diff --git a/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
new file mode 100644
index 000000000..08c658cb8
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+namespace {
+void AddingByteSwapsWorkaround(IR::Block& block, IR::Inst& inst) {
+    /*
+     * Workaround for an NVIDIA driver bug seen in Super Mario RPG.
+     *
+     * We are looking for this pattern:
+     *   %lhs_bfe = BitFieldUExtract %factor_a, #0, #16
+     *   %lhs_mul = IMul32 %lhs_bfe, %factor_b // optional
+     *   %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16
+     *   %rhs_bfe = BitFieldUExtract %factor_a, #16, #16
+     *   %result  = IAdd32 %lhs_shl, %rhs_bfe
+     *
+     * and we replace the IAdd32 with a BitwiseOr32:
+     *   %result  = BitwiseOr32 %lhs_shl, %rhs_bfe
+     */
+    IR::Inst* const lhs_shl{inst.Arg(0).TryInstRecursive()};
+    IR::Inst* const rhs_bfe{inst.Arg(1).TryInstRecursive()};
+    if (!lhs_shl || !rhs_bfe) {
+        return;
+    }
+    if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
+        lhs_shl->Arg(1) != IR::Value{16U}) {
+        return;
+    }
+    if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract || rhs_bfe->Arg(1) != IR::Value{16U} ||
+        rhs_bfe->Arg(2) != IR::Value{16U}) {
+        return;
+    }
+    IR::Inst* const lhs_mul{lhs_shl->Arg(0).TryInstRecursive()};
+    if (!lhs_mul) {
+        return;
+    }
+    // The IMul32 is optional: the shift may consume the BitFieldUExtract directly.
+    const bool lhs_mul_optional{lhs_mul->GetOpcode() == IR::Opcode::BitFieldUExtract};
+    if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 &&
+        lhs_mul->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+        return;
+    }
+    IR::Inst* const lhs_bfe{lhs_mul_optional ? lhs_mul : lhs_mul->Arg(0).TryInstRecursive()};
+    if (!lhs_bfe) {
+        return;
+    }
+    if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+        return;
+    }
+    if (lhs_bfe->Arg(1) != IR::Value{0U} || lhs_bfe->Arg(2) != IR::Value{16U}) {
+        return;
+    }
+    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+    inst.ReplaceUsesWith(ir.BitwiseOr(IR::U32{inst.Arg(0)}, IR::U32{inst.Arg(1)}));
+}
+
+} // Anonymous namespace
+
+void VendorWorkaroundPass(IR::Program& program) {
+    for (IR::Block* const block : program.post_order_blocks) {
+        for (IR::Inst& inst : block->Instructions()) {
+            switch (inst.GetOpcode()) {
+            case IR::Opcode::IAdd32:
+                AddingByteSwapsWorkaround(*block, inst);
+                break;
+            default:
+                break;
+            }
+        }
+    }
+}
+
+} // namespace Shader::Optimization
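
The rewrite in AddingByteSwapsWorkaround is only sound because the two IAdd32 operands can never carry into each other: the ShiftLeftLogical32 by 16 zeroes the low half of the left operand, and the 16-bit BitFieldUExtract bounds the right operand to the low half. A minimal standalone check of that equivalence (demonstration code, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
    // lhs mimics %lhs_shl: an arbitrary value shifted left by 16, low half zero.
    // rhs mimics %rhs_bfe: a 16-bit extracted field, high half zero.
    for (std::uint32_t a = 0; a < 0x10000u; a += 0x0101u) {
        const std::uint32_t lhs = a << 16;
        for (std::uint32_t rhs = 0; rhs < 0x10000u; rhs += 0x00FFu) {
            // Disjoint bit ranges: no carry is possible, so + and | agree.
            assert((lhs + rhs) == (lhs | rhs));
        }
    }
    return 0;
}

Each guard in the pass verifies one leg of that argument before the IAdd32 is replaced, so any expression that could actually carry is left untouched.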