summary | refs | log | tree | commit | diff
path: root/src/shader_recompiler
diff options
context:
space:
mode:
Diffstat (limited to 'src/shader_recompiler')
-rw-r--r-- src/shader_recompiler/CMakeLists.txt 2
-rw-r--r-- src/shader_recompiler/frontend/ir/ir_emitter.cpp 20
-rw-r--r-- src/shader_recompiler/frontend/ir/ir_emitter.h 5
-rw-r--r-- src/shader_recompiler/frontend/ir/microinstruction.cpp 26
-rw-r--r-- src/shader_recompiler/frontend/ir/microinstruction.h 4
-rw-r--r-- src/shader_recompiler/frontend/ir/opcode.inc 22
-rw-r--r-- src/shader_recompiler/frontend/ir/type.cpp 2
-rw-r--r-- src/shader_recompiler/frontend/ir/type.h 1
-rw-r--r-- src/shader_recompiler/frontend/ir/value.cpp 17
-rw-r--r-- src/shader_recompiler/frontend/ir/value.h 1
-rw-r--r-- src/shader_recompiler/frontend/maxwell/program.cpp 6
-rw-r--r-- src/shader_recompiler/ir_opt/constant_propagation_pass.cpp 146
-rw-r--r-- src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp 331
-rw-r--r-- src/shader_recompiler/ir_opt/identity_removal_pass.cpp 28
-rw-r--r-- src/shader_recompiler/ir_opt/passes.h 6
-rw-r--r-- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp 56
-rw-r--r-- src/shader_recompiler/ir_opt/verification_pass.cpp 42
17 files changed, 652 insertions, 63 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index f5dd4d29e..72d5f41d2 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -59,7 +59,9 @@ add_executable(shader_recompiler
59 frontend/maxwell/translate/impl/move_special_register.cpp 59 frontend/maxwell/translate/impl/move_special_register.cpp
60 frontend/maxwell/translate/translate.cpp 60 frontend/maxwell/translate/translate.cpp
61 frontend/maxwell/translate/translate.h 61 frontend/maxwell/translate/translate.h
62 ir_opt/constant_propagation_pass.cpp
62 ir_opt/dead_code_elimination_pass.cpp 63 ir_opt/dead_code_elimination_pass.cpp
64 ir_opt/global_memory_to_storage_buffer_pass.cpp
63 ir_opt/identity_removal_pass.cpp 65 ir_opt/identity_removal_pass.cpp
64 ir_opt/passes.h 66 ir_opt/passes.h
65 ir_opt/ssa_rewrite_pass.cpp 67 ir_opt/ssa_rewrite_pass.cpp
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 87b253c9a..1c5ae0109 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -504,6 +504,20 @@ U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
504 } 504 }
505} 505}
506 506
507U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) {
508 if (a.Type() != b.Type()) {
509 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
510 }
511 switch (a.Type()) {
512 case Type::U32:
513 return Inst<U32>(Opcode::ISub32, a, b);
514 case Type::U64:
515 return Inst<U64>(Opcode::ISub64, a, b);
516 default:
517 ThrowInvalidType(a.Type());
518 }
519}
520
507U32 IREmitter::IMul(const U32& a, const U32& b) { 521U32 IREmitter::IMul(const U32& a, const U32& b) {
508 return Inst<U32>(Opcode::IMul32, a, b); 522 return Inst<U32>(Opcode::IMul32, a, b);
509} 523}
@@ -679,8 +693,8 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& v
679 } 693 }
680} 694}
681 695
682U32U64 IREmitter::ConvertU(size_t bitsize, const U32U64& value) { 696U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) {
683 switch (bitsize) { 697 switch (result_bitsize) {
684 case 32: 698 case 32:
685 switch (value.Type()) { 699 switch (value.Type()) {
686 case Type::U32: 700 case Type::U32:
@@ -703,7 +717,7 @@ U32U64 IREmitter::ConvertU(size_t bitsize, const U32U64& value) {
703 break; 717 break;
704 } 718 }
705 } 719 }
706 throw NotImplementedException("Conversion from {} to {} bits", value.Type(), bitsize); 720 throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
707} 721}
708 722
709} // namespace Shader::IR 723} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 7ff763ecf..84b844898 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -17,6 +17,8 @@ namespace Shader::IR {
17class IREmitter { 17class IREmitter {
18public: 18public:
19 explicit IREmitter(Block& block_) : block{block_}, insertion_point{block.end()} {} 19 explicit IREmitter(Block& block_) : block{block_}, insertion_point{block.end()} {}
20 explicit IREmitter(Block& block_, Block::iterator insertion_point_)
21 : block{block_}, insertion_point{insertion_point_} {}
20 22
21 Block& block; 23 Block& block;
22 24
@@ -125,6 +127,7 @@ public:
125 [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value); 127 [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value);
126 128
127 [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); 129 [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
130 [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
128 [[nodiscard]] U32 IMul(const U32& a, const U32& b); 131 [[nodiscard]] U32 IMul(const U32& a, const U32& b);
129 [[nodiscard]] U32 INeg(const U32& value); 132 [[nodiscard]] U32 INeg(const U32& value);
130 [[nodiscard]] U32 IAbs(const U32& value); 133 [[nodiscard]] U32 IAbs(const U32& value);
@@ -155,7 +158,7 @@ public:
155 [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const U16U32U64& value); 158 [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const U16U32U64& value);
156 [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& value); 159 [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& value);
157 160
158 [[nodiscard]] U32U64 ConvertU(size_t bitsize, const U32U64& value); 161 [[nodiscard]] U32U64 ConvertU(size_t result_bitsize, const U32U64& value);
159 162
160private: 163private:
161 IR::Block::iterator insertion_point; 164 IR::Block::iterator insertion_point;
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index ecf76e23d..de953838c 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -2,6 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6
5#include "shader_recompiler/exception.h" 7#include "shader_recompiler/exception.h"
6#include "shader_recompiler/frontend/ir/microinstruction.h" 8#include "shader_recompiler/frontend/ir/microinstruction.h"
7#include "shader_recompiler/frontend/ir/type.h" 9#include "shader_recompiler/frontend/ir/type.h"
@@ -44,6 +46,13 @@ bool Inst::MayHaveSideEffects() const noexcept {
44 case Opcode::WriteGlobal32: 46 case Opcode::WriteGlobal32:
45 case Opcode::WriteGlobal64: 47 case Opcode::WriteGlobal64:
46 case Opcode::WriteGlobal128: 48 case Opcode::WriteGlobal128:
49 case Opcode::WriteStorageU8:
50 case Opcode::WriteStorageS8:
51 case Opcode::WriteStorageU16:
52 case Opcode::WriteStorageS16:
53 case Opcode::WriteStorage32:
54 case Opcode::WriteStorage64:
55 case Opcode::WriteStorage128:
47 return true; 56 return true;
48 default: 57 default:
49 return false; 58 return false;
@@ -56,15 +65,19 @@ bool Inst::IsPseudoInstruction() const noexcept {
56 case Opcode::GetSignFromOp: 65 case Opcode::GetSignFromOp:
57 case Opcode::GetCarryFromOp: 66 case Opcode::GetCarryFromOp:
58 case Opcode::GetOverflowFromOp: 67 case Opcode::GetOverflowFromOp:
59 case Opcode::GetZSCOFromOp:
60 return true; 68 return true;
61 default: 69 default:
62 return false; 70 return false;
63 } 71 }
64} 72}
65 73
74bool Inst::AreAllArgsImmediates() const noexcept {
75 return std::all_of(args.begin(), args.begin() + NumArgs(),
76 [](const IR::Value& value) { return value.IsImmediate(); });
77}
78
66bool Inst::HasAssociatedPseudoOperation() const noexcept { 79bool Inst::HasAssociatedPseudoOperation() const noexcept {
67 return zero_inst || sign_inst || carry_inst || overflow_inst || zsco_inst; 80 return zero_inst || sign_inst || carry_inst || overflow_inst;
68} 81}
69 82
70Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { 83Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) {
@@ -82,9 +95,6 @@ Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) {
82 case Opcode::GetOverflowFromOp: 95 case Opcode::GetOverflowFromOp:
83 CheckPseudoInstruction(overflow_inst, Opcode::GetOverflowFromOp); 96 CheckPseudoInstruction(overflow_inst, Opcode::GetOverflowFromOp);
84 return overflow_inst; 97 return overflow_inst;
85 case Opcode::GetZSCOFromOp:
86 CheckPseudoInstruction(zsco_inst, Opcode::GetZSCOFromOp);
87 return zsco_inst;
88 default: 98 default:
89 throw InvalidArgument("{} is not a pseudo-instruction", opcode); 99 throw InvalidArgument("{} is not a pseudo-instruction", opcode);
90 } 100 }
@@ -176,9 +186,6 @@ void Inst::Use(const Value& value) {
176 case Opcode::GetOverflowFromOp: 186 case Opcode::GetOverflowFromOp:
177 SetPseudoInstruction(value.Inst()->overflow_inst, this); 187 SetPseudoInstruction(value.Inst()->overflow_inst, this);
178 break; 188 break;
179 case Opcode::GetZSCOFromOp:
180 SetPseudoInstruction(value.Inst()->zsco_inst, this);
181 break;
182 default: 189 default:
183 break; 190 break;
184 } 191 }
@@ -200,9 +207,6 @@ void Inst::UndoUse(const Value& value) {
200 case Opcode::GetOverflowFromOp: 207 case Opcode::GetOverflowFromOp:
201 RemovePseudoInstruction(value.Inst()->overflow_inst, Opcode::GetOverflowFromOp); 208 RemovePseudoInstruction(value.Inst()->overflow_inst, Opcode::GetOverflowFromOp);
202 break; 209 break;
203 case Opcode::GetZSCOFromOp:
204 RemovePseudoInstruction(value.Inst()->zsco_inst, Opcode::GetZSCOFromOp);
205 break;
206 default: 210 default:
207 break; 211 break;
208 } 212 }
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
index 61849695a..22101c9e2 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.h
+++ b/src/shader_recompiler/frontend/ir/microinstruction.h
@@ -49,6 +49,9 @@ public:
49 /// Pseudo-instructions depend on their parent instructions for their semantics. 49 /// Pseudo-instructions depend on their parent instructions for their semantics.
50 [[nodiscard]] bool IsPseudoInstruction() const noexcept; 50 [[nodiscard]] bool IsPseudoInstruction() const noexcept;
51 51
52 /// Determines if all arguments of this instruction are immediates.
53 [[nodiscard]] bool AreAllArgsImmediates() const noexcept;
54
52 /// Determines if there is a pseudo-operation associated with this instruction. 55 /// Determines if there is a pseudo-operation associated with this instruction.
53 [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept; 56 [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept;
54 /// Gets a pseudo-operation associated with this instruction 57 /// Gets a pseudo-operation associated with this instruction
@@ -94,7 +97,6 @@ private:
94 Inst* sign_inst{}; 97 Inst* sign_inst{};
95 Inst* carry_inst{}; 98 Inst* carry_inst{};
96 Inst* overflow_inst{}; 99 Inst* overflow_inst{};
97 Inst* zsco_inst{};
98 std::vector<std::pair<Block*, Value>> phi_operands; 100 std::vector<std::pair<Block*, Value>> phi_operands;
99 u64 flags{}; 101 u64 flags{};
100}; 102};
diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc
index 4ecb5e936..4596bf39f 100644
--- a/src/shader_recompiler/frontend/ir/opcode.inc
+++ b/src/shader_recompiler/frontend/ir/opcode.inc
@@ -24,9 +24,6 @@ OPCODE(GetAttribute, U32, Attr
24OPCODE(SetAttribute, U32, Attribute, ) 24OPCODE(SetAttribute, U32, Attribute, )
25OPCODE(GetAttributeIndexed, U32, U32, ) 25OPCODE(GetAttributeIndexed, U32, U32, )
26OPCODE(SetAttributeIndexed, U32, U32, ) 26OPCODE(SetAttributeIndexed, U32, U32, )
27OPCODE(GetZSCORaw, U32, )
28OPCODE(SetZSCORaw, Void, U32, )
29OPCODE(SetZSCO, Void, ZSCO, )
30OPCODE(GetZFlag, U1, Void, ) 27OPCODE(GetZFlag, U1, Void, )
31OPCODE(GetSFlag, U1, Void, ) 28OPCODE(GetSFlag, U1, Void, )
32OPCODE(GetCFlag, U1, Void, ) 29OPCODE(GetCFlag, U1, Void, )
@@ -65,6 +62,22 @@ OPCODE(WriteGlobal32, Void, U64,
65OPCODE(WriteGlobal64, Void, U64, Opaque, ) 62OPCODE(WriteGlobal64, Void, U64, Opaque, )
66OPCODE(WriteGlobal128, Void, U64, Opaque, ) 63OPCODE(WriteGlobal128, Void, U64, Opaque, )
67 64
65// Storage buffer operations
66OPCODE(LoadStorageU8, U32, U32, U32, )
67OPCODE(LoadStorageS8, U32, U32, U32, )
68OPCODE(LoadStorageU16, U32, U32, U32, )
69OPCODE(LoadStorageS16, U32, U32, U32, )
70OPCODE(LoadStorage32, U32, U32, U32, )
71OPCODE(LoadStorage64, Opaque, U32, U32, )
72OPCODE(LoadStorage128, Opaque, U32, U32, )
73OPCODE(WriteStorageU8, Void, U32, U32, U32, )
74OPCODE(WriteStorageS8, Void, U32, U32, U32, )
75OPCODE(WriteStorageU16, Void, U32, U32, U32, )
76OPCODE(WriteStorageS16, Void, U32, U32, U32, )
77OPCODE(WriteStorage32, Void, U32, U32, U32, )
78OPCODE(WriteStorage64, Void, U32, U32, Opaque, )
79OPCODE(WriteStorage128, Void, U32, U32, Opaque, )
80
68// Vector utility 81// Vector utility
69OPCODE(CompositeConstruct2, Opaque, Opaque, Opaque, ) 82OPCODE(CompositeConstruct2, Opaque, Opaque, Opaque, )
70OPCODE(CompositeConstruct3, Opaque, Opaque, Opaque, Opaque, ) 83OPCODE(CompositeConstruct3, Opaque, Opaque, Opaque, Opaque, )
@@ -90,7 +103,6 @@ OPCODE(GetZeroFromOp, U1, Opaq
90OPCODE(GetSignFromOp, U1, Opaque, ) 103OPCODE(GetSignFromOp, U1, Opaque, )
91OPCODE(GetCarryFromOp, U1, Opaque, ) 104OPCODE(GetCarryFromOp, U1, Opaque, )
92OPCODE(GetOverflowFromOp, U1, Opaque, ) 105OPCODE(GetOverflowFromOp, U1, Opaque, )
93OPCODE(GetZSCOFromOp, ZSCO, Opaque, )
94 106
95// Floating-point operations 107// Floating-point operations
96OPCODE(FPAbs16, U16, U16, ) 108OPCODE(FPAbs16, U16, U16, )
@@ -143,6 +155,8 @@ OPCODE(FPTrunc64, U64, U64,
143// Integer operations 155// Integer operations
144OPCODE(IAdd32, U32, U32, U32, ) 156OPCODE(IAdd32, U32, U32, U32, )
145OPCODE(IAdd64, U64, U64, U64, ) 157OPCODE(IAdd64, U64, U64, U64, )
158OPCODE(ISub32, U32, U32, U32, )
159OPCODE(ISub64, U64, U64, U64, )
146OPCODE(IMul32, U32, U32, U32, ) 160OPCODE(IMul32, U32, U32, U32, )
147OPCODE(INeg32, U32, U32, ) 161OPCODE(INeg32, U32, U32, )
148OPCODE(IAbs32, U32, U32, ) 162OPCODE(IAbs32, U32, U32, )
diff --git a/src/shader_recompiler/frontend/ir/type.cpp b/src/shader_recompiler/frontend/ir/type.cpp
index da1e2a0f6..13cc09195 100644
--- a/src/shader_recompiler/frontend/ir/type.cpp
+++ b/src/shader_recompiler/frontend/ir/type.cpp
@@ -11,7 +11,7 @@ namespace Shader::IR {
11 11
12std::string NameOf(Type type) { 12std::string NameOf(Type type) {
13 static constexpr std::array names{ 13 static constexpr std::array names{
14 "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", "U64", "ZSCO", 14 "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", "U64",
15 }; 15 };
16 const size_t bits{static_cast<size_t>(type)}; 16 const size_t bits{static_cast<size_t>(type)};
17 if (bits == 0) { 17 if (bits == 0) {
diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h
index f753628e8..397875018 100644
--- a/src/shader_recompiler/frontend/ir/type.h
+++ b/src/shader_recompiler/frontend/ir/type.h
@@ -25,7 +25,6 @@ enum class Type {
25 U16 = 1 << 7, 25 U16 = 1 << 7,
26 U32 = 1 << 8, 26 U32 = 1 << 8,
27 U64 = 1 << 9, 27 U64 = 1 << 9,
28 ZSCO = 1 << 10,
29}; 28};
30DECLARE_ENUM_FLAG_OPERATORS(Type) 29DECLARE_ENUM_FLAG_OPERATORS(Type)
31 30
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
index 1e974e88c..59a9b10dc 100644
--- a/src/shader_recompiler/frontend/ir/value.cpp
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -91,26 +91,41 @@ IR::Attribute Value::Attribute() const {
91} 91}
92 92
93bool Value::U1() const { 93bool Value::U1() const {
94 if (IsIdentity()) {
95 return inst->Arg(0).U1();
96 }
94 ValidateAccess(Type::U1); 97 ValidateAccess(Type::U1);
95 return imm_u1; 98 return imm_u1;
96} 99}
97 100
98u8 Value::U8() const { 101u8 Value::U8() const {
102 if (IsIdentity()) {
103 return inst->Arg(0).U8();
104 }
99 ValidateAccess(Type::U8); 105 ValidateAccess(Type::U8);
100 return imm_u8; 106 return imm_u8;
101} 107}
102 108
103u16 Value::U16() const { 109u16 Value::U16() const {
110 if (IsIdentity()) {
111 return inst->Arg(0).U16();
112 }
104 ValidateAccess(Type::U16); 113 ValidateAccess(Type::U16);
105 return imm_u16; 114 return imm_u16;
106} 115}
107 116
108u32 Value::U32() const { 117u32 Value::U32() const {
118 if (IsIdentity()) {
119 return inst->Arg(0).U32();
120 }
109 ValidateAccess(Type::U32); 121 ValidateAccess(Type::U32);
110 return imm_u32; 122 return imm_u32;
111} 123}
112 124
113u64 Value::U64() const { 125u64 Value::U64() const {
126 if (IsIdentity()) {
127 return inst->Arg(0).U64();
128 }
114 ValidateAccess(Type::U64); 129 ValidateAccess(Type::U64);
115 return imm_u64; 130 return imm_u64;
116} 131}
@@ -142,8 +157,6 @@ bool Value::operator==(const Value& other) const {
142 return imm_u32 == other.imm_u32; 157 return imm_u32 == other.imm_u32;
143 case Type::U64: 158 case Type::U64:
144 return imm_u64 == other.imm_u64; 159 return imm_u64 == other.imm_u64;
145 case Type::ZSCO:
146 throw NotImplementedException("ZSCO comparison");
147 } 160 }
148 throw LogicError("Invalid type {}", type); 161 throw LogicError("Invalid type {}", type);
149} 162}
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index 368119921..31f831794 100644
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -96,6 +96,5 @@ using U64 = TypedValue<Type::U64>;
96using U32U64 = TypedValue<Type::U32 | Type::U64>; 96using U32U64 = TypedValue<Type::U32 | Type::U64>;
97using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>; 97using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>;
98using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>; 98using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
99using ZSCO = TypedValue<Type::ZSCO>;
100 99
101} // namespace Shader::IR 100} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index bd1f96c07..b3f2de852 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -52,9 +52,11 @@ Program::Program(Environment& env, const Flow::CFG& cfg) {
52 } 52 }
53 std::ranges::for_each(functions, Optimization::SsaRewritePass); 53 std::ranges::for_each(functions, Optimization::SsaRewritePass);
54 for (IR::Function& function : functions) { 54 for (IR::Function& function : functions) {
55 Optimization::Invoke(Optimization::GlobalMemoryToStorageBufferPass, function);
56 Optimization::Invoke(Optimization::ConstantPropagationPass, function);
55 Optimization::Invoke(Optimization::DeadCodeEliminationPass, function); 57 Optimization::Invoke(Optimization::DeadCodeEliminationPass, function);
56 Optimization::Invoke(Optimization::IdentityRemovalPass, function); 58 Optimization::IdentityRemovalPass(function);
57 // Optimization::Invoke(Optimization::VerificationPass, function); 59 Optimization::VerificationPass(function);
58 } 60 }
59 //*/ 61 //*/
60} 62}
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
new file mode 100644
index 000000000..02f5b653d
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -0,0 +1,146 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <type_traits>
7
8#include "common/bit_util.h"
9#include "shader_recompiler/exception.h"
10#include "shader_recompiler/frontend/ir/microinstruction.h"
11#include "shader_recompiler/ir_opt/passes.h"
12
13namespace Shader::Optimization {
14namespace {
15[[nodiscard]] u32 BitFieldUExtract(u32 base, u32 shift, u32 count) {
16 if (static_cast<size_t>(shift) + static_cast<size_t>(count) > Common::BitSize<u32>()) {
17 throw LogicError("Undefined result in BitFieldUExtract({}, {}, {})", base, shift, count);
18 }
19 return (base >> shift) & ((1U << count) - 1);
20}
21
22template <typename T>
23[[nodiscard]] T Arg(const IR::Value& value) {
24 if constexpr (std::is_same_v<T, bool>) {
25 return value.U1();
26 } else if constexpr (std::is_same_v<T, u32>) {
27 return value.U32();
28 } else if constexpr (std::is_same_v<T, u64>) {
29 return value.U64();
30 }
31}
32
33template <typename ImmFn>
34bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
35 const auto arg = [](const IR::Value& value) {
36 if constexpr (std::is_invocable_r_v<bool, ImmFn, bool, bool>) {
37 return value.U1();
38 } else if constexpr (std::is_invocable_r_v<u32, ImmFn, u32, u32>) {
39 return value.U32();
40 } else if constexpr (std::is_invocable_r_v<u64, ImmFn, u64, u64>) {
41 return value.U64();
42 }
43 };
44 const IR::Value lhs{inst.Arg(0)};
45 const IR::Value rhs{inst.Arg(1)};
46
47 const bool is_lhs_immediate{lhs.IsImmediate()};
48 const bool is_rhs_immediate{rhs.IsImmediate()};
49
50 if (is_lhs_immediate && is_rhs_immediate) {
51 const auto result{imm_fn(arg(lhs), arg(rhs))};
52 inst.ReplaceUsesWith(IR::Value{result});
53 return false;
54 }
55 if (is_lhs_immediate && !is_rhs_immediate) {
56 IR::Inst* const rhs_inst{rhs.InstRecursive()};
57 if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) {
58 const auto combined{imm_fn(arg(lhs), arg(rhs_inst->Arg(1)))};
59 inst.SetArg(0, rhs_inst->Arg(0));
60 inst.SetArg(1, IR::Value{combined});
61 } else {
62 // Normalize
63 inst.SetArg(0, rhs);
64 inst.SetArg(1, lhs);
65 }
66 }
67 if (!is_lhs_immediate && is_rhs_immediate) {
68 const IR::Inst* const lhs_inst{lhs.InstRecursive()};
69 if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) {
70 const auto combined{imm_fn(arg(rhs), arg(lhs_inst->Arg(1)))};
71 inst.SetArg(0, lhs_inst->Arg(0));
72 inst.SetArg(1, IR::Value{combined});
73 }
74 }
75 return true;
76}
77
78void FoldGetRegister(IR::Inst& inst) {
79 if (inst.Arg(0).Reg() == IR::Reg::RZ) {
80 inst.ReplaceUsesWith(IR::Value{u32{0}});
81 }
82}
83
84void FoldGetPred(IR::Inst& inst) {
85 if (inst.Arg(0).Pred() == IR::Pred::PT) {
86 inst.ReplaceUsesWith(IR::Value{true});
87 }
88}
89
90template <typename T>
91void FoldAdd(IR::Inst& inst) {
92 if (inst.HasAssociatedPseudoOperation()) {
93 return;
94 }
95 if (!FoldCommutative(inst, [](T a, T b) { return a + b; })) {
96 return;
97 }
98 const IR::Value rhs{inst.Arg(1)};
99 if (rhs.IsImmediate() && Arg<T>(rhs) == 0) {
100 inst.ReplaceUsesWith(inst.Arg(0));
101 }
102}
103
104void FoldLogicalAnd(IR::Inst& inst) {
105 if (!FoldCommutative(inst, [](bool a, bool b) { return a && b; })) {
106 return;
107 }
108 const IR::Value rhs{inst.Arg(1)};
109 if (rhs.IsImmediate()) {
110 if (rhs.U1()) {
111 inst.ReplaceUsesWith(inst.Arg(0));
112 } else {
113 inst.ReplaceUsesWith(IR::Value{false});
114 }
115 }
116}
117
118void ConstantPropagation(IR::Inst& inst) {
119 switch (inst.Opcode()) {
120 case IR::Opcode::GetRegister:
121 return FoldGetRegister(inst);
122 case IR::Opcode::GetPred:
123 return FoldGetPred(inst);
124 case IR::Opcode::IAdd32:
125 return FoldAdd<u32>(inst);
126 case IR::Opcode::IAdd64:
127 return FoldAdd<u64>(inst);
128 case IR::Opcode::BitFieldUExtract:
129 if (inst.AreAllArgsImmediates() && !inst.HasAssociatedPseudoOperation()) {
130 inst.ReplaceUsesWith(IR::Value{
131 BitFieldUExtract(inst.Arg(0).U32(), inst.Arg(1).U32(), inst.Arg(2).U32())});
132 }
133 break;
134 case IR::Opcode::LogicalAnd:
135 return FoldLogicalAnd(inst);
136 default:
137 break;
138 }
139}
140} // Anonymous namespace
141
142void ConstantPropagationPass(IR::Block& block) {
143 std::ranges::for_each(block, ConstantPropagation);
144}
145
146} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
new file mode 100644
index 000000000..ee69a5c9d
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -0,0 +1,331 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <compare>
7#include <optional>
8#include <ranges>
9
10#include <boost/container/flat_set.hpp>
11#include <boost/container/small_vector.hpp>
12
13#include "shader_recompiler/frontend/ir/basic_block.h"
14#include "shader_recompiler/frontend/ir/ir_emitter.h"
15#include "shader_recompiler/frontend/ir/microinstruction.h"
16#include "shader_recompiler/ir_opt/passes.h"
17
18namespace Shader::Optimization {
19namespace {
20/// Address in constant buffers to the storage buffer descriptor
21struct StorageBufferAddr {
22 auto operator<=>(const StorageBufferAddr&) const noexcept = default;
23
24 u32 index;
25 u32 offset;
26};
27
28/// Block iterator to a global memory instruction and the storage buffer it uses
29struct StorageInst {
30 StorageBufferAddr storage_buffer;
31 IR::Block::iterator inst;
32};
33
34/// Bias towards a certain range of constant buffers when looking for storage buffers
35struct Bias {
36 u32 index;
37 u32 offset_begin;
38 u32 offset_end;
39};
40
41using StorageBufferSet =
42 boost::container::flat_set<StorageBufferAddr, std::less<StorageBufferAddr>,
43 boost::container::small_vector<StorageBufferAddr, 16>>;
44using StorageInstVector = boost::container::small_vector<StorageInst, 32>;
45
46/// Returns true when the instruction is a global memory instruction
47bool IsGlobalMemory(const IR::Inst& inst) {
48 switch (inst.Opcode()) {
49 case IR::Opcode::LoadGlobalS8:
50 case IR::Opcode::LoadGlobalU8:
51 case IR::Opcode::LoadGlobalS16:
52 case IR::Opcode::LoadGlobalU16:
53 case IR::Opcode::LoadGlobal32:
54 case IR::Opcode::LoadGlobal64:
55 case IR::Opcode::LoadGlobal128:
56 case IR::Opcode::WriteGlobalS8:
57 case IR::Opcode::WriteGlobalU8:
58 case IR::Opcode::WriteGlobalS16:
59 case IR::Opcode::WriteGlobalU16:
60 case IR::Opcode::WriteGlobal32:
61 case IR::Opcode::WriteGlobal64:
62 case IR::Opcode::WriteGlobal128:
63 return true;
64 default:
65 return false;
66 }
67}
68
69/// Converts a global memory opcode to its storage buffer equivalent
70IR::Opcode GlobalToStorage(IR::Opcode opcode) {
71 switch (opcode) {
72 case IR::Opcode::LoadGlobalS8:
73 return IR::Opcode::LoadStorageS8;
74 case IR::Opcode::LoadGlobalU8:
75 return IR::Opcode::LoadStorageU8;
76 case IR::Opcode::LoadGlobalS16:
77 return IR::Opcode::LoadStorageS16;
78 case IR::Opcode::LoadGlobalU16:
79 return IR::Opcode::LoadStorageU16;
80 case IR::Opcode::LoadGlobal32:
81 return IR::Opcode::LoadStorage32;
82 case IR::Opcode::LoadGlobal64:
83 return IR::Opcode::LoadStorage64;
84 case IR::Opcode::LoadGlobal128:
85 return IR::Opcode::LoadStorage128;
86 case IR::Opcode::WriteGlobalS8:
87 return IR::Opcode::WriteStorageS8;
88 case IR::Opcode::WriteGlobalU8:
89 return IR::Opcode::WriteStorageU8;
90 case IR::Opcode::WriteGlobalS16:
91 return IR::Opcode::WriteStorageS16;
92 case IR::Opcode::WriteGlobalU16:
93 return IR::Opcode::WriteStorageU16;
94 case IR::Opcode::WriteGlobal32:
95 return IR::Opcode::WriteStorage32;
96 case IR::Opcode::WriteGlobal64:
97 return IR::Opcode::WriteStorage64;
98 case IR::Opcode::WriteGlobal128:
99 return IR::Opcode::WriteStorage128;
100 default:
101 throw InvalidArgument("Invalid global memory opcode {}", opcode);
102 }
103}
104
105/// Returns true when a storage buffer address satisfies a bias
106bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
107 return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
108 storage_buffer.offset < bias.offset_end;
109}
110
111/// Ignores a global memory operation, reads return zero and writes are ignored
112void IgnoreGlobalMemory(IR::Block& block, IR::Block::iterator inst) {
113 const IR::Value zero{u32{0}};
114 switch (inst->Opcode()) {
115 case IR::Opcode::LoadGlobalS8:
116 case IR::Opcode::LoadGlobalU8:
117 case IR::Opcode::LoadGlobalS16:
118 case IR::Opcode::LoadGlobalU16:
119 case IR::Opcode::LoadGlobal32:
120 inst->ReplaceUsesWith(zero);
121 break;
122 case IR::Opcode::LoadGlobal64:
123 inst->ReplaceUsesWith(
124 IR::Value{&*block.PrependNewInst(inst, IR::Opcode::CompositeConstruct2, {zero, zero})});
125 break;
126 case IR::Opcode::LoadGlobal128:
127 inst->ReplaceUsesWith(IR::Value{&*block.PrependNewInst(
128 inst, IR::Opcode::CompositeConstruct4, {zero, zero, zero, zero})});
129 break;
130 case IR::Opcode::WriteGlobalS8:
131 case IR::Opcode::WriteGlobalU8:
132 case IR::Opcode::WriteGlobalS16:
133 case IR::Opcode::WriteGlobalU16:
134 case IR::Opcode::WriteGlobal32:
135 case IR::Opcode::WriteGlobal64:
136 case IR::Opcode::WriteGlobal128:
137 inst->Invalidate();
138 break;
139 default:
140 throw LogicError("Invalid opcode to ignore its global memory operation {}", inst->Opcode());
141 }
142}
143
144/// Recursively tries to track the storage buffer address used by a global memory instruction
145std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
146 if (value.IsImmediate()) {
147 // Immediates can't be a storage buffer
148 return std::nullopt;
149 }
150 const IR::Inst* const inst{value.InstRecursive()};
151 if (inst->Opcode() == IR::Opcode::GetCbuf) {
152 const IR::Value index{inst->Arg(0)};
153 const IR::Value offset{inst->Arg(1)};
154 if (!index.IsImmediate()) {
155 // Definitely not a storage buffer if it's read from a non-immediate index
156 return std::nullopt;
157 }
158 if (!offset.IsImmediate()) {
159 // TODO: Support SSBO arrays
160 return std::nullopt;
161 }
162 const StorageBufferAddr storage_buffer{
163 .index = index.U32(),
164 .offset = offset.U32(),
165 };
166 if (bias && !MeetsBias(storage_buffer, *bias)) {
167 // We have to blacklist some addresses in case we wrongly point to them
168 return std::nullopt;
169 }
170 return storage_buffer;
171 }
172 // Reversed loops are more likely to find the right result
173 for (size_t arg = inst->NumArgs(); arg--;) {
174 if (const std::optional storage_buffer{Track(inst->Arg(arg), bias)}) {
175 return *storage_buffer;
176 }
177 }
178 return std::nullopt;
179}
180
181/// Collects the storage buffer used by a global memory instruction and the instruction itself
182void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst,
183 StorageBufferSet& storage_buffer_set, StorageInstVector& to_replace) {
184 // NVN puts storage buffers in a specific range, we have to bias towards these addresses to
185 // avoid getting false positives
186 static constexpr Bias nvn_bias{
187 .index{0},
188 .offset_begin{0x110},
189 .offset_end{0x610},
190 };
191 // First try to find storage buffers in the NVN address
192 const IR::U64 addr{inst->Arg(0)};
193 std::optional<StorageBufferAddr> storage_buffer{Track(addr, &nvn_bias)};
194 if (!storage_buffer) {
195 // If it fails, track without a bias
196 storage_buffer = Track(addr, nullptr);
197 if (!storage_buffer) {
198 // If that also failed, drop the global memory usage
199 IgnoreGlobalMemory(block, inst);
200 }
201 }
202 // Collect storage buffer and the instruction
203 storage_buffer_set.insert(*storage_buffer);
204 to_replace.push_back(StorageInst{
205 .storage_buffer{*storage_buffer},
206 .inst{inst},
207 });
208}
209
210/// Tries to track the first 32-bits of a global memory instruction
211std::optional<IR::U32> TrackLowAddress(IR::IREmitter& ir, IR::Inst* inst) {
212 // The first argument is the low level GPU pointer to the global memory instruction
213 const IR::U64 addr{inst->Arg(0)};
214 if (addr.IsImmediate()) {
215 // Not much we can do if it's an immediate
216 return std::nullopt;
217 }
218 // This address is expected to either be a PackUint2x32 or a IAdd64
219 IR::Inst* addr_inst{addr.InstRecursive()};
220 s32 imm_offset{0};
221 if (addr_inst->Opcode() == IR::Opcode::IAdd64) {
222 // If it's an IAdd64, get the immediate offset it is applying and grab the address
223 // instruction. This expects for the instruction to be canonicalized having the address on
224 // the first argument and the immediate offset on the second one.
225 const IR::U64 imm_offset_value{addr_inst->Arg(1)};
226 if (!imm_offset_value.IsImmediate()) {
227 return std::nullopt;
228 }
229 imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64()));
230 const IR::U64 iadd_addr{addr_inst->Arg(0)};
231 if (iadd_addr.IsImmediate()) {
232 return std::nullopt;
233 }
234 addr_inst = iadd_addr.Inst();
235 }
236 // With IAdd64 handled, now PackUint2x32 is expected without exceptions
237 if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) {
238 return std::nullopt;
239 }
240 // PackUint2x32 is expected to be generated from a vector
241 const IR::Value vector{addr_inst->Arg(0)};
242 if (vector.IsImmediate()) {
243 return std::nullopt;
244 }
245 // This vector is expected to be a CompositeConstruct2
246 IR::Inst* const vector_inst{vector.InstRecursive()};
247 if (vector_inst->Opcode() != IR::Opcode::CompositeConstruct2) {
248 return std::nullopt;
249 }
250 // Grab the first argument from the CompositeConstruct2, this is the low address.
251 // Re-apply the offset in case we found one.
252 const IR::U32 low_addr{vector_inst->Arg(0)};
253 return imm_offset != 0 ? IR::U32{ir.IAdd(low_addr, ir.Imm32(imm_offset))} : low_addr;
254}
255
256/// Returns the offset in indices (not bytes) for an equivalent storage instruction
257IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferAddr buffer) {
258 IR::IREmitter ir{block, inst};
259 IR::U32 offset;
260 if (const std::optional<IR::U32> low_addr{TrackLowAddress(ir, &*inst)}) {
261 offset = *low_addr;
262 } else {
263 offset = ir.ConvertU(32, IR::U64{inst->Arg(0)});
264 }
265 // Subtract the least significant 32 bits from the guest offset. The result is the storage
266 // buffer offset in bytes.
267 const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
268 return ir.ISub(offset, low_cbuf);
269}
270
271/// Replace a global memory load instruction with its storage buffer equivalent
272void ReplaceLoad(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
273 const IR::U32& offset) {
274 const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())};
275 const IR::Value value{&*block.PrependNewInst(inst, new_opcode, {storage_index, offset})};
276 inst->ReplaceUsesWith(value);
277}
278
279/// Replace a global memory write instruction with its storage buffer equivalent
280void ReplaceWrite(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
281 const IR::U32& offset) {
282 const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())};
283 block.PrependNewInst(inst, new_opcode, {storage_index, offset, inst->Arg(1)});
284 inst->Invalidate();
285}
286
287/// Replace a global memory instruction with its storage buffer equivalent
288void Replace(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
289 const IR::U32& offset) {
290 switch (inst->Opcode()) {
291 case IR::Opcode::LoadGlobalS8:
292 case IR::Opcode::LoadGlobalU8:
293 case IR::Opcode::LoadGlobalS16:
294 case IR::Opcode::LoadGlobalU16:
295 case IR::Opcode::LoadGlobal32:
296 case IR::Opcode::LoadGlobal64:
297 case IR::Opcode::LoadGlobal128:
298 return ReplaceLoad(block, inst, storage_index, offset);
299 case IR::Opcode::WriteGlobalS8:
300 case IR::Opcode::WriteGlobalU8:
301 case IR::Opcode::WriteGlobalS16:
302 case IR::Opcode::WriteGlobalU16:
303 case IR::Opcode::WriteGlobal32:
304 case IR::Opcode::WriteGlobal64:
305 case IR::Opcode::WriteGlobal128:
306 return ReplaceWrite(block, inst, storage_index, offset);
307 default:
308 throw InvalidArgument("Invalid global memory opcode {}", inst->Opcode());
309 }
310}
311} // Anonymous namespace
312
313void GlobalMemoryToStorageBufferPass(IR::Block& block) {
314 StorageBufferSet storage_buffers;
315 StorageInstVector to_replace;
316
317 for (IR::Block::iterator inst{block.begin()}; inst != block.end(); ++inst) {
318 if (!IsGlobalMemory(*inst)) {
319 continue;
320 }
321 CollectStorageBuffers(block, inst, storage_buffers, to_replace);
322 }
323 for (const auto [storage_buffer, inst] : to_replace) {
324 const auto it{storage_buffers.find(storage_buffer)};
325 const IR::U32 storage_index{IR::Value{static_cast<u32>(storage_buffers.index_of(it))}};
326 const IR::U32 offset{StorageOffset(block, inst, storage_buffer)};
327 Replace(block, inst, storage_index, offset);
328 }
329}
330
331} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
index 7f8500087..39a972919 100644
--- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
+++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
@@ -10,22 +10,24 @@
10 10
11namespace Shader::Optimization { 11namespace Shader::Optimization {
12 12
13void IdentityRemovalPass(IR::Block& block) { 13void IdentityRemovalPass(IR::Function& function) {
14 std::vector<IR::Inst*> to_invalidate; 14 std::vector<IR::Inst*> to_invalidate;
15 15
16 for (auto inst = block.begin(); inst != block.end();) { 16 for (auto& block : function.blocks) {
17 const size_t num_args{inst->NumArgs()}; 17 for (auto inst = block->begin(); inst != block->end();) {
18 for (size_t i = 0; i < num_args; ++i) { 18 const size_t num_args{inst->NumArgs()};
19 IR::Value arg; 19 for (size_t i = 0; i < num_args; ++i) {
20 while ((arg = inst->Arg(i)).IsIdentity()) { 20 IR::Value arg;
21 inst->SetArg(i, arg.Inst()->Arg(0)); 21 while ((arg = inst->Arg(i)).IsIdentity()) {
22 inst->SetArg(i, arg.Inst()->Arg(0));
23 }
24 }
25 if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) {
26 to_invalidate.push_back(&*inst);
27 inst = block->Instructions().erase(inst);
28 } else {
29 ++inst;
22 } 30 }
23 }
24 if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) {
25 to_invalidate.push_back(&*inst);
26 inst = block.Instructions().erase(inst);
27 } else {
28 ++inst;
29 } 31 }
30 } 32 }
31 for (IR::Inst* const inst : to_invalidate) { 33 for (IR::Inst* const inst : to_invalidate) {
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 7ed4005ed..578a24d89 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -16,9 +16,11 @@ void Invoke(Func&& func, IR::Function& function) {
16 } 16 }
17} 17}
18 18
19void ConstantPropagationPass(IR::Block& block);
19void DeadCodeEliminationPass(IR::Block& block); 20void DeadCodeEliminationPass(IR::Block& block);
20void IdentityRemovalPass(IR::Block& block); 21void GlobalMemoryToStorageBufferPass(IR::Block& block);
22void IdentityRemovalPass(IR::Function& function);
21void SsaRewritePass(IR::Function& function); 23void SsaRewritePass(IR::Function& function);
22void VerificationPass(const IR::Block& block); 24void VerificationPass(const IR::Function& function);
23 25
24} // namespace Shader::Optimization 26} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
index a4b256a40..3c9b020e0 100644
--- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -14,8 +14,6 @@
14// https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6 14// https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6
15// 15//
16 16
17#include <map>
18
19#include <boost/container/flat_map.hpp> 17#include <boost/container/flat_map.hpp>
20 18
21#include "shader_recompiler/frontend/ir/basic_block.h" 19#include "shader_recompiler/frontend/ir/basic_block.h"
@@ -30,6 +28,12 @@ namespace Shader::Optimization {
30namespace { 28namespace {
31using ValueMap = boost::container::flat_map<IR::Block*, IR::Value, std::less<IR::Block*>>; 29using ValueMap = boost::container::flat_map<IR::Block*, IR::Value, std::less<IR::Block*>>;
32 30
31struct FlagTag {};
32struct ZeroFlagTag : FlagTag {};
33struct SignFlagTag : FlagTag {};
34struct CarryFlagTag : FlagTag {};
35struct OverflowFlagTag : FlagTag {};
36
33struct DefTable { 37struct DefTable {
34 [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept { 38 [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept {
35 return regs[IR::RegIndex(variable)]; 39 return regs[IR::RegIndex(variable)];
@@ -39,8 +43,28 @@ struct DefTable {
39 return preds[IR::PredIndex(variable)]; 43 return preds[IR::PredIndex(variable)];
40 } 44 }
41 45
46 [[nodiscard]] ValueMap& operator[](ZeroFlagTag) noexcept {
47 return zero_flag;
48 }
49
50 [[nodiscard]] ValueMap& operator[](SignFlagTag) noexcept {
51 return sign_flag;
52 }
53
54 [[nodiscard]] ValueMap& operator[](CarryFlagTag) noexcept {
55 return carry_flag;
56 }
57
58 [[nodiscard]] ValueMap& operator[](OverflowFlagTag) noexcept {
59 return overflow_flag;
60 }
61
42 std::array<ValueMap, IR::NUM_USER_REGS> regs; 62 std::array<ValueMap, IR::NUM_USER_REGS> regs;
43 std::array<ValueMap, IR::NUM_USER_PREDS> preds; 63 std::array<ValueMap, IR::NUM_USER_PREDS> preds;
64 ValueMap zero_flag;
65 ValueMap sign_flag;
66 ValueMap carry_flag;
67 ValueMap overflow_flag;
44}; 68};
45 69
46IR::Opcode UndefOpcode(IR::Reg) noexcept { 70IR::Opcode UndefOpcode(IR::Reg) noexcept {
@@ -51,6 +75,10 @@ IR::Opcode UndefOpcode(IR::Pred) noexcept {
51 return IR::Opcode::Undef1; 75 return IR::Opcode::Undef1;
52} 76}
53 77
78IR::Opcode UndefOpcode(const FlagTag&) noexcept {
79 return IR::Opcode::Undef1;
80}
81
54[[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { 82[[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept {
55 return inst.Opcode() == IR::Opcode::Phi; 83 return inst.Opcode() == IR::Opcode::Phi;
56} 84}
@@ -135,6 +163,18 @@ void SsaRewritePass(IR::Function& function) {
135 pass.WriteVariable(pred, block.get(), inst.Arg(1)); 163 pass.WriteVariable(pred, block.get(), inst.Arg(1));
136 } 164 }
137 break; 165 break;
166 case IR::Opcode::SetZFlag:
167 pass.WriteVariable(ZeroFlagTag{}, block.get(), inst.Arg(0));
168 break;
169 case IR::Opcode::SetSFlag:
170 pass.WriteVariable(SignFlagTag{}, block.get(), inst.Arg(0));
171 break;
172 case IR::Opcode::SetCFlag:
173 pass.WriteVariable(CarryFlagTag{}, block.get(), inst.Arg(0));
174 break;
175 case IR::Opcode::SetOFlag:
176 pass.WriteVariable(OverflowFlagTag{}, block.get(), inst.Arg(0));
177 break;
138 case IR::Opcode::GetRegister: 178 case IR::Opcode::GetRegister:
139 if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { 179 if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
140 inst.ReplaceUsesWith(pass.ReadVariable(reg, block.get())); 180 inst.ReplaceUsesWith(pass.ReadVariable(reg, block.get()));
@@ -145,6 +185,18 @@ void SsaRewritePass(IR::Function& function) {
145 inst.ReplaceUsesWith(pass.ReadVariable(pred, block.get())); 185 inst.ReplaceUsesWith(pass.ReadVariable(pred, block.get()));
146 } 186 }
147 break; 187 break;
188 case IR::Opcode::GetZFlag:
189 inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block.get()));
190 break;
191 case IR::Opcode::GetSFlag:
192 inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block.get()));
193 break;
194 case IR::Opcode::GetCFlag:
195 inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block.get()));
196 break;
197 case IR::Opcode::GetOFlag:
198 inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block.get()));
199 break;
148 default: 200 default:
149 break; 201 break;
150 } 202 }
diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp
index 36d9ae39b..8a5adf5a2 100644
--- a/src/shader_recompiler/ir_opt/verification_pass.cpp
+++ b/src/shader_recompiler/ir_opt/verification_pass.cpp
@@ -11,40 +11,44 @@
11 11
12namespace Shader::Optimization { 12namespace Shader::Optimization {
13 13
14static void ValidateTypes(const IR::Block& block) { 14static void ValidateTypes(const IR::Function& function) {
15 for (const IR::Inst& inst : block) { 15 for (const auto& block : function.blocks) {
16 const size_t num_args{inst.NumArgs()}; 16 for (const IR::Inst& inst : *block) {
17 for (size_t i = 0; i < num_args; ++i) { 17 const size_t num_args{inst.NumArgs()};
18 const IR::Type t1{inst.Arg(i).Type()}; 18 for (size_t i = 0; i < num_args; ++i) {
19 const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)}; 19 const IR::Type t1{inst.Arg(i).Type()};
20 if (!IR::AreTypesCompatible(t1, t2)) { 20 const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)};
21 throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(block)); 21 if (!IR::AreTypesCompatible(t1, t2)) {
22 throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block));
23 }
22 } 24 }
23 } 25 }
24 } 26 }
25} 27}
26 28
27static void ValidateUses(const IR::Block& block) { 29static void ValidateUses(const IR::Function& function) {
28 std::map<IR::Inst*, int> actual_uses; 30 std::map<IR::Inst*, int> actual_uses;
29 for (const IR::Inst& inst : block) { 31 for (const auto& block : function.blocks) {
30 const size_t num_args{inst.NumArgs()}; 32 for (const IR::Inst& inst : *block) {
31 for (size_t i = 0; i < num_args; ++i) { 33 const size_t num_args{inst.NumArgs()};
32 const IR::Value arg{inst.Arg(i)}; 34 for (size_t i = 0; i < num_args; ++i) {
33 if (!arg.IsImmediate()) { 35 const IR::Value arg{inst.Arg(i)};
34 ++actual_uses[arg.Inst()]; 36 if (!arg.IsImmediate()) {
37 ++actual_uses[arg.Inst()];
38 }
35 } 39 }
36 } 40 }
37 } 41 }
38 for (const auto [inst, uses] : actual_uses) { 42 for (const auto [inst, uses] : actual_uses) {
39 if (inst->UseCount() != uses) { 43 if (inst->UseCount() != uses) {
40 throw LogicError("Invalid uses in block:\n{}", IR::DumpBlock(block)); 44 throw LogicError("Invalid uses in block:" /*, IR::DumpFunction(function)*/);
41 } 45 }
42 } 46 }
43} 47}
44 48
45void VerificationPass(const IR::Block& block) { 49void VerificationPass(const IR::Function& function) {
46 ValidateTypes(block); 50 ValidateTypes(function);
47 ValidateUses(block); 51 ValidateUses(function);
48} 52}
49 53
50} // namespace Shader::Optimization 54} // namespace Shader::Optimization