Diffstat (limited to 'src/shader_recompiler')
17 files changed, 652 insertions, 63 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index f5dd4d29e..72d5f41d2 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -59,7 +59,9 @@ add_executable(shader_recompiler
     frontend/maxwell/translate/impl/move_special_register.cpp
     frontend/maxwell/translate/translate.cpp
     frontend/maxwell/translate/translate.h
+    ir_opt/constant_propagation_pass.cpp
     ir_opt/dead_code_elimination_pass.cpp
+    ir_opt/global_memory_to_storage_buffer_pass.cpp
     ir_opt/identity_removal_pass.cpp
     ir_opt/passes.h
     ir_opt/ssa_rewrite_pass.cpp
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 87b253c9a..1c5ae0109 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -504,6 +504,20 @@ U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
     }
 }
 
+U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) {
+    if (a.Type() != b.Type()) {
+        throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
+    }
+    switch (a.Type()) {
+    case Type::U32:
+        return Inst<U32>(Opcode::ISub32, a, b);
+    case Type::U64:
+        return Inst<U64>(Opcode::ISub64, a, b);
+    default:
+        ThrowInvalidType(a.Type());
+    }
+}
+
 U32 IREmitter::IMul(const U32& a, const U32& b) {
     return Inst<U32>(Opcode::IMul32, a, b);
 }
@@ -679,8 +693,8 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& v
     }
 }
 
-U32U64 IREmitter::ConvertU(size_t bitsize, const U32U64& value) {
-    switch (bitsize) {
+U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) {
+    switch (result_bitsize) {
     case 32:
         switch (value.Type()) {
         case Type::U32:
@@ -703,7 +717,7 @@ U32U64 IREmitter::ConvertU(size_t bitsize, const U32U64& value) {
             break;
         }
     }
-    throw NotImplementedException("Conversion from {} to {} bits", value.Type(), bitsize);
+    throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
 }
 
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 7ff763ecf..84b844898 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -17,6 +17,8 @@ namespace Shader::IR {
 class IREmitter {
 public:
     explicit IREmitter(Block& block_) : block{block_}, insertion_point{block.end()} {}
+    explicit IREmitter(Block& block_, Block::iterator insertion_point_)
+        : block{block_}, insertion_point{insertion_point_} {}
 
     Block& block;
 
@@ -125,6 +127,7 @@ public:
     [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value);
 
     [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
+    [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
     [[nodiscard]] U32 IMul(const U32& a, const U32& b);
     [[nodiscard]] U32 INeg(const U32& value);
     [[nodiscard]] U32 IAbs(const U32& value);
@@ -155,7 +158,7 @@ public:
     [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const U16U32U64& value);
     [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& value);
 
-    [[nodiscard]] U32U64 ConvertU(size_t bitsize, const U32U64& value);
+    [[nodiscard]] U32U64 ConvertU(size_t result_bitsize, const U32U64& value);
 
 private:
     IR::Block::iterator insertion_point;
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index ecf76e23d..de953838c 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
+
 #include "shader_recompiler/exception.h"
 #include "shader_recompiler/frontend/ir/microinstruction.h"
 #include "shader_recompiler/frontend/ir/type.h"
@@ -44,6 +46,13 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::WriteGlobal32:
     case Opcode::WriteGlobal64:
     case Opcode::WriteGlobal128:
+    case Opcode::WriteStorageU8:
+    case Opcode::WriteStorageS8:
+    case Opcode::WriteStorageU16:
+    case Opcode::WriteStorageS16:
+    case Opcode::WriteStorage32:
+    case Opcode::WriteStorage64:
+    case Opcode::WriteStorage128:
         return true;
     default:
         return false;
@@ -56,15 +65,19 @@ bool Inst::IsPseudoInstruction() const noexcept {
     case Opcode::GetSignFromOp:
     case Opcode::GetCarryFromOp:
    case Opcode::GetOverflowFromOp:
-    case Opcode::GetZSCOFromOp:
         return true;
     default:
         return false;
     }
 }
 
+bool Inst::AreAllArgsImmediates() const noexcept {
+    return std::all_of(args.begin(), args.begin() + NumArgs(),
+                       [](const IR::Value& value) { return value.IsImmediate(); });
+}
+
 bool Inst::HasAssociatedPseudoOperation() const noexcept {
-    return zero_inst || sign_inst || carry_inst || overflow_inst || zsco_inst;
+    return zero_inst || sign_inst || carry_inst || overflow_inst;
 }
 
 Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) {
@@ -82,9 +95,6 @@ Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) {
     case Opcode::GetOverflowFromOp:
         CheckPseudoInstruction(overflow_inst, Opcode::GetOverflowFromOp);
         return overflow_inst;
-    case Opcode::GetZSCOFromOp:
-        CheckPseudoInstruction(zsco_inst, Opcode::GetZSCOFromOp);
-        return zsco_inst;
     default:
         throw InvalidArgument("{} is not a pseudo-instruction", opcode);
     }
@@ -176,9 +186,6 @@ void Inst::Use(const Value& value) {
     case Opcode::GetOverflowFromOp:
         SetPseudoInstruction(value.Inst()->overflow_inst, this);
         break;
-    case Opcode::GetZSCOFromOp:
-        SetPseudoInstruction(value.Inst()->zsco_inst, this);
-        break;
     default:
         break;
     }
@@ -200,9 +207,6 @@ void Inst::UndoUse(const Value& value) {
     case Opcode::GetOverflowFromOp:
         RemovePseudoInstruction(value.Inst()->overflow_inst, Opcode::GetOverflowFromOp);
         break;
-    case Opcode::GetZSCOFromOp:
-        RemovePseudoInstruction(value.Inst()->zsco_inst, Opcode::GetZSCOFromOp);
-        break;
     default:
         break;
     }
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h
index 61849695a..22101c9e2 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.h
+++ b/src/shader_recompiler/frontend/ir/microinstruction.h
@@ -49,6 +49,9 @@ public:
     /// Pseudo-instructions depend on their parent instructions for their semantics.
     [[nodiscard]] bool IsPseudoInstruction() const noexcept;
 
+    /// Determines if all arguments of this instruction are immediates.
+    [[nodiscard]] bool AreAllArgsImmediates() const noexcept;
+
     /// Determines if there is a pseudo-operation associated with this instruction.
     [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept;
     /// Gets a pseudo-operation associated with this instruction
@@ -94,7 +97,6 @@ private:
     Inst* sign_inst{};
     Inst* carry_inst{};
     Inst* overflow_inst{};
-    Inst* zsco_inst{};
     std::vector<std::pair<Block*, Value>> phi_operands;
     u64 flags{};
 };
diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc
index 4ecb5e936..4596bf39f 100644
--- a/src/shader_recompiler/frontend/ir/opcode.inc
+++ b/src/shader_recompiler/frontend/ir/opcode.inc
@@ -24,9 +24,6 @@ OPCODE(GetAttribute, U32, Attr
 OPCODE(SetAttribute, U32, Attribute, )
 OPCODE(GetAttributeIndexed, U32, U32, )
 OPCODE(SetAttributeIndexed, U32, U32, )
-OPCODE(GetZSCORaw, U32, )
-OPCODE(SetZSCORaw, Void, U32, )
-OPCODE(SetZSCO, Void, ZSCO, )
 OPCODE(GetZFlag, U1, Void, )
 OPCODE(GetSFlag, U1, Void, )
 OPCODE(GetCFlag, U1, Void, )
@@ -65,6 +62,22 @@ OPCODE(WriteGlobal32, Void, U64,
 OPCODE(WriteGlobal64, Void, U64, Opaque, )
 OPCODE(WriteGlobal128, Void, U64, Opaque, )
 
+// Storage buffer operations
+OPCODE(LoadStorageU8, U32, U32, U32, )
+OPCODE(LoadStorageS8, U32, U32, U32, )
+OPCODE(LoadStorageU16, U32, U32, U32, )
+OPCODE(LoadStorageS16, U32, U32, U32, )
+OPCODE(LoadStorage32, U32, U32, U32, )
+OPCODE(LoadStorage64, Opaque, U32, U32, )
+OPCODE(LoadStorage128, Opaque, U32, U32, )
+OPCODE(WriteStorageU8, Void, U32, U32, U32, )
+OPCODE(WriteStorageS8, Void, U32, U32, U32, )
+OPCODE(WriteStorageU16, Void, U32, U32, U32, )
+OPCODE(WriteStorageS16, Void, U32, U32, U32, )
+OPCODE(WriteStorage32, Void, U32, U32, U32, )
+OPCODE(WriteStorage64, Void, U32, U32, Opaque, )
+OPCODE(WriteStorage128, Void, U32, U32, Opaque, )
+
 // Vector utility
 OPCODE(CompositeConstruct2, Opaque, Opaque, Opaque, )
 OPCODE(CompositeConstruct3, Opaque, Opaque, Opaque, Opaque, )
@@ -90,7 +103,6 @@ OPCODE(GetZeroFromOp, U1, Opaq
 OPCODE(GetSignFromOp, U1, Opaque, )
 OPCODE(GetCarryFromOp, U1, Opaque, )
 OPCODE(GetOverflowFromOp, U1, Opaque, )
-OPCODE(GetZSCOFromOp, ZSCO, Opaque, )
 
 // Floating-point operations
 OPCODE(FPAbs16, U16, U16, )
@@ -143,6 +155,8 @@ OPCODE(FPTrunc64, U64, U64,
 // Integer operations
 OPCODE(IAdd32, U32, U32, U32, )
 OPCODE(IAdd64, U64, U64, U64, )
+OPCODE(ISub32, U32, U32, U32, )
+OPCODE(ISub64, U64, U64, U64, )
 OPCODE(IMul32, U32, U32, U32, )
 OPCODE(INeg32, U32, U32, )
 OPCODE(IAbs32, U32, U32, )
diff --git a/src/shader_recompiler/frontend/ir/type.cpp b/src/shader_recompiler/frontend/ir/type.cpp
index da1e2a0f6..13cc09195 100644
--- a/src/shader_recompiler/frontend/ir/type.cpp
+++ b/src/shader_recompiler/frontend/ir/type.cpp
@@ -11,7 +11,7 @@ namespace Shader::IR {
 
 std::string NameOf(Type type) {
     static constexpr std::array names{
-        "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", "U64", "ZSCO",
+        "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", "U64",
     };
     const size_t bits{static_cast<size_t>(type)};
     if (bits == 0) {
diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h
index f753628e8..397875018 100644
--- a/src/shader_recompiler/frontend/ir/type.h
+++ b/src/shader_recompiler/frontend/ir/type.h
@@ -25,7 +25,6 @@ enum class Type {
     U16 = 1 << 7,
     U32 = 1 << 8,
     U64 = 1 << 9,
-    ZSCO = 1 << 10,
 };
 DECLARE_ENUM_FLAG_OPERATORS(Type)
 
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
index 1e974e88c..59a9b10dc 100644
--- a/src/shader_recompiler/frontend/ir/value.cpp
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -91,26 +91,41 @@ IR::Attribute Value::Attribute() const {
 }
 
 bool Value::U1() const {
+    if (IsIdentity()) {
+        return inst->Arg(0).U1();
+    }
     ValidateAccess(Type::U1);
     return imm_u1;
 }
 
 u8 Value::U8() const {
+    if (IsIdentity()) {
+        return inst->Arg(0).U8();
+    }
     ValidateAccess(Type::U8);
     return imm_u8;
 }
 
 u16 Value::U16() const {
+    if (IsIdentity()) {
+        return inst->Arg(0).U16();
+    }
     ValidateAccess(Type::U16);
     return imm_u16;
 }
 
 u32 Value::U32() const {
+    if (IsIdentity()) {
+        return inst->Arg(0).U32();
+    }
     ValidateAccess(Type::U32);
     return imm_u32;
 }
 
 u64 Value::U64() const {
+    if (IsIdentity()) {
+        return inst->Arg(0).U64();
+    }
     ValidateAccess(Type::U64);
     return imm_u64;
 }
@@ -142,8 +157,6 @@ bool Value::operator==(const Value& other) const {
         return imm_u32 == other.imm_u32;
     case Type::U64:
         return imm_u64 == other.imm_u64;
-    case Type::ZSCO:
-        throw NotImplementedException("ZSCO comparison");
     }
     throw LogicError("Invalid type {}", type);
 }
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index 368119921..31f831794 100644
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -96,6 +96,5 @@ using U64 = TypedValue<Type::U64>;
 using U32U64 = TypedValue<Type::U32 | Type::U64>;
 using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>;
 using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
-using ZSCO = TypedValue<Type::ZSCO>;
 
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index bd1f96c07..b3f2de852 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -52,9 +52,11 @@ Program::Program(Environment& env, const Flow::CFG& cfg) {
     }
     std::ranges::for_each(functions, Optimization::SsaRewritePass);
     for (IR::Function& function : functions) {
+        Optimization::Invoke(Optimization::GlobalMemoryToStorageBufferPass, function);
+        Optimization::Invoke(Optimization::ConstantPropagationPass, function);
         Optimization::Invoke(Optimization::DeadCodeEliminationPass, function);
-        Optimization::Invoke(Optimization::IdentityRemovalPass, function);
-        // Optimization::Invoke(Optimization::VerificationPass, function);
+        Optimization::IdentityRemovalPass(function);
+        Optimization::VerificationPass(function);
     }
     //*/
 }
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
new file mode 100644
index 000000000..02f5b653d
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -0,0 +1,146 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <type_traits>
+
+#include "common/bit_util.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+[[nodiscard]] u32 BitFieldUExtract(u32 base, u32 shift, u32 count) {
+    if (static_cast<size_t>(shift) + static_cast<size_t>(count) > Common::BitSize<u32>()) {
+        throw LogicError("Undefined result in BitFieldUExtract({}, {}, {})", base, shift, count);
+    }
+    return static_cast<u32>((base >> shift) & ((u64{1} << count) - 1)); // defined for count == 32
+}
+
+template <typename T>
+[[nodiscard]] T Arg(const IR::Value& value) {
+    if constexpr (std::is_same_v<T, bool>) {
+        return value.U1();
+    } else if constexpr (std::is_same_v<T, u32>) {
+        return value.U32();
+    } else if constexpr (std::is_same_v<T, u64>) {
+        return value.U64();
+    }
+}
+
+template <typename ImmFn>
+bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
+    const auto arg = [](const IR::Value& value) {
+        if constexpr (std::is_invocable_r_v<bool, ImmFn, bool, bool>) {
+            return value.U1();
+        } else if constexpr (std::is_invocable_r_v<u32, ImmFn, u32, u32>) {
+            return value.U32();
+        } else if constexpr (std::is_invocable_r_v<u64, ImmFn, u64, u64>) {
+            return value.U64();
+        }
+    };
+    const IR::Value lhs{inst.Arg(0)};
+    const IR::Value rhs{inst.Arg(1)};
+
+    const bool is_lhs_immediate{lhs.IsImmediate()};
+    const bool is_rhs_immediate{rhs.IsImmediate()};
+
+    if (is_lhs_immediate && is_rhs_immediate) {
+        const auto result{imm_fn(arg(lhs), arg(rhs))};
+        inst.ReplaceUsesWith(IR::Value{result});
+        return false;
+    }
+    if (is_lhs_immediate && !is_rhs_immediate) {
+        IR::Inst* const rhs_inst{rhs.InstRecursive()};
+        if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) {
+            const auto combined{imm_fn(arg(lhs), arg(rhs_inst->Arg(1)))};
+            inst.SetArg(0, rhs_inst->Arg(0));
+            inst.SetArg(1, IR::Value{combined});
+        } else {
+            // Normalize
+            inst.SetArg(0, rhs);
+            inst.SetArg(1, lhs);
+        }
+    }
+    if (!is_lhs_immediate && is_rhs_immediate) {
+        const IR::Inst* const lhs_inst{lhs.InstRecursive()};
+        if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) {
+            const auto combined{imm_fn(arg(rhs), arg(lhs_inst->Arg(1)))};
+            inst.SetArg(0, lhs_inst->Arg(0));
+            inst.SetArg(1, IR::Value{combined});
+        }
+    }
+    return true;
+}
+
+void FoldGetRegister(IR::Inst& inst) {
+    if (inst.Arg(0).Reg() == IR::Reg::RZ) {
+        inst.ReplaceUsesWith(IR::Value{u32{0}});
+    }
+}
+
+void FoldGetPred(IR::Inst& inst) {
+    if (inst.Arg(0).Pred() == IR::Pred::PT) {
+        inst.ReplaceUsesWith(IR::Value{true});
+    }
+}
+
+template <typename T>
+void FoldAdd(IR::Inst& inst) {
+    if (inst.HasAssociatedPseudoOperation()) {
+        return;
+    }
+    if (!FoldCommutative(inst, [](T a, T b) { return a + b; })) {
+        return;
+    }
+    const IR::Value rhs{inst.Arg(1)};
+    if (rhs.IsImmediate() && Arg<T>(rhs) == 0) {
+        inst.ReplaceUsesWith(inst.Arg(0));
+    }
+}
+
+void FoldLogicalAnd(IR::Inst& inst) {
+    if (!FoldCommutative(inst, [](bool a, bool b) { return a && b; })) {
+        return;
+    }
+    const IR::Value rhs{inst.Arg(1)};
+    if (rhs.IsImmediate()) {
+        if (rhs.U1()) {
+            inst.ReplaceUsesWith(inst.Arg(0));
+        } else {
+            inst.ReplaceUsesWith(IR::Value{false});
+        }
+    }
+}
+
+void ConstantPropagation(IR::Inst& inst) {
+    switch (inst.Opcode()) {
+    case IR::Opcode::GetRegister:
+        return FoldGetRegister(inst);
+    case IR::Opcode::GetPred:
+        return FoldGetPred(inst);
+    case IR::Opcode::IAdd32:
+        return FoldAdd<u32>(inst);
+    case IR::Opcode::IAdd64:
+        return FoldAdd<u64>(inst);
+    case IR::Opcode::BitFieldUExtract:
+        if (inst.AreAllArgsImmediates() && !inst.HasAssociatedPseudoOperation()) {
+            inst.ReplaceUsesWith(IR::Value{
+                BitFieldUExtract(inst.Arg(0).U32(), inst.Arg(1).U32(), inst.Arg(2).U32())});
+        }
+        break;
+    case IR::Opcode::LogicalAnd:
+        return FoldLogicalAnd(inst);
+    default:
+        break;
+    }
+}
+} // Anonymous namespace
+
+void ConstantPropagationPass(IR::Block& block) {
+    std::ranges::for_each(block, ConstantPropagation);
+}
+
+} // namespace Shader::Optimization
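For context, the commutative folding above does two things worth seeing in isolation: it folds a pair of immediate operands into a single constant, and it reassociates chained immediates so that ((x + 4) + 8) becomes (x + 12). Below is a minimal standalone sketch of the reassociation rule on plain integers; the Add and Reassociate names are hypothetical illustrations, not part of the patch:

#include <cassert>
#include <cstdint>
#include <optional>

// Stand-in for an IAdd32 result: a runtime base plus an optional immediate
// that earlier folding may already have attached to it.
struct Add {
    std::optional<uint32_t> imm;
};

// Mirrors the reassociation in FoldCommutative: adding an immediate to an
// add that already carries an immediate merges the two constants into one.
Add Reassociate(const Add& inner, uint32_t imm) {
    if (inner.imm) {
        return Add{*inner.imm + imm};
    }
    return Add{imm};
}

int main() {
    const Add x_plus_4{4};                       // represents (x + 4)
    assert(Reassociate(x_plus_4, 8).imm == 12u); // ((x + 4) + 8) -> (x + 12)
}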
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
new file mode 100644
index 000000000..ee69a5c9d
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -0,0 +1,331 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <compare>
+#include <optional>
+#include <ranges>
+
+#include <boost/container/flat_set.hpp>
+#include <boost/container/small_vector.hpp>
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/microinstruction.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+/// Address in constant buffers to the storage buffer descriptor
+struct StorageBufferAddr {
+    auto operator<=>(const StorageBufferAddr&) const noexcept = default;
+
+    u32 index;
+    u32 offset;
+};
+
+/// Block iterator to a global memory instruction and the storage buffer it uses
+struct StorageInst {
+    StorageBufferAddr storage_buffer;
+    IR::Block::iterator inst;
+};
+
+/// Bias towards a certain range of constant buffers when looking for storage buffers
+struct Bias {
+    u32 index;
+    u32 offset_begin;
+    u32 offset_end;
+};
+
+using StorageBufferSet =
+    boost::container::flat_set<StorageBufferAddr, std::less<StorageBufferAddr>,
+                               boost::container::small_vector<StorageBufferAddr, 16>>;
+using StorageInstVector = boost::container::small_vector<StorageInst, 32>;
+
+/// Returns true when the instruction is a global memory instruction
+bool IsGlobalMemory(const IR::Inst& inst) {
+    switch (inst.Opcode()) {
+    case IR::Opcode::LoadGlobalS8:
+    case IR::Opcode::LoadGlobalU8:
+    case IR::Opcode::LoadGlobalS16:
+    case IR::Opcode::LoadGlobalU16:
+    case IR::Opcode::LoadGlobal32:
+    case IR::Opcode::LoadGlobal64:
+    case IR::Opcode::LoadGlobal128:
+    case IR::Opcode::WriteGlobalS8:
+    case IR::Opcode::WriteGlobalU8:
+    case IR::Opcode::WriteGlobalS16:
+    case IR::Opcode::WriteGlobalU16:
+    case IR::Opcode::WriteGlobal32:
+    case IR::Opcode::WriteGlobal64:
+    case IR::Opcode::WriteGlobal128:
+        return true;
+    default:
+        return false;
+    }
+}
+
+/// Converts a global memory opcode to its storage buffer equivalent
+IR::Opcode GlobalToStorage(IR::Opcode opcode) {
+    switch (opcode) {
+    case IR::Opcode::LoadGlobalS8:
+        return IR::Opcode::LoadStorageS8;
+    case IR::Opcode::LoadGlobalU8:
+        return IR::Opcode::LoadStorageU8;
+    case IR::Opcode::LoadGlobalS16:
+        return IR::Opcode::LoadStorageS16;
+    case IR::Opcode::LoadGlobalU16:
+        return IR::Opcode::LoadStorageU16;
+    case IR::Opcode::LoadGlobal32:
+        return IR::Opcode::LoadStorage32;
+    case IR::Opcode::LoadGlobal64:
+        return IR::Opcode::LoadStorage64;
+    case IR::Opcode::LoadGlobal128:
+        return IR::Opcode::LoadStorage128;
+    case IR::Opcode::WriteGlobalS8:
+        return IR::Opcode::WriteStorageS8;
+    case IR::Opcode::WriteGlobalU8:
+        return IR::Opcode::WriteStorageU8;
+    case IR::Opcode::WriteGlobalS16:
+        return IR::Opcode::WriteStorageS16;
+    case IR::Opcode::WriteGlobalU16:
+        return IR::Opcode::WriteStorageU16;
+    case IR::Opcode::WriteGlobal32:
+        return IR::Opcode::WriteStorage32;
+    case IR::Opcode::WriteGlobal64:
+        return IR::Opcode::WriteStorage64;
+    case IR::Opcode::WriteGlobal128:
+        return IR::Opcode::WriteStorage128;
+    default:
+        throw InvalidArgument("Invalid global memory opcode {}", opcode);
+    }
+}
+
+/// Returns true when a storage buffer address satisfies a bias
+bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
+    return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
+           storage_buffer.offset < bias.offset_end;
+}
+
+/// Ignores a global memory operation, reads return zero and writes are ignored
+void IgnoreGlobalMemory(IR::Block& block, IR::Block::iterator inst) {
+    const IR::Value zero{u32{0}};
+    switch (inst->Opcode()) {
+    case IR::Opcode::LoadGlobalS8:
+    case IR::Opcode::LoadGlobalU8:
+    case IR::Opcode::LoadGlobalS16:
+    case IR::Opcode::LoadGlobalU16:
+    case IR::Opcode::LoadGlobal32:
+        inst->ReplaceUsesWith(zero);
+        break;
+    case IR::Opcode::LoadGlobal64:
+        inst->ReplaceUsesWith(
+            IR::Value{&*block.PrependNewInst(inst, IR::Opcode::CompositeConstruct2, {zero, zero})});
+        break;
+    case IR::Opcode::LoadGlobal128:
+        inst->ReplaceUsesWith(IR::Value{&*block.PrependNewInst(
+            inst, IR::Opcode::CompositeConstruct4, {zero, zero, zero, zero})});
+        break;
+    case IR::Opcode::WriteGlobalS8:
+    case IR::Opcode::WriteGlobalU8:
+    case IR::Opcode::WriteGlobalS16:
+    case IR::Opcode::WriteGlobalU16:
+    case IR::Opcode::WriteGlobal32:
+    case IR::Opcode::WriteGlobal64:
+    case IR::Opcode::WriteGlobal128:
+        inst->Invalidate();
+        break;
+    default:
+        throw LogicError("Invalid opcode to ignore its global memory operation {}", inst->Opcode());
+    }
+}
+
+/// Recursively tries to track the storage buffer address used by a global memory instruction
+std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
+    if (value.IsImmediate()) {
+        // Immediates can't be a storage buffer
+        return std::nullopt;
+    }
+    const IR::Inst* const inst{value.InstRecursive()};
+    if (inst->Opcode() == IR::Opcode::GetCbuf) {
+        const IR::Value index{inst->Arg(0)};
+        const IR::Value offset{inst->Arg(1)};
+        if (!index.IsImmediate()) {
+            // Definitely not a storage buffer if it's read from a non-immediate index
+            return std::nullopt;
+        }
+        if (!offset.IsImmediate()) {
+            // TODO: Support SSBO arrays
+            return std::nullopt;
+        }
+        const StorageBufferAddr storage_buffer{
+            .index = index.U32(),
+            .offset = offset.U32(),
+        };
+        if (bias && !MeetsBias(storage_buffer, *bias)) {
+            // We have to blacklist some addresses in case we wrongly point to them
+            return std::nullopt;
+        }
+        return storage_buffer;
+    }
+    // Reversed loops are more likely to find the right result
+    for (size_t arg = inst->NumArgs(); arg--;) {
+        if (const std::optional storage_buffer{Track(inst->Arg(arg), bias)}) {
+            return *storage_buffer;
+        }
+    }
+    return std::nullopt;
+}
+
+/// Collects the storage buffer used by a global memory instruction and the instruction itself
+void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst,
+                           StorageBufferSet& storage_buffer_set, StorageInstVector& to_replace) {
+    // NVN puts storage buffers in a specific range, we have to bias towards these addresses to
+    // avoid getting false positives
+    static constexpr Bias nvn_bias{
+        .index{0},
+        .offset_begin{0x110},
+        .offset_end{0x610},
+    };
+    // First try to find storage buffers in the NVN address
+    const IR::U64 addr{inst->Arg(0)};
+    std::optional<StorageBufferAddr> storage_buffer{Track(addr, &nvn_bias)};
+    if (!storage_buffer) {
+        // If it fails, track without a bias
+        storage_buffer = Track(addr, nullptr);
+        if (!storage_buffer) {
+            // If that also failed, drop the global memory usage
+            return IgnoreGlobalMemory(block, inst);
+        }
+    }
+    // Collect storage buffer and the instruction
+    storage_buffer_set.insert(*storage_buffer);
+    to_replace.push_back(StorageInst{
+        .storage_buffer{*storage_buffer},
+        .inst{inst},
+    });
+}
+
+/// Tries to track the first 32-bits of a global memory instruction
+std::optional<IR::U32> TrackLowAddress(IR::IREmitter& ir, IR::Inst* inst) {
+    // The first argument is the low level GPU pointer to the global memory instruction
+    const IR::U64 addr{inst->Arg(0)};
+    if (addr.IsImmediate()) {
+        // Not much we can do if it's an immediate
+        return std::nullopt;
+    }
+    // This address is expected to either be a PackUint2x32 or a IAdd64
+    IR::Inst* addr_inst{addr.InstRecursive()};
+    s32 imm_offset{0};
+    if (addr_inst->Opcode() == IR::Opcode::IAdd64) {
+        // If it's an IAdd64, get the immediate offset it is applying and grab the address
+        // instruction. This expects for the instruction to be canonicalized having the address on
+        // the first argument and the immediate offset on the second one.
+        const IR::U64 imm_offset_value{addr_inst->Arg(1)};
+        if (!imm_offset_value.IsImmediate()) {
+            return std::nullopt;
+        }
+        imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64()));
+        const IR::U64 iadd_addr{addr_inst->Arg(0)};
+        if (iadd_addr.IsImmediate()) {
+            return std::nullopt;
+        }
+        addr_inst = iadd_addr.Inst();
+    }
+    // With IAdd64 handled, now PackUint2x32 is expected without exceptions
+    if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) {
+        return std::nullopt;
+    }
+    // PackUint2x32 is expected to be generated from a vector
+    const IR::Value vector{addr_inst->Arg(0)};
+    if (vector.IsImmediate()) {
+        return std::nullopt;
+    }
+    // This vector is expected to be a CompositeConstruct2
+    IR::Inst* const vector_inst{vector.InstRecursive()};
+    if (vector_inst->Opcode() != IR::Opcode::CompositeConstruct2) {
+        return std::nullopt;
+    }
+    // Grab the first argument from the CompositeConstruct2, this is the low address.
+    // Re-apply the offset in case we found one.
+    const IR::U32 low_addr{vector_inst->Arg(0)};
+    return imm_offset != 0 ? IR::U32{ir.IAdd(low_addr, ir.Imm32(imm_offset))} : low_addr;
+}
+
+/// Returns the offset in indices (not bytes) for an equivalent storage instruction
+IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferAddr buffer) {
+    IR::IREmitter ir{block, inst};
+    IR::U32 offset;
+    if (const std::optional<IR::U32> low_addr{TrackLowAddress(ir, &*inst)}) {
+        offset = *low_addr;
+    } else {
+        offset = ir.ConvertU(32, IR::U64{inst->Arg(0)});
+    }
+    // Subtract the least significant 32 bits from the guest offset. The result is the storage
+    // buffer offset in bytes.
+    const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+    return ir.ISub(offset, low_cbuf);
+}
+
+/// Replace a global memory load instruction with its storage buffer equivalent
+void ReplaceLoad(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
+                 const IR::U32& offset) {
+    const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())};
+    const IR::Value value{&*block.PrependNewInst(inst, new_opcode, {storage_index, offset})};
+    inst->ReplaceUsesWith(value);
+}
+
+/// Replace a global memory write instruction with its storage buffer equivalent
+void ReplaceWrite(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
+                  const IR::U32& offset) {
+    const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())};
+    block.PrependNewInst(inst, new_opcode, {storage_index, offset, inst->Arg(1)});
+    inst->Invalidate();
+}
+
+/// Replace a global memory instruction with its storage buffer equivalent
+void Replace(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index,
+             const IR::U32& offset) {
+    switch (inst->Opcode()) {
+    case IR::Opcode::LoadGlobalS8:
+    case IR::Opcode::LoadGlobalU8:
+    case IR::Opcode::LoadGlobalS16:
+    case IR::Opcode::LoadGlobalU16:
+    case IR::Opcode::LoadGlobal32:
+    case IR::Opcode::LoadGlobal64:
+    case IR::Opcode::LoadGlobal128:
+        return ReplaceLoad(block, inst, storage_index, offset);
+    case IR::Opcode::WriteGlobalS8:
+    case IR::Opcode::WriteGlobalU8:
+    case IR::Opcode::WriteGlobalS16:
+    case IR::Opcode::WriteGlobalU16:
+    case IR::Opcode::WriteGlobal32:
+    case IR::Opcode::WriteGlobal64:
+    case IR::Opcode::WriteGlobal128:
+        return ReplaceWrite(block, inst, storage_index, offset);
+    default:
+        throw InvalidArgument("Invalid global memory opcode {}", inst->Opcode());
+    }
+}
+} // Anonymous namespace
+
+void GlobalMemoryToStorageBufferPass(IR::Block& block) {
+    StorageBufferSet storage_buffers;
+    StorageInstVector to_replace;
+
+    for (IR::Block::iterator inst{block.begin()}; inst != block.end(); ++inst) {
+        if (!IsGlobalMemory(*inst)) {
+            continue;
+        }
+        CollectStorageBuffers(block, inst, storage_buffers, to_replace);
+    }
+    for (const auto [storage_buffer, inst] : to_replace) {
+        const auto it{storage_buffers.find(storage_buffer)};
+        const IR::U32 storage_index{IR::Value{static_cast<u32>(storage_buffers.index_of(it))}};
+        const IR::U32 offset{StorageOffset(block, inst, storage_buffer)};
+        Replace(block, inst, storage_index, offset);
+    }
+}
+
+} // namespace Shader::Optimization
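To make the offset computation above concrete: the guest builds a 64-bit global address by packing a 32-bit low word (the storage buffer base read from a constant buffer, plus a displacement) with a high word. StorageOffset recovers the low 32 bits and subtracts the constant-buffer base, and what remains is the byte offset into the storage buffer. A minimal sketch of that arithmetic with made-up values; every name and constant below is hypothetical:

#include <cassert>
#include <cstdint>

int main() {
    // Pretend the driver stored the SSBO base address at cbuf 0, offset 0x110.
    const uint32_t cbuf_base_lo = 0x2000'0000; // low 32 bits of the SSBO address
    // The shader accesses base + 0x40 through a 64-bit global pointer.
    const uint64_t gpu_addr = (uint64_t{1} << 32) | (cbuf_base_lo + 0x40);
    // Like StorageOffset: keep the low 32 bits and subtract the cbuf base,
    // leaving the byte offset into the bound storage buffer.
    const uint32_t ssbo_offset = static_cast<uint32_t>(gpu_addr) - cbuf_base_lo;
    assert(ssbo_offset == 0x40);
}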
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
index 7f8500087..39a972919 100644
--- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
+++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
@@ -10,22 +10,24 @@
 
 namespace Shader::Optimization {
 
-void IdentityRemovalPass(IR::Block& block) {
+void IdentityRemovalPass(IR::Function& function) {
     std::vector<IR::Inst*> to_invalidate;
 
-    for (auto inst = block.begin(); inst != block.end();) {
-        const size_t num_args{inst->NumArgs()};
-        for (size_t i = 0; i < num_args; ++i) {
-            IR::Value arg;
-            while ((arg = inst->Arg(i)).IsIdentity()) {
-                inst->SetArg(i, arg.Inst()->Arg(0));
+    for (auto& block : function.blocks) {
+        for (auto inst = block->begin(); inst != block->end();) {
+            const size_t num_args{inst->NumArgs()};
+            for (size_t i = 0; i < num_args; ++i) {
+                IR::Value arg;
+                while ((arg = inst->Arg(i)).IsIdentity()) {
+                    inst->SetArg(i, arg.Inst()->Arg(0));
+                }
+            }
+            if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) {
+                to_invalidate.push_back(&*inst);
+                inst = block->Instructions().erase(inst);
+            } else {
+                ++inst;
             }
         }
-        if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) {
-            to_invalidate.push_back(&*inst);
-            inst = block.Instructions().erase(inst);
-        } else {
-            ++inst;
-        }
     }
     for (IR::Inst* const inst : to_invalidate) {
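The inner while-loop above is the heart of the pass: it chases chains of Identity instructions so that every argument ends up pointing at the original producer before the identities themselves are erased. A reduced sketch of that chasing on a toy structure (the Node type here is hypothetical, not the IR's real representation):

#include <cassert>

// Toy stand-in: an identity node forwards to exactly one parent node.
struct Node {
    bool is_identity{};
    Node* forwarded{}; // only meaningful when is_identity is true
};

// Mirrors the while-loop in the pass: follow identity links until a real
// producer is reached, so chains of any length collapse to one hop.
Node* Resolve(Node* node) {
    while (node->is_identity) {
        node = node->forwarded;
    }
    return node;
}

int main() {
    Node real{};
    Node id1{true, &real};
    Node id2{true, &id1};
    assert(Resolve(&id2) == &real); // two stacked identities collapse
}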
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 7ed4005ed..578a24d89 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -16,9 +16,11 @@ void Invoke(Func&& func, IR::Function& function) {
     }
 }
 
+void ConstantPropagationPass(IR::Block& block);
 void DeadCodeEliminationPass(IR::Block& block);
-void IdentityRemovalPass(IR::Block& block);
+void GlobalMemoryToStorageBufferPass(IR::Block& block);
+void IdentityRemovalPass(IR::Function& function);
 void SsaRewritePass(IR::Function& function);
-void VerificationPass(const IR::Block& block);
+void VerificationPass(const IR::Function& function);
 
 } // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
index a4b256a40..3c9b020e0 100644
--- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -14,8 +14,6 @@
 // https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6
 //
 
-#include <map>
-
 #include <boost/container/flat_map.hpp>
 
 #include "shader_recompiler/frontend/ir/basic_block.h"
@@ -30,6 +28,12 @@ namespace Shader::Optimization {
 namespace {
 using ValueMap = boost::container::flat_map<IR::Block*, IR::Value, std::less<IR::Block*>>;
 
+struct FlagTag {};
+struct ZeroFlagTag : FlagTag {};
+struct SignFlagTag : FlagTag {};
+struct CarryFlagTag : FlagTag {};
+struct OverflowFlagTag : FlagTag {};
+
 struct DefTable {
     [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept {
         return regs[IR::RegIndex(variable)];
@@ -39,8 +43,28 @@ struct DefTable {
         return preds[IR::PredIndex(variable)];
     }
 
+    [[nodiscard]] ValueMap& operator[](ZeroFlagTag) noexcept {
+        return zero_flag;
+    }
+
+    [[nodiscard]] ValueMap& operator[](SignFlagTag) noexcept {
+        return sign_flag;
+    }
+
+    [[nodiscard]] ValueMap& operator[](CarryFlagTag) noexcept {
+        return carry_flag;
+    }
+
+    [[nodiscard]] ValueMap& operator[](OverflowFlagTag) noexcept {
+        return overflow_flag;
+    }
+
     std::array<ValueMap, IR::NUM_USER_REGS> regs;
     std::array<ValueMap, IR::NUM_USER_PREDS> preds;
+    ValueMap zero_flag;
+    ValueMap sign_flag;
+    ValueMap carry_flag;
+    ValueMap overflow_flag;
 };
 
 IR::Opcode UndefOpcode(IR::Reg) noexcept {
@@ -51,6 +75,10 @@ IR::Opcode UndefOpcode(IR::Pred) noexcept {
     return IR::Opcode::Undef1;
 }
 
+IR::Opcode UndefOpcode(const FlagTag&) noexcept {
+    return IR::Opcode::Undef1;
+}
+
 [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept {
     return inst.Opcode() == IR::Opcode::Phi;
 }
@@ -135,6 +163,18 @@ void SsaRewritePass(IR::Function& function) {
                     pass.WriteVariable(pred, block.get(), inst.Arg(1));
                 }
                 break;
+            case IR::Opcode::SetZFlag:
+                pass.WriteVariable(ZeroFlagTag{}, block.get(), inst.Arg(0));
+                break;
+            case IR::Opcode::SetSFlag:
+                pass.WriteVariable(SignFlagTag{}, block.get(), inst.Arg(0));
+                break;
+            case IR::Opcode::SetCFlag:
+                pass.WriteVariable(CarryFlagTag{}, block.get(), inst.Arg(0));
+                break;
+            case IR::Opcode::SetOFlag:
+                pass.WriteVariable(OverflowFlagTag{}, block.get(), inst.Arg(0));
+                break;
             case IR::Opcode::GetRegister:
                 if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
                     inst.ReplaceUsesWith(pass.ReadVariable(reg, block.get()));
@@ -145,6 +185,18 @@ void SsaRewritePass(IR::Function& function) {
                     inst.ReplaceUsesWith(pass.ReadVariable(pred, block.get()));
                 }
                 break;
+            case IR::Opcode::GetZFlag:
+                inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block.get()));
+                break;
+            case IR::Opcode::GetSFlag:
+                inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block.get()));
+                break;
+            case IR::Opcode::GetCFlag:
+                inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block.get()));
+                break;
+            case IR::Opcode::GetOFlag:
+                inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block.get()));
+                break;
             default:
                 break;
             }
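The flag tags introduced above lean on overload resolution: each tag is an empty type, so DefTable::operator[] statically selects the right ValueMap, and the single UndefOpcode(const FlagTag&) overload covers all four flags through the common base. A reduced standalone sketch of the pattern (hypothetical types and maps, not the real pass):

#include <cassert>
#include <map>
#include <string>

struct FlagTag {};
struct ZeroFlagTag : FlagTag {};
struct CarryFlagTag : FlagTag {};

// Each tag picks a different definition map purely by overload resolution,
// so generic code like WriteVariable(tag, ...) needs no runtime branching.
struct DefTable {
    std::map<int, std::string>& operator[](ZeroFlagTag) { return zero; }
    std::map<int, std::string>& operator[](CarryFlagTag) { return carry; }

    std::map<int, std::string> zero;
    std::map<int, std::string> carry;
};

// One overload handles every flag through the common base class.
const char* UndefOpcode(const FlagTag&) {
    return "Undef1";
}

int main() {
    DefTable table;
    table[ZeroFlagTag{}][0] = "z-def";
    table[CarryFlagTag{}][0] = "c-def";
    assert(table[ZeroFlagTag{}].at(0) == "z-def");
    assert(std::string{UndefOpcode(CarryFlagTag{})} == "Undef1");
}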
diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp
index 36d9ae39b..8a5adf5a2 100644
--- a/src/shader_recompiler/ir_opt/verification_pass.cpp
+++ b/src/shader_recompiler/ir_opt/verification_pass.cpp
@@ -11,40 +11,44 @@
 
 namespace Shader::Optimization {
 
-static void ValidateTypes(const IR::Block& block) {
-    for (const IR::Inst& inst : block) {
-        const size_t num_args{inst.NumArgs()};
-        for (size_t i = 0; i < num_args; ++i) {
-            const IR::Type t1{inst.Arg(i).Type()};
-            const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)};
-            if (!IR::AreTypesCompatible(t1, t2)) {
-                throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(block));
+static void ValidateTypes(const IR::Function& function) {
+    for (const auto& block : function.blocks) {
+        for (const IR::Inst& inst : *block) {
+            const size_t num_args{inst.NumArgs()};
+            for (size_t i = 0; i < num_args; ++i) {
+                const IR::Type t1{inst.Arg(i).Type()};
+                const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)};
+                if (!IR::AreTypesCompatible(t1, t2)) {
+                    throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block));
+                }
             }
         }
     }
 }
 
-static void ValidateUses(const IR::Block& block) {
+static void ValidateUses(const IR::Function& function) {
     std::map<IR::Inst*, int> actual_uses;
-    for (const IR::Inst& inst : block) {
-        const size_t num_args{inst.NumArgs()};
-        for (size_t i = 0; i < num_args; ++i) {
-            const IR::Value arg{inst.Arg(i)};
-            if (!arg.IsImmediate()) {
-                ++actual_uses[arg.Inst()];
+    for (const auto& block : function.blocks) {
+        for (const IR::Inst& inst : *block) {
+            const size_t num_args{inst.NumArgs()};
+            for (size_t i = 0; i < num_args; ++i) {
+                const IR::Value arg{inst.Arg(i)};
+                if (!arg.IsImmediate()) {
+                    ++actual_uses[arg.Inst()];
+                }
             }
         }
     }
     for (const auto [inst, uses] : actual_uses) {
         if (inst->UseCount() != uses) {
-            throw LogicError("Invalid uses in block:\n{}", IR::DumpBlock(block));
+            throw LogicError("Invalid uses in block:" /*, IR::DumpFunction(function)*/);
         }
     }
 }
 
-void VerificationPass(const IR::Block& block) {
-    ValidateTypes(block);
-    ValidateUses(block);
+void VerificationPass(const IR::Function& function) {
+    ValidateTypes(function);
+    ValidateUses(function);
 }
 
 } // namespace Shader::Optimization