Diffstat (limited to 'src/shader_recompiler/ir_opt')
4 files changed, 210 insertions, 61 deletions
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
new file mode 100644
index 000000000..f2326dea1
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -0,0 +1,81 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/shader_info.h"
+
+namespace Shader::Optimization {
+namespace {
+void AddConstantBufferDescriptor(Info& info, u32 index) {
+    auto& descriptor{info.constant_buffers.at(index)};
+    if (descriptor) {
+        return;
+    }
+    descriptor = &info.constant_buffer_descriptors.emplace_back(Info::ConstantBufferDescriptor{
+        .index{index},
+        .count{1},
+    });
+}
+
+void Visit(Info& info, IR::Inst& inst) {
+    switch (inst.Opcode()) {
+    case IR::Opcode::WorkgroupId:
+        info.uses_workgroup_id = true;
+        break;
+    case IR::Opcode::LocalInvocationId:
+        info.uses_local_invocation_id = true;
+        break;
+    case IR::Opcode::FPAbs16:
+    case IR::Opcode::FPAdd16:
+    case IR::Opcode::FPCeil16:
+    case IR::Opcode::FPFloor16:
+    case IR::Opcode::FPFma16:
+    case IR::Opcode::FPMul16:
+    case IR::Opcode::FPNeg16:
+    case IR::Opcode::FPRoundEven16:
+    case IR::Opcode::FPSaturate16:
+    case IR::Opcode::FPTrunc16:
+        info.uses_fp16 = true;
+        break;
+    case IR::Opcode::FPAbs64:
+    case IR::Opcode::FPAdd64:
+    case IR::Opcode::FPCeil64:
+    case IR::Opcode::FPFloor64:
+    case IR::Opcode::FPFma64:
+    case IR::Opcode::FPMax64:
+    case IR::Opcode::FPMin64:
+    case IR::Opcode::FPMul64:
+    case IR::Opcode::FPNeg64:
+    case IR::Opcode::FPRecip64:
+    case IR::Opcode::FPRecipSqrt64:
+    case IR::Opcode::FPRoundEven64:
+    case IR::Opcode::FPSaturate64:
+    case IR::Opcode::FPTrunc64:
+        info.uses_fp64 = true;
+        break;
+    case IR::Opcode::GetCbuf:
+        if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) {
+            AddConstantBufferDescriptor(info, index.U32());
+        } else {
+            throw NotImplementedException("Constant buffer with non-immediate index");
+        }
+        break;
+    default:
+        break;
+    }
+}
+} // Anonymous namespace
+
+void CollectShaderInfoPass(IR::Program& program) {
+    Info& info{program.info};
+    for (IR::Function& function : program.functions) {
+        for (IR::Block* const block : function.post_order_blocks) {
+            for (IR::Inst& inst : block->Instructions()) {
+                Visit(info, inst);
+            }
+        }
+    }
+}
+
+} // namespace Shader::Optimization
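Note: the pass above records each constant buffer index at most once and keeps a pointer to its descriptor. A standalone sketch of that "record once, reuse later" pattern follows; the struct, field names, and the array size of 18 constant buffer slots are stand-ins for illustration, not the real Shader::Info layout.

    // Sketch only: mirrors the dedup pattern of AddConstantBufferDescriptor.
    #include <array>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Descriptor {
        std::uint32_t index;
        std::uint32_t count;
    };

    struct InfoSketch {
        std::array<const Descriptor*, 18> constant_buffers{}; // one slot per cbuf index (assumed size)
        std::vector<Descriptor> descriptors;                  // one entry per distinct index
    };

    void RecordConstantBuffer(InfoSketch& info, std::uint32_t index) {
        auto& slot = info.constant_buffers.at(index);
        if (slot != nullptr) {
            return; // this cbuf index was already recorded
        }
        info.descriptors.push_back(Descriptor{.index = index, .count = 1});
        slot = &info.descriptors.back();
    }

    int main() {
        InfoSketch info;
        // Reserve up front so the pointers stored in constant_buffers stay valid.
        info.descriptors.reserve(info.constant_buffers.size());
        for (std::uint32_t index : {0u, 1u, 0u, 3u, 1u}) {
            RecordConstantBuffer(info, index);
        }
        std::printf("unique constant buffers: %zu\n", info.descriptors.size()); // prints 3
    }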
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index cbde65b9b..f1ad16d60 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -77,6 +77,16 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
     return true;
 }
 
+template <typename Func>
+bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
+    if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
+        return false;
+    }
+    using Indices = std::make_index_sequence<LambdaTraits<decltype(func)>::NUM_ARGS>;
+    inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{}));
+    return true;
+}
+
 void FoldGetRegister(IR::Inst& inst) {
     if (inst.Arg(0).Reg() == IR::Reg::RZ) {
         inst.ReplaceUsesWith(IR::Value{u32{0}});
@@ -103,6 +113,52 @@ void FoldAdd(IR::Inst& inst) {
     }
 }
 
+void FoldISub32(IR::Inst& inst) {
+    if (FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a - b; })) {
+        return;
+    }
+    if (inst.Arg(0).IsImmediate() || inst.Arg(1).IsImmediate()) {
+        return;
+    }
+    // ISub32 is generally used to subtract two constant buffer reads; replace the result with
+    // zero when both reads are identical.
+    const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) {
+        return a->Opcode() == IR::Opcode::GetCbuf && b->Opcode() == IR::Opcode::GetCbuf &&
+               a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1);
+    }};
+    IR::Inst* op_a{inst.Arg(0).InstRecursive()};
+    IR::Inst* op_b{inst.Arg(1).InstRecursive()};
+    if (equal_cbuf(op_a, op_b)) {
+        inst.ReplaceUsesWith(IR::Value{u32{0}});
+        return;
+    }
+    // It's also possible a value is being added to a cbuf and then subtracted
+    if (op_b->Opcode() == IR::Opcode::IAdd32) {
+        // Canonicalize local variables to simplify the following logic
+        std::swap(op_a, op_b);
+    }
+    if (op_b->Opcode() != IR::Opcode::GetCbuf) {
+        return;
+    }
+    IR::Inst* const inst_cbuf{op_b};
+    if (op_a->Opcode() != IR::Opcode::IAdd32) {
+        return;
+    }
+    IR::Value add_op_a{op_a->Arg(0)};
+    IR::Value add_op_b{op_a->Arg(1)};
+    if (add_op_b.IsImmediate()) {
+        // Canonicalize
+        std::swap(add_op_a, add_op_b);
+    }
+    if (add_op_b.IsImmediate()) {
+        return;
+    }
+    IR::Inst* const add_cbuf{add_op_b.InstRecursive()};
+    if (equal_cbuf(add_cbuf, inst_cbuf)) {
+        inst.ReplaceUsesWith(add_op_a);
+    }
+}
+
 template <typename T>
 void FoldSelect(IR::Inst& inst) {
     const IR::Value cond{inst.Arg(0)};
@@ -170,15 +226,6 @@ IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<
     return IR::Value{func(Arg<Traits::ArgType<I>>(inst.Arg(I))...)};
 }
 
-template <typename Func>
-void FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
-    if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
-        return;
-    }
-    using Indices = std::make_index_sequence<LambdaTraits<decltype(func)>::NUM_ARGS>;
-    inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{}));
-}
-
 void FoldBranchConditional(IR::Inst& inst) {
     const IR::U1 cond{inst.Arg(0)};
     if (cond.IsImmediate()) {
@@ -205,6 +252,8 @@ void ConstantPropagation(IR::Inst& inst) {
         return FoldGetPred(inst);
     case IR::Opcode::IAdd32:
         return FoldAdd<u32>(inst);
+    case IR::Opcode::ISub32:
+        return FoldISub32(inst);
     case IR::Opcode::BitCastF32U32:
         return FoldBitCast<f32, u32>(inst, IR::Opcode::BitCastU32F32);
     case IR::Opcode::BitCastU32F32:
| @@ -220,17 +269,20 @@ void ConstantPropagation(IR::Inst& inst) { | |||
| 220 | case IR::Opcode::LogicalNot: | 269 | case IR::Opcode::LogicalNot: |
| 221 | return FoldLogicalNot(inst); | 270 | return FoldLogicalNot(inst); |
| 222 | case IR::Opcode::SLessThan: | 271 | case IR::Opcode::SLessThan: |
| 223 | return FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; }); | 272 | FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; }); |
| 273 | return; | ||
| 224 | case IR::Opcode::ULessThan: | 274 | case IR::Opcode::ULessThan: |
| 225 | return FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); | 275 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); |
| 276 | return; | ||
| 226 | case IR::Opcode::BitFieldUExtract: | 277 | case IR::Opcode::BitFieldUExtract: |
| 227 | return FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { | 278 | FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { |
| 228 | if (static_cast<size_t>(shift) + static_cast<size_t>(count) > Common::BitSize<u32>()) { | 279 | if (static_cast<size_t>(shift) + static_cast<size_t>(count) > Common::BitSize<u32>()) { |
| 229 | throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract, | 280 | throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract, |
| 230 | base, shift, count); | 281 | base, shift, count); |
| 231 | } | 282 | } |
| 232 | return (base >> shift) & ((1U << count) - 1); | 283 | return (base >> shift) & ((1U << count) - 1); |
| 233 | }); | 284 | }); |
| 285 | return; | ||
| 234 | case IR::Opcode::BranchConditional: | 286 | case IR::Opcode::BranchConditional: |
| 235 | return FoldBranchConditional(inst); | 287 | return FoldBranchConditional(inst); |
| 236 | default: | 288 | default: |
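The new FoldISub32 relies on identities of wrap-around u32 arithmetic: a GetCbuf value that was previously added to the tracked value cancels exactly under subtraction, so cbuf - cbuf folds to 0 and (x + cbuf) - cbuf folds to x. A minimal standalone check of those identities, with plain u32 values standing in for IR nodes:

    // Sketch only: the unsigned identities FoldISub32 exploits. Wrap-around arithmetic on
    // std::uint32_t makes (x + c) - c == x hold for every input, even when x + c overflows.
    #include <cassert>
    #include <cstdint>

    int main() {
        const std::uint32_t cbuf = 0xDEADBEEF; // stands in for a GetCbuf result
        const std::uint32_t x = 0x7FFFFFFF;    // stands in for the value added to it

        assert(cbuf - cbuf == 0u);    // folded to the immediate 0
        assert(x + cbuf - cbuf == x); // folded to x, the non-cbuf addend
        assert(cbuf + x - cbuf == x); // same fold with the operands of the add swapped
    }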
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index b40c0c57b..bf230a850 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -28,7 +28,8 @@ struct StorageBufferAddr {
 /// Block iterator to a global memory instruction and the storage buffer it uses
 struct StorageInst {
     StorageBufferAddr storage_buffer;
-    IR::Block::iterator inst;
+    IR::Inst* inst;
+    IR::Block* block;
 };
 
 /// Bias towards a certain range of constant buffers when looking for storage buffers
@@ -41,7 +42,7 @@ struct Bias {
 using StorageBufferSet =
     boost::container::flat_set<StorageBufferAddr, std::less<StorageBufferAddr>,
                                boost::container::small_vector<StorageBufferAddr, 16>>;
-using StorageInstVector = boost::container::small_vector<StorageInst, 32>;
+using StorageInstVector = boost::container::small_vector<StorageInst, 24>;
 
 /// Returns true when the instruction is a global memory instruction
 bool IsGlobalMemory(const IR::Inst& inst) {
@@ -109,23 +110,22 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce
 }
 
 /// Discards a global memory operation, reads return zero and writes are ignored
-void DiscardGlobalMemory(IR::Block& block, IR::Block::iterator inst) {
+void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) {
+    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
     const IR::Value zero{u32{0}};
-    switch (inst->Opcode()) {
+    switch (inst.Opcode()) {
     case IR::Opcode::LoadGlobalS8:
     case IR::Opcode::LoadGlobalU8:
     case IR::Opcode::LoadGlobalS16:
     case IR::Opcode::LoadGlobalU16:
     case IR::Opcode::LoadGlobal32:
-        inst->ReplaceUsesWith(zero);
+        inst.ReplaceUsesWith(zero);
         break;
     case IR::Opcode::LoadGlobal64:
-        inst->ReplaceUsesWith(IR::Value{
-            &*block.PrependNewInst(inst, IR::Opcode::CompositeConstructU32x2, {zero, zero})});
+        inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero)});
         break;
     case IR::Opcode::LoadGlobal128:
-        inst->ReplaceUsesWith(IR::Value{&*block.PrependNewInst(
-            inst, IR::Opcode::CompositeConstructU32x4, {zero, zero, zero, zero})});
+        inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero, zero, zero)});
         break;
     case IR::Opcode::WriteGlobalS8:
     case IR::Opcode::WriteGlobalU8:
@@ -134,11 +134,10 @@ void DiscardGlobalMemory(IR::Block& block, IR::Block::iterator inst) {
     case IR::Opcode::WriteGlobal32:
     case IR::Opcode::WriteGlobal64:
     case IR::Opcode::WriteGlobal128:
-        inst->Invalidate();
+        inst.Invalidate();
         break;
     default:
-        throw LogicError("Invalid opcode to discard its global memory operation {}",
-                         inst->Opcode());
+        throw LogicError("Invalid opcode to discard its global memory operation {}", inst.Opcode());
     }
 }
 
| @@ -232,8 +231,8 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) | |||
| 232 | } | 231 | } |
| 233 | 232 | ||
| 234 | /// Collects the storage buffer used by a global memory instruction and the instruction itself | 233 | /// Collects the storage buffer used by a global memory instruction and the instruction itself |
| 235 | void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, | 234 | void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& storage_buffer_set, |
| 236 | StorageBufferSet& storage_buffer_set, StorageInstVector& to_replace) { | 235 | StorageInstVector& to_replace) { |
| 237 | // NVN puts storage buffers in a specific range, we have to bias towards these addresses to | 236 | // NVN puts storage buffers in a specific range, we have to bias towards these addresses to |
| 238 | // avoid getting false positives | 237 | // avoid getting false positives |
| 239 | static constexpr Bias nvn_bias{ | 238 | static constexpr Bias nvn_bias{ |
| @@ -241,19 +240,13 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, | |||
| 241 | .offset_begin{0x110}, | 240 | .offset_begin{0x110}, |
| 242 | .offset_end{0x610}, | 241 | .offset_end{0x610}, |
| 243 | }; | 242 | }; |
| 244 | // First try to find storage buffers in the NVN address | ||
| 245 | const IR::U64 addr{inst->Arg(0)}; | ||
| 246 | if (addr.IsImmediate()) { | ||
| 247 | // Immediate addresses can't be lowered to a storage buffer | ||
| 248 | DiscardGlobalMemory(block, inst); | ||
| 249 | return; | ||
| 250 | } | ||
| 251 | // Track the low address of the instruction | 243 | // Track the low address of the instruction |
| 252 | const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(addr.InstRecursive())}; | 244 | const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)}; |
| 253 | if (!low_addr_info) { | 245 | if (!low_addr_info) { |
| 254 | DiscardGlobalMemory(block, inst); | 246 | DiscardGlobalMemory(block, inst); |
| 255 | return; | 247 | return; |
| 256 | } | 248 | } |
| 249 | // First try to find storage buffers in the NVN address | ||
| 257 | const IR::U32 low_addr{low_addr_info->value}; | 250 | const IR::U32 low_addr{low_addr_info->value}; |
| 258 | std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)}; | 251 | std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)}; |
| 259 | if (!storage_buffer) { | 252 | if (!storage_buffer) { |
| @@ -269,21 +262,22 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, | |||
| 269 | storage_buffer_set.insert(*storage_buffer); | 262 | storage_buffer_set.insert(*storage_buffer); |
| 270 | to_replace.push_back(StorageInst{ | 263 | to_replace.push_back(StorageInst{ |
| 271 | .storage_buffer{*storage_buffer}, | 264 | .storage_buffer{*storage_buffer}, |
| 272 | .inst{inst}, | 265 | .inst{&inst}, |
| 266 | .block{&block}, | ||
| 273 | }); | 267 | }); |
| 274 | } | 268 | } |
| 275 | 269 | ||
| 276 | /// Returns the offset in indices (not bytes) for an equivalent storage instruction | 270 | /// Returns the offset in indices (not bytes) for an equivalent storage instruction |
| 277 | IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferAddr buffer) { | 271 | IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) { |
| 278 | IR::IREmitter ir{block, inst}; | 272 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; |
| 279 | IR::U32 offset; | 273 | IR::U32 offset; |
| 280 | if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&*inst)}) { | 274 | if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) { |
| 281 | offset = low_addr->value; | 275 | offset = low_addr->value; |
| 282 | if (low_addr->imm_offset != 0) { | 276 | if (low_addr->imm_offset != 0) { |
| 283 | offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset)); | 277 | offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset)); |
| 284 | } | 278 | } |
| 285 | } else { | 279 | } else { |
| 286 | offset = ir.ConvertU(32, IR::U64{inst->Arg(0)}); | 280 | offset = ir.ConvertU(32, IR::U64{inst.Arg(0)}); |
| 287 | } | 281 | } |
| 288 | // Subtract the least significant 32 bits from the guest offset. The result is the storage | 282 | // Subtract the least significant 32 bits from the guest offset. The result is the storage |
| 289 | // buffer offset in bytes. | 283 | // buffer offset in bytes. |
| @@ -292,25 +286,27 @@ IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferA | |||
| 292 | } | 286 | } |
| 293 | 287 | ||
| 294 | /// Replace a global memory load instruction with its storage buffer equivalent | 288 | /// Replace a global memory load instruction with its storage buffer equivalent |
| 295 | void ReplaceLoad(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, | 289 | void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, |
| 296 | const IR::U32& offset) { | 290 | const IR::U32& offset) { |
| 297 | const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())}; | 291 | const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; |
| 298 | const IR::Value value{&*block.PrependNewInst(inst, new_opcode, {storage_index, offset})}; | 292 | const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; |
| 299 | inst->ReplaceUsesWith(value); | 293 | const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})}; |
| 294 | inst.ReplaceUsesWith(value); | ||
| 300 | } | 295 | } |
| 301 | 296 | ||
| 302 | /// Replace a global memory write instruction with its storage buffer equivalent | 297 | /// Replace a global memory write instruction with its storage buffer equivalent |
| 303 | void ReplaceWrite(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, | 298 | void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, |
| 304 | const IR::U32& offset) { | 299 | const IR::U32& offset) { |
| 305 | const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())}; | 300 | const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; |
| 306 | block.PrependNewInst(inst, new_opcode, {storage_index, offset, inst->Arg(1)}); | 301 | const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; |
| 307 | inst->Invalidate(); | 302 | block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)}); |
| 303 | inst.Invalidate(); | ||
| 308 | } | 304 | } |
| 309 | 305 | ||
| 310 | /// Replace a global memory instruction with its storage buffer equivalent | 306 | /// Replace a global memory instruction with its storage buffer equivalent |
| 311 | void Replace(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, | 307 | void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, |
| 312 | const IR::U32& offset) { | 308 | const IR::U32& offset) { |
| 313 | switch (inst->Opcode()) { | 309 | switch (inst.Opcode()) { |
| 314 | case IR::Opcode::LoadGlobalS8: | 310 | case IR::Opcode::LoadGlobalS8: |
| 315 | case IR::Opcode::LoadGlobalU8: | 311 | case IR::Opcode::LoadGlobalU8: |
| 316 | case IR::Opcode::LoadGlobalS16: | 312 | case IR::Opcode::LoadGlobalS16: |
| @@ -328,26 +324,44 @@ void Replace(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_ | |||
| 328 | case IR::Opcode::WriteGlobal128: | 324 | case IR::Opcode::WriteGlobal128: |
| 329 | return ReplaceWrite(block, inst, storage_index, offset); | 325 | return ReplaceWrite(block, inst, storage_index, offset); |
| 330 | default: | 326 | default: |
| 331 | throw InvalidArgument("Invalid global memory opcode {}", inst->Opcode()); | 327 | throw InvalidArgument("Invalid global memory opcode {}", inst.Opcode()); |
| 332 | } | 328 | } |
| 333 | } | 329 | } |
| 334 | } // Anonymous namespace | 330 | } // Anonymous namespace |
| 335 | 331 | ||
| 336 | void GlobalMemoryToStorageBufferPass(IR::Block& block) { | 332 | void GlobalMemoryToStorageBufferPass(IR::Program& program) { |
| 337 | StorageBufferSet storage_buffers; | 333 | StorageBufferSet storage_buffers; |
| 338 | StorageInstVector to_replace; | 334 | StorageInstVector to_replace; |
| 339 | 335 | ||
| 340 | for (IR::Block::iterator inst{block.begin()}; inst != block.end(); ++inst) { | 336 | for (IR::Function& function : program.functions) { |
| 341 | if (!IsGlobalMemory(*inst)) { | 337 | for (IR::Block* const block : function.post_order_blocks) { |
| 342 | continue; | 338 | for (IR::Inst& inst : block->Instructions()) { |
| 339 | if (!IsGlobalMemory(inst)) { | ||
| 340 | continue; | ||
| 341 | } | ||
| 342 | CollectStorageBuffers(*block, inst, storage_buffers, to_replace); | ||
| 343 | } | ||
| 343 | } | 344 | } |
| 344 | CollectStorageBuffers(block, inst, storage_buffers, to_replace); | ||
| 345 | } | 345 | } |
| 346 | for (const auto [storage_buffer, inst] : to_replace) { | 346 | Info& info{program.info}; |
| 347 | const auto it{storage_buffers.find(storage_buffer)}; | 347 | u32 storage_index{}; |
| 348 | const IR::U32 storage_index{IR::Value{static_cast<u32>(storage_buffers.index_of(it))}}; | 348 | for (const StorageBufferAddr& storage_buffer : storage_buffers) { |
| 349 | const IR::U32 offset{StorageOffset(block, inst, storage_buffer)}; | 349 | info.storage_buffers_descriptors.push_back({ |
| 350 | Replace(block, inst, storage_index, offset); | 350 | .cbuf_index{storage_buffer.index}, |
| 351 | .cbuf_offset{storage_buffer.offset}, | ||
| 352 | .count{1}, | ||
| 353 | }); | ||
| 354 | info.storage_buffers[storage_index] = &info.storage_buffers_descriptors.back(); | ||
| 355 | ++storage_index; | ||
| 356 | } | ||
| 357 | for (const StorageInst& storage_inst : to_replace) { | ||
| 358 | const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; | ||
| 359 | const auto it{storage_buffers.find(storage_inst.storage_buffer)}; | ||
| 360 | const IR::U32 index{IR::Value{static_cast<u32>(storage_buffers.index_of(it))}}; | ||
| 361 | IR::Block* const block{storage_inst.block}; | ||
| 362 | IR::Inst* const inst{storage_inst.inst}; | ||
| 363 | const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; | ||
| 364 | Replace(*block, *inst, index, offset); | ||
| 351 | } | 365 | } |
| 352 | } | 366 | } |
| 353 | 367 | ||
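When tracking a global memory address back to a constant buffer, the pass keeps biasing towards the window NVN drivers use for storage buffer pointers; the hunk above preserves the 0x110 to 0x610 offset range for that bias. A standalone sketch of such a window test follows; the cbuf index 0 and the helper name are assumptions for illustration (the real MeetsBias helper is not modified by this commit).

    // Sketch only: accept a tracked (cbuf index, byte offset) pair when it falls inside the
    // biased window. The offsets mirror nvn_bias from the diff; index 0 is an assumption.
    #include <cstdint>
    #include <cstdio>

    struct BiasSketch {
        std::uint32_t index;        // constant buffer the driver stores pointers in
        std::uint32_t offset_begin; // first byte offset of the pointer window
        std::uint32_t offset_end;   // one past the last byte offset
    };

    constexpr BiasSketch nvn_bias_sketch{
        .index = 0,
        .offset_begin = 0x110,
        .offset_end = 0x610,
    };

    constexpr bool WithinBias(std::uint32_t cbuf_index, std::uint32_t cbuf_offset) {
        return cbuf_index == nvn_bias_sketch.index &&
               cbuf_offset >= nvn_bias_sketch.offset_begin &&
               cbuf_offset < nvn_bias_sketch.offset_end;
    }

    int main() {
        std::printf("%d\n", WithinBias(0, 0x120)); // 1: inside the biased window
        std::printf("%d\n", WithinBias(3, 0x120)); // 0: different constant buffer
        std::printf("%d\n", WithinBias(0, 0x700)); // 0: past the end of the window
    }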
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 30eb31588..89e5811d3 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -8,6 +8,7 @@
 
 #include "shader_recompiler/frontend/ir/basic_block.h"
 #include "shader_recompiler/frontend/ir/function.h"
+#include "shader_recompiler/frontend/ir/program.h"
 
 namespace Shader::Optimization {
 
@@ -18,9 +19,10 @@ void PostOrderInvoke(Func&& func, IR::Function& function) {
     }
 }
 
+void CollectShaderInfoPass(IR::Program& program);
 void ConstantPropagationPass(IR::Block& block);
 void DeadCodeEliminationPass(IR::Block& block);
-void GlobalMemoryToStorageBufferPass(IR::Block& block);
+void GlobalMemoryToStorageBufferPass(IR::Program& program);
 void IdentityRemovalPass(IR::Function& function);
 void SsaRewritePass(std::span<IR::Block* const> post_order_blocks);
 void VerificationPass(const IR::Function& function);
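With these declarations, GlobalMemoryToStorageBufferPass and the new CollectShaderInfoPass operate on a whole IR::Program, while block-level passes keep running through PostOrderInvoke. A hedged sketch of how a caller could wire them together follows; the Optimize-style wrapper and the exact pass ordering are assumptions, only the declarations come from passes.h.

    // Sketch only: a possible pass pipeline. The wrapper function and ordering are assumed;
    // the pass names and signatures are the ones declared in passes.h after this change.
    #include "shader_recompiler/frontend/ir/program.h"
    #include "shader_recompiler/ir_opt/passes.h"

    namespace Shader::Optimization {

    void OptimizeSketch(IR::Program& program) {
        for (IR::Function& function : program.functions) {
            PostOrderInvoke(ConstantPropagationPass, function); // still runs per block
            PostOrderInvoke(DeadCodeEliminationPass, function);
        }
        GlobalMemoryToStorageBufferPass(program); // now runs once over the whole program
        CollectShaderInfoPass(program);           // fills program.info for the backend
    }

    } // namespace Shader::Optimization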