diff options
| author | 2021-02-15 00:07:52 -0300 | |
|---|---|---|
| committer | 2021-07-22 21:51:22 -0400 | |
| commit | 1c0b8bca5e1d2af65dff66c19b7ebb3060ce1229 (patch) | |
| tree | b4fa108f3774c7c37c46bd1c192f290a2dec4908 /src | |
| parent | shader: Add support for forward declarations (diff) | |
| download | yuzu-1c0b8bca5e1d2af65dff66c19b7ebb3060ce1229.tar.gz yuzu-1c0b8bca5e1d2af65dff66c19b7ebb3060ce1229.tar.xz yuzu-1c0b8bca5e1d2af65dff66c19b7ebb3060ce1229.zip | |
shader: Fix tracking
Diffstat (limited to 'src')
| -rw-r--r-- | src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | 122 |
1 file changed, 72 insertions, 50 deletions
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 08fd364bb..b40c0c57b 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | |||
| @@ -142,6 +142,58 @@ void DiscardGlobalMemory(IR::Block& block, IR::Block::iterator inst) { | |||
| 142 | } | 142 | } |
| 143 | } | 143 | } |
| 144 | 144 | ||
| 145 | struct LowAddrInfo { | ||
| 146 | IR::U32 value; | ||
| 147 | s32 imm_offset; | ||
| 148 | }; | ||
| 149 | |||
| 150 | /// Tries to track the first 32-bits of a global memory instruction | ||
| 151 | std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) { | ||
| 152 | // The first argument is the low level GPU pointer to the global memory instruction | ||
| 153 | const IR::U64 addr{inst->Arg(0)}; | ||
| 154 | if (addr.IsImmediate()) { | ||
| 155 | // Not much we can do if it's an immediate | ||
| 156 | return std::nullopt; | ||
| 157 | } | ||
| 158 | // This address is expected to either be a PackUint2x32 or a IAdd64 | ||
| 159 | IR::Inst* addr_inst{addr.InstRecursive()}; | ||
| 160 | s32 imm_offset{0}; | ||
| 161 | if (addr_inst->Opcode() == IR::Opcode::IAdd64) { | ||
| 162 | // If it's an IAdd64, get the immediate offset it is applying and grab the address | ||
| 163 | // instruction. This expects for the instruction to be canonicalized having the address on | ||
| 164 | // the first argument and the immediate offset on the second one. | ||
| 165 | const IR::U64 imm_offset_value{addr_inst->Arg(1)}; | ||
| 166 | if (!imm_offset_value.IsImmediate()) { | ||
| 167 | return std::nullopt; | ||
| 168 | } | ||
| 169 | imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64())); | ||
| 170 | const IR::U64 iadd_addr{addr_inst->Arg(0)}; | ||
| 171 | if (iadd_addr.IsImmediate()) { | ||
| 172 | return std::nullopt; | ||
| 173 | } | ||
| 174 | addr_inst = iadd_addr.Inst(); | ||
| 175 | } | ||
| 176 | // With IAdd64 handled, now PackUint2x32 is expected without exceptions | ||
| 177 | if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) { | ||
| 178 | return std::nullopt; | ||
| 179 | } | ||
| 180 | // PackUint2x32 is expected to be generated from a vector | ||
| 181 | const IR::Value vector{addr_inst->Arg(0)}; | ||
| 182 | if (vector.IsImmediate()) { | ||
| 183 | return std::nullopt; | ||
| 184 | } | ||
| 185 | // This vector is expected to be a CompositeConstructU32x2 | ||
| 186 | IR::Inst* const vector_inst{vector.InstRecursive()}; | ||
| 187 | if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) { | ||
| 188 | return std::nullopt; | ||
| 189 | } | ||
| 190 | // Grab the first argument from the CompositeConstructU32x2, this is the low address. | ||
| 191 | return LowAddrInfo{ | ||
| 192 | .value{IR::U32{vector_inst->Arg(0)}}, | ||
| 193 | .imm_offset{imm_offset}, | ||
| 194 | }; | ||
| 195 | } | ||
| 196 | |||
| 145 | /// Recursively tries to track the storage buffer address used by a global memory instruction | 197 | /// Recursively tries to track the storage buffer address used by a global memory instruction |
| 146 | std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { | 198 | std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { |
| 147 | if (value.IsImmediate()) { | 199 | if (value.IsImmediate()) { |
| @@ -191,13 +243,26 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, | |||
| 191 | }; | 243 | }; |
| 192 | // First try to find storage buffers in the NVN address | 244 | // First try to find storage buffers in the NVN address |
| 193 | const IR::U64 addr{inst->Arg(0)}; | 245 | const IR::U64 addr{inst->Arg(0)}; |
| 194 | std::optional<StorageBufferAddr> storage_buffer{Track(addr, &nvn_bias)}; | 246 | if (addr.IsImmediate()) { |
| 247 | // Immediate addresses can't be lowered to a storage buffer | ||
| 248 | DiscardGlobalMemory(block, inst); | ||
| 249 | return; | ||
| 250 | } | ||
| 251 | // Track the low address of the instruction | ||
| 252 | const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(addr.InstRecursive())}; | ||
| 253 | if (!low_addr_info) { | ||
| 254 | DiscardGlobalMemory(block, inst); | ||
| 255 | return; | ||
| 256 | } | ||
| 257 | const IR::U32 low_addr{low_addr_info->value}; | ||
| 258 | std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)}; | ||
| 195 | if (!storage_buffer) { | 259 | if (!storage_buffer) { |
| 196 | // If it fails, track without a bias | 260 | // If it fails, track without a bias |
| 197 | storage_buffer = Track(addr, nullptr); | 261 | storage_buffer = Track(low_addr, nullptr); |
| 198 | if (!storage_buffer) { | 262 | if (!storage_buffer) { |
| 199 | // If that also failed, drop the global memory usage | 263 | // If that also failed, drop the global memory usage |
| 200 | DiscardGlobalMemory(block, inst); | 264 | DiscardGlobalMemory(block, inst); |
| 265 | return; | ||
| 201 | } | 266 | } |
| 202 | } | 267 | } |
| 203 | // Collect storage buffer and the instruction | 268 | // Collect storage buffer and the instruction |
| @@ -208,58 +273,15 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, | |||
| 208 | }); | 273 | }); |
| 209 | } | 274 | } |
| 210 | 275 | ||
| 211 | /// Tries to track the first 32-bits of a global memory instruction | ||
| 212 | std::optional<IR::U32> TrackLowAddress(IR::IREmitter& ir, IR::Inst* inst) { | ||
| 213 | // The first argument is the low level GPU pointer to the global memory instruction | ||
| 214 | const IR::U64 addr{inst->Arg(0)}; | ||
| 215 | if (addr.IsImmediate()) { | ||
| 216 | // Not much we can do if it's an immediate | ||
| 217 | return std::nullopt; | ||
| 218 | } | ||
| 219 | // This address is expected to either be a PackUint2x32 or a IAdd64 | ||
| 220 | IR::Inst* addr_inst{addr.InstRecursive()}; | ||
| 221 | s32 imm_offset{0}; | ||
| 222 | if (addr_inst->Opcode() == IR::Opcode::IAdd64) { | ||
| 223 | // If it's an IAdd64, get the immediate offset it is applying and grab the address | ||
| 224 | // instruction. This expects for the instruction to be canonicalized having the address on | ||
| 225 | // the first argument and the immediate offset on the second one. | ||
| 226 | const IR::U64 imm_offset_value{addr_inst->Arg(1)}; | ||
| 227 | if (!imm_offset_value.IsImmediate()) { | ||
| 228 | return std::nullopt; | ||
| 229 | } | ||
| 230 | imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64())); | ||
| 231 | const IR::U64 iadd_addr{addr_inst->Arg(0)}; | ||
| 232 | if (iadd_addr.IsImmediate()) { | ||
| 233 | return std::nullopt; | ||
| 234 | } | ||
| 235 | addr_inst = iadd_addr.Inst(); | ||
| 236 | } | ||
| 237 | // With IAdd64 handled, now PackUint2x32 is expected without exceptions | ||
| 238 | if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) { | ||
| 239 | return std::nullopt; | ||
| 240 | } | ||
| 241 | // PackUint2x32 is expected to be generated from a vector | ||
| 242 | const IR::Value vector{addr_inst->Arg(0)}; | ||
| 243 | if (vector.IsImmediate()) { | ||
| 244 | return std::nullopt; | ||
| 245 | } | ||
| 246 | // This vector is expected to be a CompositeConstructU32x2 | ||
| 247 | IR::Inst* const vector_inst{vector.InstRecursive()}; | ||
| 248 | if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) { | ||
| 249 | return std::nullopt; | ||
| 250 | } | ||
| 251 | // Grab the first argument from the CompositeConstructU32x2, this is the low address. | ||
| 252 | // Re-apply the offset in case we found one. | ||
| 253 | const IR::U32 low_addr{vector_inst->Arg(0)}; | ||
| 254 | return imm_offset != 0 ? IR::U32{ir.IAdd(low_addr, ir.Imm32(imm_offset))} : low_addr; | ||
| 255 | } | ||
| 256 | |||
| 257 | /// Returns the offset in indices (not bytes) for an equivalent storage instruction | 276 | /// Returns the offset in indices (not bytes) for an equivalent storage instruction |
| 258 | IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferAddr buffer) { | 277 | IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferAddr buffer) { |
| 259 | IR::IREmitter ir{block, inst}; | 278 | IR::IREmitter ir{block, inst}; |
| 260 | IR::U32 offset; | 279 | IR::U32 offset; |
| 261 | if (const std::optional<IR::U32> low_addr{TrackLowAddress(ir, &*inst)}) { | 280 | if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&*inst)}) { |
| 262 | offset = *low_addr; | 281 | offset = low_addr->value; |
| 282 | if (low_addr->imm_offset != 0) { | ||
| 283 | offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset)); | ||
| 284 | } | ||
| 263 | } else { | 285 | } else { |
| 264 | offset = ir.ConvertU(32, IR::U64{inst->Arg(0)}); | 286 | offset = ir.ConvertU(32, IR::U64{inst->Arg(0)}); |
| 265 | } | 287 | } |