| author | 2021-04-03 05:18:12 -0300 |
|---|---|
| committer | 2021-07-22 21:51:26 -0400 |
| commit | 9a342f5605aef385612053d7d8b564f541952eae |
| tree | 0a52f80b351e9664c1f14cb50672c71398c001e7 /src |
| parent | shader: Fix fp16 merge when using native fp16 |
shader: Rework global memory tracking to use breadth-first search
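This commit replaces the recursive, phi-aware depth-first walk in `Track` with an iterative breadth-first traversal of the SSA use-def graph: pending instructions go into a `std::queue`, and a small visited list deduplicates nodes so cycles created by phi loops terminate without per-block bookkeeping. Below is a minimal, self-contained sketch of that queue-plus-visited pattern; `Node` and its `payload` are hypothetical stand-ins for the recompiler's `IR::Inst` and the `GetCbufU32` match, not the real API.

```cpp
#include <algorithm>
#include <cstdio>
#include <optional>
#include <queue>
#include <vector>

// Hypothetical stand-in for IR::Inst: payload marks the node being searched for,
// args are the operands, i.e. the edges of the SSA use-def graph.
struct Node {
    std::optional<int> payload;
    std::vector<const Node*> args;
};

// Breadth-first search over the operand graph, visiting the rightmost
// arguments first; the visited list keeps phi cycles from looping forever.
std::optional<int> Track(const Node* root) {
    std::vector<const Node*> visited;
    std::queue<const Node*> queue;
    queue.push(root);
    while (!queue.empty()) {
        const Node* const node{queue.front()};
        queue.pop();
        if (node->payload) {
            return node->payload; // found the origin
        }
        for (auto arg = node->args.size(); arg--;) {
            const Node* const arg_node{node->args[arg]};
            if (std::ranges::find(visited, arg_node) == visited.end()) {
                visited.push_back(arg_node);
                queue.push(arg_node);
            }
        }
    }
    return std::nullopt; // graph exhausted without finding the origin
}

int main() {
    Node origin{.payload{42}, .args{}};
    Node a{.payload{}, .args{&origin}};
    Node b{.payload{}, .args{&a}};
    a.args.push_back(&b); // a cycle, like one a phi loop would create
    if (const auto result = Track(&b)) {
        std::printf("tracked origin: %d\n", *result);
    }
}
```

Visiting the rightmost arguments first carries over the pass's earlier heuristic ("reversed loops are more likely to find the right result"), and the fixed capacity of the visited list in the real code (`small_vector<const IR::Inst*, 2>`) was sized from shaders observed in Super Smash Bros. Ultimate.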
Diffstat (limited to 'src')

src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp (-rw-r--r--) | 149

1 file changed, 80 insertions(+), 69 deletions(-)
```diff
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index c8bd7b329..f94c82e21 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -4,9 +4,9 @@
 
 #include <algorithm>
 #include <compare>
-#include <map>
 #include <optional>
 #include <ranges>
+#include <queue>
 
 #include <boost/container/flat_set.hpp>
 #include <boost/container/small_vector.hpp>
@@ -40,15 +40,19 @@ struct Bias {
     u32 offset_end;
 };
 
+using boost::container::flat_set;
+using boost::container::small_vector;
 using StorageBufferSet =
-    boost::container::flat_set<StorageBufferAddr, std::less<StorageBufferAddr>,
-                               boost::container::small_vector<StorageBufferAddr, 16>>;
-using StorageInstVector = boost::container::small_vector<StorageInst, 24>;
-using VisitedBlocks = boost::container::flat_set<IR::Block*, std::less<IR::Block*>,
-                                                 boost::container::small_vector<IR::Block*, 4>>;
+    flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
+using StorageInstVector = small_vector<StorageInst, 24>;
 using StorageWritesSet =
-    boost::container::flat_set<StorageBufferAddr, std::less<StorageBufferAddr>,
-                               boost::container::small_vector<StorageBufferAddr, 16>>;
+    flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
+
+struct StorageInfo {
+    StorageBufferSet set;
+    StorageInstVector to_replace;
+    StorageWritesSet writes;
+};
 
 /// Returns true when the instruction is a global memory instruction
 bool IsGlobalMemory(const IR::Inst& inst) {
@@ -215,60 +219,72 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
     };
 }
 
-/// Recursively tries to track the storage buffer address used by a global memory instruction
-std::optional<StorageBufferAddr> Track(IR::Block* block, const IR::Value& value, const Bias* bias,
-                                       VisitedBlocks& visited) {
+/// Tries to get the storage buffer out of a constant buffer read instruction
+std::optional<StorageBufferAddr> TryGetStorageBuffer(const IR::Inst* inst, const Bias* bias) {
+    if (inst->Opcode() != IR::Opcode::GetCbufU32) {
+        return std::nullopt;
+    }
+    const IR::Value index{inst->Arg(0)};
+    const IR::Value offset{inst->Arg(1)};
+    if (!index.IsImmediate()) {
+        // Definitely not a storage buffer if it's read from a non-immediate index
+        return std::nullopt;
+    }
+    if (!offset.IsImmediate()) {
+        // TODO: Support SSBO arrays
+        return std::nullopt;
+    }
+    const StorageBufferAddr storage_buffer{
+        .index{index.U32()},
+        .offset{offset.U32()},
+    };
+    if (bias && !MeetsBias(storage_buffer, *bias)) {
+        // We have to blacklist some addresses in case we wrongly point to them
+        return std::nullopt;
+    }
+    return storage_buffer;
+}
+
+/// Tries to track the storage buffer address used by a global memory instruction
+std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
     if (value.IsImmediate()) {
-        // Immediates can't be a storage buffer
+        // Nothing to do with immediates
         return std::nullopt;
     }
-    const IR::Inst* const inst{value.InstRecursive()};
-    if (inst->Opcode() == IR::Opcode::GetCbufU32) {
-        const IR::Value index{inst->Arg(0)};
-        const IR::Value offset{inst->Arg(1)};
-        if (!index.IsImmediate()) {
-            // Definitely not a storage buffer if it's read from a non-immediate index
-            return std::nullopt;
-        }
-        if (!offset.IsImmediate()) {
-            // TODO: Support SSBO arrays
-            return std::nullopt;
-        }
-        const StorageBufferAddr storage_buffer{
-            .index{index.U32()},
-            .offset{offset.U32()},
-        };
-        if (bias && !MeetsBias(storage_buffer, *bias)) {
-            // We have to blacklist some addresses in case we wrongly point to them
-            return std::nullopt;
+    // Breadth-first search visiting the right most arguments first
+    // Small vector has been determined from shaders in Super Smash Bros. Ultimate
+    small_vector<const IR::Inst*, 2> visited;
+    std::queue<const IR::Inst*> queue;
+    queue.push(value.InstRecursive());
+
+    while (!queue.empty()) {
+        // Pop one instruction from the queue
+        const IR::Inst* const inst{queue.front()};
+        queue.pop();
+        if (const std::optional<StorageBufferAddr> result = TryGetStorageBuffer(inst, bias)) {
+            // This is the instruction we were looking for
+            return result;
         }
-        return storage_buffer;
-    }
-    // Reversed loops are more likely to find the right result
-    for (size_t arg = inst->NumArgs(); arg--;) {
-        IR::Block* inst_block{block};
-        if (inst->Opcode() == IR::Opcode::Phi) {
-            // If we are going through a phi node, mark the current block as visited
-            visited.insert(block);
-            // and skip already visited blocks to avoid looping forever
-            IR::Block* const phi_block{inst->PhiBlock(arg)};
-            if (visited.contains(phi_block)) {
-                // Already visited, skip
+        // Visit the right most arguments first
+        for (size_t arg = inst->NumArgs(); arg--;) {
+            const IR::Value arg_value{inst->Arg(arg)};
+            if (arg_value.IsImmediate()) {
                 continue;
             }
-            inst_block = phi_block;
-        }
-        const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), bias, visited)};
-        if (storage_buffer) {
-            return *storage_buffer;
+            // Queue instruction if it hasn't been visited
+            const IR::Inst* const arg_inst{arg_value.InstRecursive()};
+            if (std::ranges::find(visited, arg_inst) == visited.end()) {
+                visited.push_back(arg_inst);
+                queue.push(arg_inst);
+            }
         }
     }
+    // SSA tree has been traversed and the origin hasn't been found
     return std::nullopt;
 }
 
 /// Collects the storage buffer used by a global memory instruction and the instruction itself
-void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& storage_buffer_set,
-                           StorageInstVector& to_replace, StorageWritesSet& writes_set) {
+void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) {
     // NVN puts storage buffers in a specific range, we have to bias towards these addresses to
     // avoid getting false positives
     static constexpr Bias nvn_bias{
@@ -284,24 +300,23 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& s
     }
     // First try to find storage buffers in the NVN address
    const IR::U32 low_addr{low_addr_info->value};
-    VisitedBlocks visited_blocks;
-    std::optional storage_buffer{Track(&block, low_addr, &nvn_bias, visited_blocks)};
+    std::optional storage_buffer{Track(low_addr, &nvn_bias)};
     if (!storage_buffer) {
         // If it fails, track without a bias
-        visited_blocks.clear();
-        storage_buffer = Track(&block, low_addr, nullptr, visited_blocks);
+        storage_buffer = Track(low_addr, nullptr);
         if (!storage_buffer) {
             // If that also failed, drop the global memory usage
+            // LOG_ERROR
             DiscardGlobalMemory(block, inst);
             return;
         }
     }
     // Collect storage buffer and the instruction
     if (IsGlobalMemoryWrite(inst)) {
-        writes_set.insert(*storage_buffer);
+        info.writes.insert(*storage_buffer);
     }
-    storage_buffer_set.insert(*storage_buffer);
-    to_replace.push_back(StorageInst{
+    info.set.insert(*storage_buffer);
+    info.to_replace.push_back(StorageInst{
         .storage_buffer{*storage_buffer},
         .inst{&inst},
         .block{&block},
@@ -371,33 +386,29 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
 } // Anonymous namespace
 
 void GlobalMemoryToStorageBufferPass(IR::Program& program) {
-    StorageBufferSet storage_buffers;
-    StorageInstVector to_replace;
-    StorageWritesSet writes_set;
-
+    StorageInfo info;
     for (IR::Block* const block : program.post_order_blocks) {
         for (IR::Inst& inst : block->Instructions()) {
             if (!IsGlobalMemory(inst)) {
                 continue;
             }
-            CollectStorageBuffers(*block, inst, storage_buffers, to_replace, writes_set);
+            CollectStorageBuffers(*block, inst, info);
         }
     }
-    Info& info{program.info};
     u32 storage_index{};
-    for (const StorageBufferAddr& storage_buffer : storage_buffers) {
-        info.storage_buffers_descriptors.push_back({
+    for (const StorageBufferAddr& storage_buffer : info.set) {
+        program.info.storage_buffers_descriptors.push_back({
             .cbuf_index{storage_buffer.index},
             .cbuf_offset{storage_buffer.offset},
             .count{1},
-            .is_written{writes_set.contains(storage_buffer)},
+            .is_written{info.writes.contains(storage_buffer)},
         });
         ++storage_index;
    }
-    for (const StorageInst& storage_inst : to_replace) {
+    for (const StorageInst& storage_inst : info.to_replace) {
         const StorageBufferAddr storage_buffer{storage_inst.storage_buffer};
-        const auto it{storage_buffers.find(storage_inst.storage_buffer)};
-        const IR::U32 index{IR::Value{static_cast<u32>(storage_buffers.index_of(it))}};
+        const auto it{info.set.find(storage_inst.storage_buffer)};
+        const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
         IR::Block* const block{storage_inst.block};
         IR::Inst* const inst{storage_inst.inst};
         const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
```