author    ReinUsesLisp 2021-04-03 05:18:12 -0300
committer ameerj 2021-07-22 21:51:26 -0400
commit  9a342f5605aef385612053d7d8b564f541952eae (patch)
tree    0a52f80b351e9664c1f14cb50672c71398c001e7 /src
parent  shader: Fix fp16 merge when using native fp16 (diff)
shader: Rework global memory tracking to use breadth-first search
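
The rework replaces the recursive, phi-aware depth-first walk with an iterative breadth-first search over the SSA use-def graph, so the shallowest constant-buffer read reachable from a global memory address is found first and cycles through phi nodes are broken by a visited list instead of visited-block bookkeeping. A minimal standalone sketch of that traversal pattern, for orientation before the diff: the Node type, is_origin flag, and test graph are hypothetical stand-ins for yuzu's IR::Inst and its GetCbufU32 check, not the project's actual API.

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <queue>
#include <vector>

struct Node {
    int id{};
    bool is_origin{};          // stands in for "is a decodable GetCbufU32"
    std::vector<Node*> args;   // stands in for IR::Inst argument access
};

// Breadth-first search over a use-def graph, visiting the rightmost
// arguments first; the shallowest origin reachable from `start` wins.
Node* TrackOrigin(Node* start) {
    std::vector<Node*> visited;  // graphs are tiny; a linear scan is enough
    std::queue<Node*> queue;
    queue.push(start);
    while (!queue.empty()) {
        Node* const node{queue.front()};
        queue.pop();
        if (node->is_origin) {
            return node;
        }
        for (std::size_t arg = node->args.size(); arg--;) {
            Node* const arg_node{node->args[arg]};
            if (std::ranges::find(visited, arg_node) == visited.end()) {
                visited.push_back(arg_node);
                queue.push(arg_node);
            }
        }
    }
    return nullptr;  // graph exhausted without finding an origin
}

int main() {
    Node origin{.id = 0, .is_origin = true};
    Node left{.id = 1, .args = {&origin}};
    Node right{.id = 2, .args = {&origin}};  // shared operand, visited once
    Node root{.id = 3, .args = {&left, &right}};
    if (Node* const found{TrackOrigin(&root)}) {
        std::printf("origin: node %d\n", found->id);
    }
}
```

The iterative queue also removes the recursion depth limit of the old walk, and the visited list guarantees termination on DAGs and phi cycles alike.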
Diffstat (limited to 'src')
-rw-r--r--  src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp  149
1 file changed, 80 insertions, 69 deletions
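
One container detail worth noting before the diff: the pass keeps its deduplicated storage-buffer set in a boost::container::flat_set backed by a small_vector, and later turns a set entry into an SSBO descriptor index with index_of. A minimal sketch of that pattern follows; the Addr key type and the inserted values are illustrative, not taken from the shader recompiler.

```cpp
#include <compare>
#include <cstddef>
#include <cstdio>
#include <functional>

#include <boost/container/flat_set.hpp>
#include <boost/container/small_vector.hpp>

struct Addr {
    unsigned index{};
    unsigned offset{};
    auto operator<=>(const Addr&) const = default;
};

// Sorted, deduplicated, contiguous storage; the first 16 elements live
// inline in the small_vector, so small sets never touch the heap.
using AddrSet = boost::container::flat_set<Addr, std::less<Addr>,
                                           boost::container::small_vector<Addr, 16>>;

int main() {
    AddrSet set;
    set.insert({.index = 0, .offset = 0x110});
    set.insert({.index = 0, .offset = 0x140});
    set.insert({.index = 0, .offset = 0x110});  // duplicate, ignored
    // find() an element, then recover its position with index_of(); the
    // pass uses this position as the SSBO descriptor index
    const auto it{set.find(Addr{.index = 0, .offset = 0x140})};
    std::printf("count=%zu descriptor_index=%zu\n", set.size(),
                static_cast<std::size_t>(set.index_of(it)));
}
```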
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index c8bd7b329..f94c82e21 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -4,9 +4,9 @@
 
 #include <algorithm>
 #include <compare>
-#include <map>
 #include <optional>
 #include <ranges>
+#include <queue>
 
 #include <boost/container/flat_set.hpp>
 #include <boost/container/small_vector.hpp>
@@ -40,15 +40,19 @@ struct Bias {
     u32 offset_end;
 };
 
+using boost::container::flat_set;
+using boost::container::small_vector;
 using StorageBufferSet =
-    boost::container::flat_set<StorageBufferAddr, std::less<StorageBufferAddr>,
-                               boost::container::small_vector<StorageBufferAddr, 16>>;
-using StorageInstVector = boost::container::small_vector<StorageInst, 24>;
-using VisitedBlocks = boost::container::flat_set<IR::Block*, std::less<IR::Block*>,
-                                                 boost::container::small_vector<IR::Block*, 4>>;
+    flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
+using StorageInstVector = small_vector<StorageInst, 24>;
 using StorageWritesSet =
-    boost::container::flat_set<StorageBufferAddr, std::less<StorageBufferAddr>,
-                               boost::container::small_vector<StorageBufferAddr, 16>>;
+    flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
+
+struct StorageInfo {
+    StorageBufferSet set;
+    StorageInstVector to_replace;
+    StorageWritesSet writes;
+};
 
 /// Returns true when the instruction is a global memory instruction
 bool IsGlobalMemory(const IR::Inst& inst) {
@@ -215,60 +219,72 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
     };
 }
 
-/// Recursively tries to track the storage buffer address used by a global memory instruction
-std::optional<StorageBufferAddr> Track(IR::Block* block, const IR::Value& value, const Bias* bias,
-                                       VisitedBlocks& visited) {
+/// Tries to get the storage buffer out of a constant buffer read instruction
+std::optional<StorageBufferAddr> TryGetStorageBuffer(const IR::Inst* inst, const Bias* bias) {
+    if (inst->Opcode() != IR::Opcode::GetCbufU32) {
+        return std::nullopt;
+    }
+    const IR::Value index{inst->Arg(0)};
+    const IR::Value offset{inst->Arg(1)};
+    if (!index.IsImmediate()) {
+        // Definitely not a storage buffer if it's read from a non-immediate index
+        return std::nullopt;
+    }
+    if (!offset.IsImmediate()) {
+        // TODO: Support SSBO arrays
+        return std::nullopt;
+    }
+    const StorageBufferAddr storage_buffer{
+        .index{index.U32()},
+        .offset{offset.U32()},
+    };
+    if (bias && !MeetsBias(storage_buffer, *bias)) {
+        // We have to blacklist some addresses in case we wrongly point to them
+        return std::nullopt;
+    }
+    return storage_buffer;
+}
+
+/// Tries to track the storage buffer address used by a global memory instruction
+std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
     if (value.IsImmediate()) {
-        // Immediates can't be a storage buffer
+        // Nothing to do with immediates
        return std::nullopt;
     }
-    const IR::Inst* const inst{value.InstRecursive()};
-    if (inst->Opcode() == IR::Opcode::GetCbufU32) {
-        const IR::Value index{inst->Arg(0)};
-        const IR::Value offset{inst->Arg(1)};
-        if (!index.IsImmediate()) {
-            // Definitely not a storage buffer if it's read from a non-immediate index
-            return std::nullopt;
-        }
-        if (!offset.IsImmediate()) {
-            // TODO: Support SSBO arrays
-            return std::nullopt;
-        }
-        const StorageBufferAddr storage_buffer{
-            .index{index.U32()},
-            .offset{offset.U32()},
-        };
-        if (bias && !MeetsBias(storage_buffer, *bias)) {
-            // We have to blacklist some addresses in case we wrongly point to them
-            return std::nullopt;
+    // Breadth-first search visiting the right most arguments first
+    // Small vector has been determined from shaders in Super Smash Bros. Ultimate
+    small_vector<const IR::Inst*, 2> visited;
+    std::queue<const IR::Inst*> queue;
+    queue.push(value.InstRecursive());
+
+    while (!queue.empty()) {
+        // Pop one instruction from the queue
+        const IR::Inst* const inst{queue.front()};
+        queue.pop();
+        if (const std::optional<StorageBufferAddr> result = TryGetStorageBuffer(inst, bias)) {
+            // This is the instruction we were looking for
+            return result;
         }
-        return storage_buffer;
-    }
-    // Reversed loops are more likely to find the right result
-    for (size_t arg = inst->NumArgs(); arg--;) {
-        IR::Block* inst_block{block};
-        if (inst->Opcode() == IR::Opcode::Phi) {
-            // If we are going through a phi node, mark the current block as visited
-            visited.insert(block);
-            // and skip already visited blocks to avoid looping forever
-            IR::Block* const phi_block{inst->PhiBlock(arg)};
-            if (visited.contains(phi_block)) {
-                // Already visited, skip
+        // Visit the right most arguments first
+        for (size_t arg = inst->NumArgs(); arg--;) {
+            const IR::Value arg_value{inst->Arg(arg)};
+            if (arg_value.IsImmediate()) {
                continue;
            }
-            inst_block = phi_block;
-        }
-        const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), bias, visited)};
-        if (storage_buffer) {
-            return *storage_buffer;
+            // Queue instruction if it hasn't been visited
+            const IR::Inst* const arg_inst{arg_value.InstRecursive()};
+            if (std::ranges::find(visited, arg_inst) == visited.end()) {
+                visited.push_back(arg_inst);
+                queue.push(arg_inst);
+            }
        }
    }
+    // SSA tree has been traversed and the origin hasn't been found
    return std::nullopt;
 }
 
 /// Collects the storage buffer used by a global memory instruction and the instruction itself
-void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& storage_buffer_set,
-                           StorageInstVector& to_replace, StorageWritesSet& writes_set) {
+void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) {
     // NVN puts storage buffers in a specific range, we have to bias towards these addresses to
     // avoid getting false positives
     static constexpr Bias nvn_bias{
@@ -284,24 +300,23 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& s
     }
     // First try to find storage buffers in the NVN address
     const IR::U32 low_addr{low_addr_info->value};
-    VisitedBlocks visited_blocks;
-    std::optional storage_buffer{Track(&block, low_addr, &nvn_bias, visited_blocks)};
+    std::optional storage_buffer{Track(low_addr, &nvn_bias)};
     if (!storage_buffer) {
         // If it fails, track without a bias
-        visited_blocks.clear();
-        storage_buffer = Track(&block, low_addr, nullptr, visited_blocks);
+        storage_buffer = Track(low_addr, nullptr);
         if (!storage_buffer) {
             // If that also failed, drop the global memory usage
+            // LOG_ERROR
             DiscardGlobalMemory(block, inst);
             return;
         }
     }
     // Collect storage buffer and the instruction
     if (IsGlobalMemoryWrite(inst)) {
-        writes_set.insert(*storage_buffer);
+        info.writes.insert(*storage_buffer);
     }
-    storage_buffer_set.insert(*storage_buffer);
-    to_replace.push_back(StorageInst{
+    info.set.insert(*storage_buffer);
+    info.to_replace.push_back(StorageInst{
         .storage_buffer{*storage_buffer},
         .inst{&inst},
         .block{&block},
@@ -371,33 +386,29 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
 } // Anonymous namespace
 
 void GlobalMemoryToStorageBufferPass(IR::Program& program) {
-    StorageBufferSet storage_buffers;
-    StorageInstVector to_replace;
-    StorageWritesSet writes_set;
-
+    StorageInfo info;
     for (IR::Block* const block : program.post_order_blocks) {
         for (IR::Inst& inst : block->Instructions()) {
             if (!IsGlobalMemory(inst)) {
                 continue;
             }
-            CollectStorageBuffers(*block, inst, storage_buffers, to_replace, writes_set);
+            CollectStorageBuffers(*block, inst, info);
         }
     }
-    Info& info{program.info};
     u32 storage_index{};
-    for (const StorageBufferAddr& storage_buffer : storage_buffers) {
-        info.storage_buffers_descriptors.push_back({
+    for (const StorageBufferAddr& storage_buffer : info.set) {
+        program.info.storage_buffers_descriptors.push_back({
             .cbuf_index{storage_buffer.index},
             .cbuf_offset{storage_buffer.offset},
             .count{1},
-            .is_written{writes_set.contains(storage_buffer)},
+            .is_written{info.writes.contains(storage_buffer)},
         });
         ++storage_index;
     }
-    for (const StorageInst& storage_inst : to_replace) {
+    for (const StorageInst& storage_inst : info.to_replace) {
         const StorageBufferAddr storage_buffer{storage_inst.storage_buffer};
-        const auto it{storage_buffers.find(storage_inst.storage_buffer)};
-        const IR::U32 index{IR::Value{static_cast<u32>(storage_buffers.index_of(it))}};
+        const auto it{info.set.find(storage_inst.storage_buffer)};
+        const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
         IR::Block* const block{storage_inst.block};
         IR::Inst* const inst{storage_inst.inst};
         const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};