| author | 2021-04-19 16:33:23 -0300 |
|---|---|
| committer | 2021-07-22 21:51:28 -0400 |
| commit | 7018e524f5e6217b3259333acc4ea09ad036d331 (patch) |
| tree | 58e750b08d48e018accc4de9a05cb483d825904c /src/shader_recompiler/ir_opt |
| parent | spirv: Fix ViewportMask (diff) |
shader: Add NVN storage buffer fallbacks
When we can't track the SSBO origin of a global memory instruction, leave it
as a global memory operation, assume the pointer falls within the NVN storage
buffer slots, and resolve the actual buffer with a linear search at shader
runtime.
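To illustrate the fallback the message describes, here is a hedged C++ sketch of the slot scan the generated shader effectively performs. The slot count, descriptor layout, and names (NUM_SLOTS, SlotDesc, FindStorageBuffer) are assumptions for illustration only; the real recompiler emits this logic as IR and then SPIR-V, not C++:

```cpp
#include <cstddef>
#include <cstdint>

// Hypothetical descriptor for one NVN storage buffer slot: a base GPU
// address and a size, as the driver would store them in a constant buffer.
struct SlotDesc {
    uint64_t gpu_addr;
    uint32_t size;
};

constexpr size_t NUM_SLOTS = 16; // assumed slot count, for illustration

// Returns the index of the slot whose range contains 'pointer', or
// NUM_SLOTS if no slot matches (the access would then be discarded).
size_t FindStorageBuffer(const SlotDesc (&slots)[NUM_SLOTS], uint64_t pointer) {
    for (size_t i = 0; i < NUM_SLOTS; ++i) { // the linear search at runtime
        if (pointer >= slots[i].gpu_addr &&
            pointer < slots[i].gpu_addr + slots[i].size) {
            return i;
        }
    }
    return NUM_SLOTS;
}
```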
Diffstat (limited to 'src/shader_recompiler/ir_opt')
| -rw-r--r-- | src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 53 |
| -rw-r--r-- | src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | 46 |
2 files changed, 61 insertions(+), 38 deletions(-)
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 0500a5141..cccf0909d 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -187,6 +187,8 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::FPUnordGreaterThanEqual16:
     case IR::Opcode::FPIsNan16:
     case IR::Opcode::GlobalAtomicAddF16x2:
+    case IR::Opcode::GlobalAtomicMinF16x2:
+    case IR::Opcode::GlobalAtomicMaxF16x2:
     case IR::Opcode::StorageAtomicAddF16x2:
     case IR::Opcode::StorageAtomicMinF16x2:
     case IR::Opcode::StorageAtomicMaxF16x2:
@@ -373,7 +375,58 @@ void VisitUsages(Info& info, IR::Inst& inst) {
     case IR::Opcode::StorageAtomicAnd64:
     case IR::Opcode::StorageAtomicOr64:
     case IR::Opcode::StorageAtomicXor64:
+    case IR::Opcode::StorageAtomicExchange64:
+        info.uses_int64 = true;
+        break;
+    default:
+        break;
+    }
+    switch (inst.GetOpcode()) {
+    case IR::Opcode::LoadGlobalU8:
+    case IR::Opcode::LoadGlobalS8:
+    case IR::Opcode::LoadGlobalU16:
+    case IR::Opcode::LoadGlobalS16:
+    case IR::Opcode::LoadGlobal32:
+    case IR::Opcode::LoadGlobal64:
+    case IR::Opcode::LoadGlobal128:
+    case IR::Opcode::WriteGlobalU8:
+    case IR::Opcode::WriteGlobalS8:
+    case IR::Opcode::WriteGlobalU16:
+    case IR::Opcode::WriteGlobalS16:
+    case IR::Opcode::WriteGlobal32:
+    case IR::Opcode::WriteGlobal64:
+    case IR::Opcode::WriteGlobal128:
+    case IR::Opcode::GlobalAtomicIAdd32:
+    case IR::Opcode::GlobalAtomicSMin32:
+    case IR::Opcode::GlobalAtomicUMin32:
+    case IR::Opcode::GlobalAtomicSMax32:
+    case IR::Opcode::GlobalAtomicUMax32:
+    case IR::Opcode::GlobalAtomicInc32:
+    case IR::Opcode::GlobalAtomicDec32:
+    case IR::Opcode::GlobalAtomicAnd32:
+    case IR::Opcode::GlobalAtomicOr32:
+    case IR::Opcode::GlobalAtomicXor32:
+    case IR::Opcode::GlobalAtomicExchange32:
+    case IR::Opcode::GlobalAtomicIAdd64:
+    case IR::Opcode::GlobalAtomicSMin64:
+    case IR::Opcode::GlobalAtomicUMin64:
+    case IR::Opcode::GlobalAtomicSMax64:
+    case IR::Opcode::GlobalAtomicUMax64:
+    case IR::Opcode::GlobalAtomicAnd64:
+    case IR::Opcode::GlobalAtomicOr64:
+    case IR::Opcode::GlobalAtomicXor64:
+    case IR::Opcode::GlobalAtomicExchange64:
+    case IR::Opcode::GlobalAtomicAddF32:
+    case IR::Opcode::GlobalAtomicAddF16x2:
+    case IR::Opcode::GlobalAtomicAddF32x2:
+    case IR::Opcode::GlobalAtomicMinF16x2:
+    case IR::Opcode::GlobalAtomicMinF32x2:
+    case IR::Opcode::GlobalAtomicMaxF16x2:
+    case IR::Opcode::GlobalAtomicMaxF32x2:
         info.uses_int64 = true;
+        info.uses_global_memory = true;
+        info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2;
+        info.used_storage_buffer_types |= IR::Type::U32 | IR::Type::U32x2 | IR::Type::U32x4;
         break;
     default:
         break;
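One detail worth noting in the hunk above: `used_constant_buffer_types` and `used_storage_buffer_types` are accumulated with `|=`, which works because `IR::Type` is a bitmask enum. A self-contained sketch of that pattern follows; the enum values here are illustrative, not yuzu's actual encoding:

```cpp
#include <cstdint>

// Illustrative bitmask enum mirroring how IR::Type values combine above.
enum class Type : uint32_t {
    U32 = 1 << 0,
    U32x2 = 1 << 1,
    U32x4 = 1 << 2,
};

constexpr Type operator|(Type a, Type b) {
    return static_cast<Type>(static_cast<uint32_t>(a) | static_cast<uint32_t>(b));
}
constexpr Type& operator|=(Type& a, Type b) {
    return a = a | b;
}

int main() {
    Type used_storage_buffer_types{};
    // A 128-bit global load needs 32-, 64-, and 128-bit storage views:
    used_storage_buffer_types |= Type::U32 | Type::U32x2 | Type::U32x4;
}
```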
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 378a3a915..f294d297f 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -11,6 +11,7 @@
 #include <boost/container/flat_set.hpp>
 #include <boost/container/small_vector.hpp>
 
+#include "common/alignment.h"
 #include "shader_recompiler/frontend/ir/basic_block.h"
 #include "shader_recompiler/frontend/ir/breadth_first_search.h"
 #include "shader_recompiler/frontend/ir/ir_emitter.h"
@@ -244,39 +245,6 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
            storage_buffer.offset < bias.offset_end;
 }
 
-/// Discards a global memory operation, reads return zero and writes are ignored
-void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) {
-    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
-    const IR::Value zero{u32{0}};
-    switch (inst.GetOpcode()) {
-    case IR::Opcode::LoadGlobalS8:
-    case IR::Opcode::LoadGlobalU8:
-    case IR::Opcode::LoadGlobalS16:
-    case IR::Opcode::LoadGlobalU16:
-    case IR::Opcode::LoadGlobal32:
-        inst.ReplaceUsesWith(zero);
-        break;
-    case IR::Opcode::LoadGlobal64:
-        inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero)});
-        break;
-    case IR::Opcode::LoadGlobal128:
-        inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero, zero, zero)});
-        break;
-    case IR::Opcode::WriteGlobalS8:
-    case IR::Opcode::WriteGlobalU8:
-    case IR::Opcode::WriteGlobalS16:
-    case IR::Opcode::WriteGlobalU16:
-    case IR::Opcode::WriteGlobal32:
-    case IR::Opcode::WriteGlobal64:
-    case IR::Opcode::WriteGlobal128:
-        inst.Invalidate();
-        break;
-    default:
-        throw LogicError("Invalid opcode to discard its global memory operation {}",
-                         inst.GetOpcode());
-    }
-}
-
 struct LowAddrInfo {
     IR::U32 value;
     s32 imm_offset;
@@ -350,6 +318,10 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias)
         .index{index.U32()},
         .offset{offset.U32()},
     };
+    if (!Common::IsAligned(storage_buffer.offset, 16)) {
+        // The SSBO pointer has to be aligned
+        return std::nullopt;
+    }
     if (bias && !MeetsBias(storage_buffer, *bias)) {
         // We have to blacklist some addresses in case we wrongly
         // point to them
@@ -372,19 +344,17 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     // Track the low address of the instruction
     const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
     if (!low_addr_info) {
-        DiscardGlobalMemory(block, inst);
+        // Failed to track the low address, use NVN fallbacks
         return;
     }
     // First try to find storage buffers in the NVN address
     const IR::U32 low_addr{low_addr_info->value};
-    std::optional storage_buffer{Track(low_addr, &nvn_bias)};
+    std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
     if (!storage_buffer) {
         // If it fails, track without a bias
         storage_buffer = Track(low_addr, nullptr);
         if (!storage_buffer) {
-            // If that also failed, drop the global memory usage
-            // LOG_ERROR
-            DiscardGlobalMemory(block, inst);
+            // If that also fails, use NVN fallbacks
             return;
         }
     }
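The new `Common::IsAligned(storage_buffer.offset, 16)` guard in `Track` rejects candidate descriptors at unaligned constant buffer offsets, per the in-diff comment that the SSBO pointer has to be aligned. A minimal sketch of what such a predicate looks like; yuzu's actual helper lives in common/alignment.h and is a template, so this only mirrors its intent for the power-of-two case:

```cpp
#include <cstdint>

// Power-of-two alignment check: true when 'value' is a multiple of 'alignment'.
constexpr bool IsAligned(uint32_t value, uint32_t alignment) {
    return (value & (alignment - 1)) == 0;
}

static_assert(IsAligned(0x30, 16));  // 0x30 is 16-byte aligned, kept
static_assert(!IsAligned(0x34, 16)); // rejected by the guard above
```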