diff options
Diffstat (limited to 'src/shader_recompiler/ir_opt')
3 files changed, 201 insertions, 2 deletions
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 9ef8688c9..73373576b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | |||
| @@ -145,6 +145,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 145 | case IR::Opcode::FPOrdGreaterThanEqual16: | 145 | case IR::Opcode::FPOrdGreaterThanEqual16: |
| 146 | case IR::Opcode::FPUnordGreaterThanEqual16: | 146 | case IR::Opcode::FPUnordGreaterThanEqual16: |
| 147 | case IR::Opcode::FPIsNan16: | 147 | case IR::Opcode::FPIsNan16: |
| 148 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 149 | case IR::Opcode::StorageAtomicAddF16x2: | ||
| 150 | case IR::Opcode::StorageAtomicMinF16x2: | ||
| 151 | case IR::Opcode::StorageAtomicMaxF16x2: | ||
| 148 | info.uses_fp16 = true; | 152 | info.uses_fp16 = true; |
| 149 | break; | 153 | break; |
| 150 | case IR::Opcode::CompositeConstructF64x2: | 154 | case IR::Opcode::CompositeConstructF64x2: |
| @@ -310,6 +314,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 310 | case IR::Opcode::ConvertF16U64: | 314 | case IR::Opcode::ConvertF16U64: |
| 311 | case IR::Opcode::ConvertF32U64: | 315 | case IR::Opcode::ConvertF32U64: |
| 312 | case IR::Opcode::ConvertF64U64: | 316 | case IR::Opcode::ConvertF64U64: |
| 317 | case IR::Opcode::SharedAtomicExchange64: | ||
| 313 | info.uses_int64 = true; | 318 | info.uses_int64 = true; |
| 314 | break; | 319 | break; |
| 315 | default: | 320 | default: |
| @@ -444,6 +449,71 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 444 | case IR::Opcode::FSwizzleAdd: | 449 | case IR::Opcode::FSwizzleAdd: |
| 445 | info.uses_fswzadd = true; | 450 | info.uses_fswzadd = true; |
| 446 | break; | 451 | break; |
| 452 | case IR::Opcode::SharedAtomicInc32: | ||
| 453 | info.uses_shared_increment = true; | ||
| 454 | break; | ||
| 455 | case IR::Opcode::SharedAtomicDec32: | ||
| 456 | info.uses_shared_decrement = true; | ||
| 457 | break; | ||
| 458 | case IR::Opcode::GlobalAtomicInc32: | ||
| 459 | case IR::Opcode::StorageAtomicInc32: | ||
| 460 | info.uses_global_increment = true; | ||
| 461 | break; | ||
| 462 | case IR::Opcode::GlobalAtomicDec32: | ||
| 463 | case IR::Opcode::StorageAtomicDec32: | ||
| 464 | info.uses_global_decrement = true; | ||
| 465 | break; | ||
| 466 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 467 | case IR::Opcode::StorageAtomicAddF32: | ||
| 468 | info.uses_atomic_f32_add = true; | ||
| 469 | break; | ||
| 470 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 471 | case IR::Opcode::StorageAtomicAddF16x2: | ||
| 472 | info.uses_atomic_f16x2_add = true; | ||
| 473 | break; | ||
| 474 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 475 | case IR::Opcode::StorageAtomicAddF32x2: | ||
| 476 | info.uses_atomic_f32x2_add = true; | ||
| 477 | break; | ||
| 478 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 479 | case IR::Opcode::StorageAtomicMinF16x2: | ||
| 480 | info.uses_atomic_f16x2_min = true; | ||
| 481 | break; | ||
| 482 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 483 | case IR::Opcode::StorageAtomicMinF32x2: | ||
| 484 | info.uses_atomic_f32x2_min = true; | ||
| 485 | break; | ||
| 486 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 487 | case IR::Opcode::StorageAtomicMaxF16x2: | ||
| 488 | info.uses_atomic_f16x2_max = true; | ||
| 489 | break; | ||
| 490 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 491 | case IR::Opcode::StorageAtomicMaxF32x2: | ||
| 492 | info.uses_atomic_f32x2_max = true; | ||
| 493 | break; | ||
| 494 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 495 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 496 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 497 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 498 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 499 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 500 | case IR::Opcode::GlobalAtomicOr64: | ||
| 501 | case IR::Opcode::GlobalAtomicXor64: | ||
| 502 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 503 | case IR::Opcode::StorageAtomicIAdd64: | ||
| 504 | case IR::Opcode::StorageAtomicSMin64: | ||
| 505 | case IR::Opcode::StorageAtomicUMin64: | ||
| 506 | case IR::Opcode::StorageAtomicSMax64: | ||
| 507 | case IR::Opcode::StorageAtomicUMax64: | ||
| 508 | case IR::Opcode::StorageAtomicAnd64: | ||
| 509 | case IR::Opcode::StorageAtomicOr64: | ||
| 510 | case IR::Opcode::StorageAtomicXor64: | ||
| 511 | info.uses_64_bit_atomics = true; | ||
| 512 | break; | ||
| 513 | case IR::Opcode::SharedAtomicExchange64: | ||
| 514 | info.uses_64_bit_atomics = true; | ||
| 515 | info.uses_shared_memory_u32x2 = true; | ||
| 516 | break; | ||
| 447 | default: | 517 | default: |
| 448 | break; | 518 | break; |
| 449 | } | 519 | } |
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index afe871505..0d4f266c3 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | |||
| @@ -72,6 +72,33 @@ bool IsGlobalMemory(const IR::Inst& inst) { | |||
| 72 | case IR::Opcode::WriteGlobal32: | 72 | case IR::Opcode::WriteGlobal32: |
| 73 | case IR::Opcode::WriteGlobal64: | 73 | case IR::Opcode::WriteGlobal64: |
| 74 | case IR::Opcode::WriteGlobal128: | 74 | case IR::Opcode::WriteGlobal128: |
| 75 | case IR::Opcode::GlobalAtomicIAdd32: | ||
| 76 | case IR::Opcode::GlobalAtomicSMin32: | ||
| 77 | case IR::Opcode::GlobalAtomicUMin32: | ||
| 78 | case IR::Opcode::GlobalAtomicSMax32: | ||
| 79 | case IR::Opcode::GlobalAtomicUMax32: | ||
| 80 | case IR::Opcode::GlobalAtomicInc32: | ||
| 81 | case IR::Opcode::GlobalAtomicDec32: | ||
| 82 | case IR::Opcode::GlobalAtomicAnd32: | ||
| 83 | case IR::Opcode::GlobalAtomicOr32: | ||
| 84 | case IR::Opcode::GlobalAtomicXor32: | ||
| 85 | case IR::Opcode::GlobalAtomicExchange32: | ||
| 86 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 87 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 88 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 89 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 90 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 91 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 92 | case IR::Opcode::GlobalAtomicOr64: | ||
| 93 | case IR::Opcode::GlobalAtomicXor64: | ||
| 94 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 95 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 96 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 97 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 98 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 99 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 100 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 101 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 75 | return true; | 102 | return true; |
| 76 | default: | 103 | default: |
| 77 | return false; | 104 | return false; |
| @@ -125,6 +152,60 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) { | |||
| 125 | return IR::Opcode::WriteStorage64; | 152 | return IR::Opcode::WriteStorage64; |
| 126 | case IR::Opcode::WriteGlobal128: | 153 | case IR::Opcode::WriteGlobal128: |
| 127 | return IR::Opcode::WriteStorage128; | 154 | return IR::Opcode::WriteStorage128; |
| 155 | case IR::Opcode::GlobalAtomicIAdd32: | ||
| 156 | return IR::Opcode::StorageAtomicIAdd32; | ||
| 157 | case IR::Opcode::GlobalAtomicSMin32: | ||
| 158 | return IR::Opcode::StorageAtomicSMin32; | ||
| 159 | case IR::Opcode::GlobalAtomicUMin32: | ||
| 160 | return IR::Opcode::StorageAtomicUMin32; | ||
| 161 | case IR::Opcode::GlobalAtomicSMax32: | ||
| 162 | return IR::Opcode::StorageAtomicSMax32; | ||
| 163 | case IR::Opcode::GlobalAtomicUMax32: | ||
| 164 | return IR::Opcode::StorageAtomicUMax32; | ||
| 165 | case IR::Opcode::GlobalAtomicInc32: | ||
| 166 | return IR::Opcode::StorageAtomicInc32; | ||
| 167 | case IR::Opcode::GlobalAtomicDec32: | ||
| 168 | return IR::Opcode::StorageAtomicDec32; | ||
| 169 | case IR::Opcode::GlobalAtomicAnd32: | ||
| 170 | return IR::Opcode::StorageAtomicAnd32; | ||
| 171 | case IR::Opcode::GlobalAtomicOr32: | ||
| 172 | return IR::Opcode::StorageAtomicOr32; | ||
| 173 | case IR::Opcode::GlobalAtomicXor32: | ||
| 174 | return IR::Opcode::StorageAtomicXor32; | ||
| 175 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 176 | return IR::Opcode::StorageAtomicIAdd64; | ||
| 177 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 178 | return IR::Opcode::StorageAtomicSMin64; | ||
| 179 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 180 | return IR::Opcode::StorageAtomicUMin64; | ||
| 181 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 182 | return IR::Opcode::StorageAtomicSMax64; | ||
| 183 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 184 | return IR::Opcode::StorageAtomicUMax64; | ||
| 185 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 186 | return IR::Opcode::StorageAtomicAnd64; | ||
| 187 | case IR::Opcode::GlobalAtomicOr64: | ||
| 188 | return IR::Opcode::StorageAtomicOr64; | ||
| 189 | case IR::Opcode::GlobalAtomicXor64: | ||
| 190 | return IR::Opcode::StorageAtomicXor64; | ||
| 191 | case IR::Opcode::GlobalAtomicExchange32: | ||
| 192 | return IR::Opcode::StorageAtomicExchange32; | ||
| 193 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 194 | return IR::Opcode::StorageAtomicExchange64; | ||
| 195 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 196 | return IR::Opcode::StorageAtomicAddF32; | ||
| 197 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 198 | return IR::Opcode::StorageAtomicAddF16x2; | ||
| 199 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 200 | return IR::Opcode::StorageAtomicMinF16x2; | ||
| 201 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 202 | return IR::Opcode::StorageAtomicMaxF16x2; | ||
| 203 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 204 | return IR::Opcode::StorageAtomicAddF32x2; | ||
| 205 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 206 | return IR::Opcode::StorageAtomicMinF32x2; | ||
| 207 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 208 | return IR::Opcode::StorageAtomicMaxF32x2; | ||
| 128 | default: | 209 | default: |
| 129 | throw InvalidArgument("Invalid global memory opcode {}", opcode); | 210 | throw InvalidArgument("Invalid global memory opcode {}", opcode); |
| 130 | } | 211 | } |
| @@ -328,6 +409,16 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index | |||
| 328 | inst.Invalidate(); | 409 | inst.Invalidate(); |
| 329 | } | 410 | } |
| 330 | 411 | ||
| 412 | /// Replace a global memory atomic instruction with its storage buffer equivalent: a new instruction using the GlobalToStorage opcode and the arguments {storage_index, offset, inst.Arg(1)} is created via block.PrependNewInst at inst's iterator, and all uses of inst are redirected to it (inst itself is not invalidated here) | ||
| 413 | void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, | ||
| 414 | const IR::U32& offset) { | ||
| 415 | const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; | ||
| 416 | const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 417 | const IR::Value value{ | ||
| 418 | &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})}; | ||
| 419 | inst.ReplaceUsesWith(value); | ||
| 420 | } | ||
| 421 | |||
| 331 | /// Replace a global memory instruction with its storage buffer equivalent | 422 | /// Replace a global memory instruction with its storage buffer equivalent |
| 332 | void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, | 423 | void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, |
| 333 | const IR::U32& offset) { | 424 | const IR::U32& offset) { |
| @@ -348,6 +439,34 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, | |||
| 348 | case IR::Opcode::WriteGlobal64: | 439 | case IR::Opcode::WriteGlobal64: |
| 349 | case IR::Opcode::WriteGlobal128: | 440 | case IR::Opcode::WriteGlobal128: |
| 350 | return ReplaceWrite(block, inst, storage_index, offset); | 441 | return ReplaceWrite(block, inst, storage_index, offset); |
| 442 | case IR::Opcode::GlobalAtomicIAdd32: | ||
| 443 | case IR::Opcode::GlobalAtomicSMin32: | ||
| 444 | case IR::Opcode::GlobalAtomicUMin32: | ||
| 445 | case IR::Opcode::GlobalAtomicSMax32: | ||
| 446 | case IR::Opcode::GlobalAtomicUMax32: | ||
| 447 | case IR::Opcode::GlobalAtomicInc32: | ||
| 448 | case IR::Opcode::GlobalAtomicDec32: | ||
| 449 | case IR::Opcode::GlobalAtomicAnd32: | ||
| 450 | case IR::Opcode::GlobalAtomicOr32: | ||
| 451 | case IR::Opcode::GlobalAtomicXor32: | ||
| 452 | case IR::Opcode::GlobalAtomicExchange32: | ||
| 453 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 454 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 455 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 456 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 457 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 458 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 459 | case IR::Opcode::GlobalAtomicOr64: | ||
| 460 | case IR::Opcode::GlobalAtomicXor64: | ||
| 461 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 462 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 463 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 464 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 465 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 466 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 467 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 468 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 469 | return ReplaceAtomic(block, inst, storage_index, offset); | ||
| 351 | default: | 470 | default: |
| 352 | throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode()); | 471 | throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode()); |
| 353 | } | 472 | } |
| @@ -364,7 +483,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { | |||
| 364 | CollectStorageBuffers(*block, inst, info); | 483 | CollectStorageBuffers(*block, inst, info); |
| 365 | } | 484 | } |
| 366 | } | 485 | } |
| 367 | u32 storage_index{}; | ||
| 368 | for (const StorageBufferAddr& storage_buffer : info.set) { | 486 | for (const StorageBufferAddr& storage_buffer : info.set) { |
| 369 | program.info.storage_buffers_descriptors.push_back({ | 487 | program.info.storage_buffers_descriptors.push_back({ |
| 370 | .cbuf_index = storage_buffer.index, | 488 | .cbuf_index = storage_buffer.index, |
| @@ -372,7 +490,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { | |||
| 372 | .count = 1, | 490 | .count = 1, |
| 373 | .is_written{info.writes.contains(storage_buffer)}, | 491 | .is_written{info.writes.contains(storage_buffer)}, |
| 374 | }); | 492 | }); |
| 375 | ++storage_index; | ||
| 376 | } | 493 | } |
| 377 | for (const StorageInst& storage_inst : info.to_replace) { | 494 | for (const StorageInst& storage_inst : info.to_replace) { |
| 378 | const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; | 495 | const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; |
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 52576b07f..62e73d52d 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp | |||
| @@ -114,6 +114,18 @@ IR::Opcode Replace(IR::Opcode op) { | |||
| 114 | return IR::Opcode::ConvertF32U32; | 114 | return IR::Opcode::ConvertF32U32; |
| 115 | case IR::Opcode::ConvertF16U64: | 115 | case IR::Opcode::ConvertF16U64: |
| 116 | return IR::Opcode::ConvertF32U64; | 116 | return IR::Opcode::ConvertF32U64; |
| 117 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 118 | return IR::Opcode::GlobalAtomicAddF32x2; | ||
| 119 | case IR::Opcode::StorageAtomicAddF16x2: | ||
| 120 | return IR::Opcode::StorageAtomicAddF32x2; | ||
| 121 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 122 | return IR::Opcode::GlobalAtomicMinF32x2; | ||
| 123 | case IR::Opcode::StorageAtomicMinF16x2: | ||
| 124 | return IR::Opcode::StorageAtomicMinF32x2; | ||
| 125 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 126 | return IR::Opcode::GlobalAtomicMaxF32x2; | ||
| 127 | case IR::Opcode::StorageAtomicMaxF16x2: | ||
| 128 | return IR::Opcode::StorageAtomicMaxF32x2; | ||
| 117 | default: | 129 | default: |
| 118 | return op; | 130 | return op; |
| 119 | } | 131 | } |