summaryrefslogtreecommitdiff
path: root/src/shader_recompiler/ir_opt
diff options
context:
space:
mode:
Diffstat (limited to 'src/shader_recompiler/ir_opt')
-rw-r--r--src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp70
-rw-r--r--src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp121
-rw-r--r--src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp12
3 files changed, 201 insertions, 2 deletions
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 9ef8688c9..73373576b 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -145,6 +145,10 @@ void VisitUsages(Info& info, IR::Inst& inst) {
145 case IR::Opcode::FPOrdGreaterThanEqual16: 145 case IR::Opcode::FPOrdGreaterThanEqual16:
146 case IR::Opcode::FPUnordGreaterThanEqual16: 146 case IR::Opcode::FPUnordGreaterThanEqual16:
147 case IR::Opcode::FPIsNan16: 147 case IR::Opcode::FPIsNan16:
148 case IR::Opcode::GlobalAtomicAddF16x2:
149 case IR::Opcode::StorageAtomicAddF16x2:
150 case IR::Opcode::StorageAtomicMinF16x2:
151 case IR::Opcode::StorageAtomicMaxF16x2:
148 info.uses_fp16 = true; 152 info.uses_fp16 = true;
149 break; 153 break;
150 case IR::Opcode::CompositeConstructF64x2: 154 case IR::Opcode::CompositeConstructF64x2:
@@ -310,6 +314,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
310 case IR::Opcode::ConvertF16U64: 314 case IR::Opcode::ConvertF16U64:
311 case IR::Opcode::ConvertF32U64: 315 case IR::Opcode::ConvertF32U64:
312 case IR::Opcode::ConvertF64U64: 316 case IR::Opcode::ConvertF64U64:
317 case IR::Opcode::SharedAtomicExchange64:
313 info.uses_int64 = true; 318 info.uses_int64 = true;
314 break; 319 break;
315 default: 320 default:
@@ -444,6 +449,71 @@ void VisitUsages(Info& info, IR::Inst& inst) {
444 case IR::Opcode::FSwizzleAdd: 449 case IR::Opcode::FSwizzleAdd:
445 info.uses_fswzadd = true; 450 info.uses_fswzadd = true;
446 break; 451 break;
452 case IR::Opcode::SharedAtomicInc32:
453 info.uses_shared_increment = true;
454 break;
455 case IR::Opcode::SharedAtomicDec32:
456 info.uses_shared_decrement = true;
457 break;
458 case IR::Opcode::GlobalAtomicInc32:
459 case IR::Opcode::StorageAtomicInc32:
460 info.uses_global_increment = true;
461 break;
462 case IR::Opcode::GlobalAtomicDec32:
463 case IR::Opcode::StorageAtomicDec32:
464 info.uses_global_decrement = true;
465 break;
466 case IR::Opcode::GlobalAtomicAddF32:
467 case IR::Opcode::StorageAtomicAddF32:
468 info.uses_atomic_f32_add = true;
469 break;
470 case IR::Opcode::GlobalAtomicAddF16x2:
471 case IR::Opcode::StorageAtomicAddF16x2:
472 info.uses_atomic_f16x2_add = true;
473 break;
474 case IR::Opcode::GlobalAtomicAddF32x2:
475 case IR::Opcode::StorageAtomicAddF32x2:
476 info.uses_atomic_f32x2_add = true;
477 break;
478 case IR::Opcode::GlobalAtomicMinF16x2:
479 case IR::Opcode::StorageAtomicMinF16x2:
480 info.uses_atomic_f16x2_min = true;
481 break;
482 case IR::Opcode::GlobalAtomicMinF32x2:
483 case IR::Opcode::StorageAtomicMinF32x2:
484 info.uses_atomic_f32x2_min = true;
485 break;
486 case IR::Opcode::GlobalAtomicMaxF16x2:
487 case IR::Opcode::StorageAtomicMaxF16x2:
488 info.uses_atomic_f16x2_max = true;
489 break;
490 case IR::Opcode::GlobalAtomicMaxF32x2:
491 case IR::Opcode::StorageAtomicMaxF32x2:
492 info.uses_atomic_f32x2_max = true;
493 break;
494 case IR::Opcode::GlobalAtomicIAdd64:
495 case IR::Opcode::GlobalAtomicSMin64:
496 case IR::Opcode::GlobalAtomicUMin64:
497 case IR::Opcode::GlobalAtomicSMax64:
498 case IR::Opcode::GlobalAtomicUMax64:
499 case IR::Opcode::GlobalAtomicAnd64:
500 case IR::Opcode::GlobalAtomicOr64:
501 case IR::Opcode::GlobalAtomicXor64:
502 case IR::Opcode::GlobalAtomicExchange64:
503 case IR::Opcode::StorageAtomicIAdd64:
504 case IR::Opcode::StorageAtomicSMin64:
505 case IR::Opcode::StorageAtomicUMin64:
506 case IR::Opcode::StorageAtomicSMax64:
507 case IR::Opcode::StorageAtomicUMax64:
508 case IR::Opcode::StorageAtomicAnd64:
509 case IR::Opcode::StorageAtomicOr64:
510 case IR::Opcode::StorageAtomicXor64:
511 info.uses_64_bit_atomics = true;
512 break;
513 case IR::Opcode::SharedAtomicExchange64:
514 info.uses_64_bit_atomics = true;
515 info.uses_shared_memory_u32x2 = true;
516 break;
447 default: 517 default:
448 break; 518 break;
449 } 519 }
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index afe871505..0d4f266c3 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -72,6 +72,33 @@ bool IsGlobalMemory(const IR::Inst& inst) {
72 case IR::Opcode::WriteGlobal32: 72 case IR::Opcode::WriteGlobal32:
73 case IR::Opcode::WriteGlobal64: 73 case IR::Opcode::WriteGlobal64:
74 case IR::Opcode::WriteGlobal128: 74 case IR::Opcode::WriteGlobal128:
75 case IR::Opcode::GlobalAtomicIAdd32:
76 case IR::Opcode::GlobalAtomicSMin32:
77 case IR::Opcode::GlobalAtomicUMin32:
78 case IR::Opcode::GlobalAtomicSMax32:
79 case IR::Opcode::GlobalAtomicUMax32:
80 case IR::Opcode::GlobalAtomicInc32:
81 case IR::Opcode::GlobalAtomicDec32:
82 case IR::Opcode::GlobalAtomicAnd32:
83 case IR::Opcode::GlobalAtomicOr32:
84 case IR::Opcode::GlobalAtomicXor32:
85 case IR::Opcode::GlobalAtomicExchange32:
86 case IR::Opcode::GlobalAtomicIAdd64:
87 case IR::Opcode::GlobalAtomicSMin64:
88 case IR::Opcode::GlobalAtomicUMin64:
89 case IR::Opcode::GlobalAtomicSMax64:
90 case IR::Opcode::GlobalAtomicUMax64:
91 case IR::Opcode::GlobalAtomicAnd64:
92 case IR::Opcode::GlobalAtomicOr64:
93 case IR::Opcode::GlobalAtomicXor64:
94 case IR::Opcode::GlobalAtomicExchange64:
95 case IR::Opcode::GlobalAtomicAddF32:
96 case IR::Opcode::GlobalAtomicAddF16x2:
97 case IR::Opcode::GlobalAtomicAddF32x2:
98 case IR::Opcode::GlobalAtomicMinF16x2:
99 case IR::Opcode::GlobalAtomicMinF32x2:
100 case IR::Opcode::GlobalAtomicMaxF16x2:
101 case IR::Opcode::GlobalAtomicMaxF32x2:
75 return true; 102 return true;
76 default: 103 default:
77 return false; 104 return false;
@@ -125,6 +152,60 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
125 return IR::Opcode::WriteStorage64; 152 return IR::Opcode::WriteStorage64;
126 case IR::Opcode::WriteGlobal128: 153 case IR::Opcode::WriteGlobal128:
127 return IR::Opcode::WriteStorage128; 154 return IR::Opcode::WriteStorage128;
155 case IR::Opcode::GlobalAtomicIAdd32:
156 return IR::Opcode::StorageAtomicIAdd32;
157 case IR::Opcode::GlobalAtomicSMin32:
158 return IR::Opcode::StorageAtomicSMin32;
159 case IR::Opcode::GlobalAtomicUMin32:
160 return IR::Opcode::StorageAtomicUMin32;
161 case IR::Opcode::GlobalAtomicSMax32:
162 return IR::Opcode::StorageAtomicSMax32;
163 case IR::Opcode::GlobalAtomicUMax32:
164 return IR::Opcode::StorageAtomicUMax32;
165 case IR::Opcode::GlobalAtomicInc32:
166 return IR::Opcode::StorageAtomicInc32;
167 case IR::Opcode::GlobalAtomicDec32:
168 return IR::Opcode::StorageAtomicDec32;
169 case IR::Opcode::GlobalAtomicAnd32:
170 return IR::Opcode::StorageAtomicAnd32;
171 case IR::Opcode::GlobalAtomicOr32:
172 return IR::Opcode::StorageAtomicOr32;
173 case IR::Opcode::GlobalAtomicXor32:
174 return IR::Opcode::StorageAtomicXor32;
175 case IR::Opcode::GlobalAtomicIAdd64:
176 return IR::Opcode::StorageAtomicIAdd64;
177 case IR::Opcode::GlobalAtomicSMin64:
178 return IR::Opcode::StorageAtomicSMin64;
179 case IR::Opcode::GlobalAtomicUMin64:
180 return IR::Opcode::StorageAtomicUMin64;
181 case IR::Opcode::GlobalAtomicSMax64:
182 return IR::Opcode::StorageAtomicSMax64;
183 case IR::Opcode::GlobalAtomicUMax64:
184 return IR::Opcode::StorageAtomicUMax64;
185 case IR::Opcode::GlobalAtomicAnd64:
186 return IR::Opcode::StorageAtomicAnd64;
187 case IR::Opcode::GlobalAtomicOr64:
188 return IR::Opcode::StorageAtomicOr64;
189 case IR::Opcode::GlobalAtomicXor64:
190 return IR::Opcode::StorageAtomicXor64;
191 case IR::Opcode::GlobalAtomicExchange32:
192 return IR::Opcode::StorageAtomicExchange32;
193 case IR::Opcode::GlobalAtomicExchange64:
194 return IR::Opcode::StorageAtomicExchange64;
195 case IR::Opcode::GlobalAtomicAddF32:
196 return IR::Opcode::StorageAtomicAddF32;
197 case IR::Opcode::GlobalAtomicAddF16x2:
198 return IR::Opcode::StorageAtomicAddF16x2;
199 case IR::Opcode::GlobalAtomicMinF16x2:
200 return IR::Opcode::StorageAtomicMinF16x2;
201 case IR::Opcode::GlobalAtomicMaxF16x2:
202 return IR::Opcode::StorageAtomicMaxF16x2;
203 case IR::Opcode::GlobalAtomicAddF32x2:
204 return IR::Opcode::StorageAtomicAddF32x2;
205 case IR::Opcode::GlobalAtomicMinF32x2:
206 return IR::Opcode::StorageAtomicMinF32x2;
207 case IR::Opcode::GlobalAtomicMaxF32x2:
208 return IR::Opcode::StorageAtomicMaxF32x2;
128 default: 209 default:
129 throw InvalidArgument("Invalid global memory opcode {}", opcode); 210 throw InvalidArgument("Invalid global memory opcode {}", opcode);
130 } 211 }
@@ -328,6 +409,16 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index
328 inst.Invalidate(); 409 inst.Invalidate();
329} 410}
330 411
412/// Replace a global memory atomic instruction with its storage buffer equivalent.
///
/// @param block         Block on which the new storage instruction is created
/// @param inst          Global atomic instruction whose uses are redirected
/// @param storage_index Forwarded as the first argument of the new instruction
/// @param offset        Forwarded as the second argument of the new instruction
///
/// NOTE(review): the opcode is translated with GlobalToStorage, whose default case throws
/// InvalidArgument for opcodes it does not map.
413void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
414 const IR::U32& offset) {
// Storage buffer opcode matching this instruction's global atomic opcode
415 const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
// Iterator designating inst within the block's instruction list
416 const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
// Create the storage atomic at that position (presumably inserted before inst, going by
// the name PrependNewInst - confirm). Its operands are the storage buffer index, the
// offset, and argument 1 of the original instruction (presumably the atomic operand,
// with argument 0 being the global address - TODO confirm against the opcode table).
417 const IR::Value value{
418 &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})};
// Hand the new instruction's value to ReplaceUsesWith so users of the original result
// see the storage atomic instead (presumably; semantics defined elsewhere). No
// Invalidate() call is made on inst in this function.
419 inst.ReplaceUsesWith(value);
420}
421
331/// Replace a global memory instruction with its storage buffer equivalent 422/// Replace a global memory instruction with its storage buffer equivalent
332void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, 423void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
333 const IR::U32& offset) { 424 const IR::U32& offset) {
@@ -348,6 +439,34 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
348 case IR::Opcode::WriteGlobal64: 439 case IR::Opcode::WriteGlobal64:
349 case IR::Opcode::WriteGlobal128: 440 case IR::Opcode::WriteGlobal128:
350 return ReplaceWrite(block, inst, storage_index, offset); 441 return ReplaceWrite(block, inst, storage_index, offset);
442 case IR::Opcode::GlobalAtomicIAdd32:
443 case IR::Opcode::GlobalAtomicSMin32:
444 case IR::Opcode::GlobalAtomicUMin32:
445 case IR::Opcode::GlobalAtomicSMax32:
446 case IR::Opcode::GlobalAtomicUMax32:
447 case IR::Opcode::GlobalAtomicInc32:
448 case IR::Opcode::GlobalAtomicDec32:
449 case IR::Opcode::GlobalAtomicAnd32:
450 case IR::Opcode::GlobalAtomicOr32:
451 case IR::Opcode::GlobalAtomicXor32:
452 case IR::Opcode::GlobalAtomicExchange32:
453 case IR::Opcode::GlobalAtomicIAdd64:
454 case IR::Opcode::GlobalAtomicSMin64:
455 case IR::Opcode::GlobalAtomicUMin64:
456 case IR::Opcode::GlobalAtomicSMax64:
457 case IR::Opcode::GlobalAtomicUMax64:
458 case IR::Opcode::GlobalAtomicAnd64:
459 case IR::Opcode::GlobalAtomicOr64:
460 case IR::Opcode::GlobalAtomicXor64:
461 case IR::Opcode::GlobalAtomicExchange64:
462 case IR::Opcode::GlobalAtomicAddF32:
463 case IR::Opcode::GlobalAtomicAddF16x2:
464 case IR::Opcode::GlobalAtomicAddF32x2:
465 case IR::Opcode::GlobalAtomicMinF16x2:
466 case IR::Opcode::GlobalAtomicMinF32x2:
467 case IR::Opcode::GlobalAtomicMaxF16x2:
468 case IR::Opcode::GlobalAtomicMaxF32x2:
469 return ReplaceAtomic(block, inst, storage_index, offset);
351 default: 470 default:
352 throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode()); 471 throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode());
353 } 472 }
@@ -364,7 +483,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
364 CollectStorageBuffers(*block, inst, info); 483 CollectStorageBuffers(*block, inst, info);
365 } 484 }
366 } 485 }
367 u32 storage_index{};
368 for (const StorageBufferAddr& storage_buffer : info.set) { 486 for (const StorageBufferAddr& storage_buffer : info.set) {
369 program.info.storage_buffers_descriptors.push_back({ 487 program.info.storage_buffers_descriptors.push_back({
370 .cbuf_index = storage_buffer.index, 488 .cbuf_index = storage_buffer.index,
@@ -372,7 +490,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
372 .count = 1, 490 .count = 1,
373 .is_written{info.writes.contains(storage_buffer)}, 491 .is_written{info.writes.contains(storage_buffer)},
374 }); 492 });
375 ++storage_index;
376 } 493 }
377 for (const StorageInst& storage_inst : info.to_replace) { 494 for (const StorageInst& storage_inst : info.to_replace) {
378 const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; 495 const StorageBufferAddr storage_buffer{storage_inst.storage_buffer};
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
index 52576b07f..62e73d52d 100644
--- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
+++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
@@ -114,6 +114,18 @@ IR::Opcode Replace(IR::Opcode op) {
114 return IR::Opcode::ConvertF32U32; 114 return IR::Opcode::ConvertF32U32;
115 case IR::Opcode::ConvertF16U64: 115 case IR::Opcode::ConvertF16U64:
116 return IR::Opcode::ConvertF32U64; 116 return IR::Opcode::ConvertF32U64;
117 case IR::Opcode::GlobalAtomicAddF16x2:
118 return IR::Opcode::GlobalAtomicAddF32x2;
119 case IR::Opcode::StorageAtomicAddF16x2:
120 return IR::Opcode::StorageAtomicAddF32x2;
121 case IR::Opcode::GlobalAtomicMinF16x2:
122 return IR::Opcode::GlobalAtomicMinF32x2;
123 case IR::Opcode::StorageAtomicMinF16x2:
124 return IR::Opcode::StorageAtomicMinF32x2;
125 case IR::Opcode::GlobalAtomicMaxF16x2:
126 return IR::Opcode::GlobalAtomicMaxF32x2;
127 case IR::Opcode::StorageAtomicMaxF16x2:
128 return IR::Opcode::StorageAtomicMaxF32x2;
117 default: 129 default:
118 return op; 130 return op;
119 } 131 }