path: root/src/shader_recompiler/ir_opt
Diffstat (limited to '')
-rw-r--r--  src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp              928
-rw-r--r--  src/shader_recompiler/ir_opt/constant_propagation_pass.cpp             610
-rw-r--r--  src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp             26
-rw-r--r--  src/shader_recompiler/ir_opt/dual_vertex_pass.cpp                        30
-rw-r--r--  src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp  526
-rw-r--r--  src/shader_recompiler/ir_opt/identity_removal_pass.cpp                  38
-rw-r--r--  src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp                    143
-rw-r--r--  src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp                  218
-rw-r--r--  src/shader_recompiler/ir_opt/passes.h                                    32
-rw-r--r--  src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp                       383
-rw-r--r--  src/shader_recompiler/ir_opt/texture_pass.cpp                           523
-rw-r--r--  src/shader_recompiler/ir_opt/verification_pass.cpp                       98
12 files changed, 3555 insertions, 0 deletions
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
new file mode 100644
index 000000000..5ead930f1
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -0,0 +1,928 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/alignment.h"
6#include "shader_recompiler/environment.h"
7#include "shader_recompiler/frontend/ir/modifiers.h"
8#include "shader_recompiler/frontend/ir/program.h"
9#include "shader_recompiler/frontend/ir/value.h"
10#include "shader_recompiler/ir_opt/passes.h"
11#include "shader_recompiler/shader_info.h"
12
13namespace Shader::Optimization {
14namespace {
15void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
16 if (count != 1) {
17 throw NotImplementedException("Constant buffer descriptor indexing");
18 }
19 if ((info.constant_buffer_mask & (1U << index)) != 0) {
20 return;
21 }
22 info.constant_buffer_mask |= 1U << index;
23
24 auto& cbufs{info.constant_buffer_descriptors};
25 cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index),
26 ConstantBufferDescriptor{
27 .index = index,
28 .count = 1,
29 });
30}
31
32void GetPatch(Info& info, IR::Patch patch) {
33 if (!IR::IsGeneric(patch)) {
34 throw NotImplementedException("Reading non-generic patch {}", patch);
35 }
36 info.uses_patches.at(IR::GenericPatchIndex(patch)) = true;
37}
38
39void SetPatch(Info& info, IR::Patch patch) {
40 if (IR::IsGeneric(patch)) {
41 info.uses_patches.at(IR::GenericPatchIndex(patch)) = true;
42 return;
43 }
44 switch (patch) {
45 case IR::Patch::TessellationLodLeft:
46 case IR::Patch::TessellationLodTop:
47 case IR::Patch::TessellationLodRight:
48 case IR::Patch::TessellationLodBottom:
49 info.stores_tess_level_outer = true;
50 break;
51 case IR::Patch::TessellationLodInteriorU:
52 case IR::Patch::TessellationLodInteriorV:
53 info.stores_tess_level_inner = true;
54 break;
55 default:
56 throw NotImplementedException("Set patch {}", patch);
57 }
58}
59
60void CheckCBufNVN(Info& info, IR::Inst& inst) {
61 const IR::Value cbuf_index{inst.Arg(0)};
62 if (!cbuf_index.IsImmediate()) {
63 info.nvn_buffer_used.set();
64 return;
65 }
66 const u32 index{cbuf_index.U32()};
67 if (index != 0) {
68 return;
69 }
70 const IR::Value cbuf_offset{inst.Arg(1)};
71 if (!cbuf_offset.IsImmediate()) {
72 info.nvn_buffer_used.set();
73 return;
74 }
75 const u32 offset{cbuf_offset.U32()};
76 const u32 descriptor_size{0x10};
77 const u32 upper_limit{info.nvn_buffer_base + descriptor_size * 16};
78 if (offset >= info.nvn_buffer_base && offset < upper_limit) {
79 const std::size_t nvn_index{(offset - info.nvn_buffer_base) / descriptor_size};
80 info.nvn_buffer_used.set(nvn_index, true);
81 }
82}
83
84void VisitUsages(Info& info, IR::Inst& inst) {
85 switch (inst.GetOpcode()) {
86 case IR::Opcode::CompositeConstructF16x2:
87 case IR::Opcode::CompositeConstructF16x3:
88 case IR::Opcode::CompositeConstructF16x4:
89 case IR::Opcode::CompositeExtractF16x2:
90 case IR::Opcode::CompositeExtractF16x3:
91 case IR::Opcode::CompositeExtractF16x4:
92 case IR::Opcode::CompositeInsertF16x2:
93 case IR::Opcode::CompositeInsertF16x3:
94 case IR::Opcode::CompositeInsertF16x4:
95 case IR::Opcode::SelectF16:
96 case IR::Opcode::BitCastU16F16:
97 case IR::Opcode::BitCastF16U16:
98 case IR::Opcode::PackFloat2x16:
99 case IR::Opcode::UnpackFloat2x16:
100 case IR::Opcode::ConvertS16F16:
101 case IR::Opcode::ConvertS32F16:
102 case IR::Opcode::ConvertS64F16:
103 case IR::Opcode::ConvertU16F16:
104 case IR::Opcode::ConvertU32F16:
105 case IR::Opcode::ConvertU64F16:
106 case IR::Opcode::ConvertF16S8:
107 case IR::Opcode::ConvertF16S16:
108 case IR::Opcode::ConvertF16S32:
109 case IR::Opcode::ConvertF16S64:
110 case IR::Opcode::ConvertF16U8:
111 case IR::Opcode::ConvertF16U16:
112 case IR::Opcode::ConvertF16U32:
113 case IR::Opcode::ConvertF16U64:
114 case IR::Opcode::FPAbs16:
115 case IR::Opcode::FPAdd16:
116 case IR::Opcode::FPCeil16:
117 case IR::Opcode::FPFloor16:
118 case IR::Opcode::FPFma16:
119 case IR::Opcode::FPMul16:
120 case IR::Opcode::FPNeg16:
121 case IR::Opcode::FPRoundEven16:
122 case IR::Opcode::FPSaturate16:
123 case IR::Opcode::FPClamp16:
124 case IR::Opcode::FPTrunc16:
125 case IR::Opcode::FPOrdEqual16:
126 case IR::Opcode::FPUnordEqual16:
127 case IR::Opcode::FPOrdNotEqual16:
128 case IR::Opcode::FPUnordNotEqual16:
129 case IR::Opcode::FPOrdLessThan16:
130 case IR::Opcode::FPUnordLessThan16:
131 case IR::Opcode::FPOrdGreaterThan16:
132 case IR::Opcode::FPUnordGreaterThan16:
133 case IR::Opcode::FPOrdLessThanEqual16:
134 case IR::Opcode::FPUnordLessThanEqual16:
135 case IR::Opcode::FPOrdGreaterThanEqual16:
136 case IR::Opcode::FPUnordGreaterThanEqual16:
137 case IR::Opcode::FPIsNan16:
138 case IR::Opcode::GlobalAtomicAddF16x2:
139 case IR::Opcode::GlobalAtomicMinF16x2:
140 case IR::Opcode::GlobalAtomicMaxF16x2:
141 case IR::Opcode::StorageAtomicAddF16x2:
142 case IR::Opcode::StorageAtomicMinF16x2:
143 case IR::Opcode::StorageAtomicMaxF16x2:
144 info.uses_fp16 = true;
145 break;
146 case IR::Opcode::CompositeConstructF64x2:
147 case IR::Opcode::CompositeConstructF64x3:
148 case IR::Opcode::CompositeConstructF64x4:
149 case IR::Opcode::CompositeExtractF64x2:
150 case IR::Opcode::CompositeExtractF64x3:
151 case IR::Opcode::CompositeExtractF64x4:
152 case IR::Opcode::CompositeInsertF64x2:
153 case IR::Opcode::CompositeInsertF64x3:
154 case IR::Opcode::CompositeInsertF64x4:
155 case IR::Opcode::SelectF64:
156 case IR::Opcode::BitCastU64F64:
157 case IR::Opcode::BitCastF64U64:
158 case IR::Opcode::PackDouble2x32:
159 case IR::Opcode::UnpackDouble2x32:
160 case IR::Opcode::FPAbs64:
161 case IR::Opcode::FPAdd64:
162 case IR::Opcode::FPCeil64:
163 case IR::Opcode::FPFloor64:
164 case IR::Opcode::FPFma64:
165 case IR::Opcode::FPMax64:
166 case IR::Opcode::FPMin64:
167 case IR::Opcode::FPMul64:
168 case IR::Opcode::FPNeg64:
169 case IR::Opcode::FPRecip64:
170 case IR::Opcode::FPRecipSqrt64:
171 case IR::Opcode::FPRoundEven64:
172 case IR::Opcode::FPSaturate64:
173 case IR::Opcode::FPClamp64:
174 case IR::Opcode::FPTrunc64:
175 case IR::Opcode::FPOrdEqual64:
176 case IR::Opcode::FPUnordEqual64:
177 case IR::Opcode::FPOrdNotEqual64:
178 case IR::Opcode::FPUnordNotEqual64:
179 case IR::Opcode::FPOrdLessThan64:
180 case IR::Opcode::FPUnordLessThan64:
181 case IR::Opcode::FPOrdGreaterThan64:
182 case IR::Opcode::FPUnordGreaterThan64:
183 case IR::Opcode::FPOrdLessThanEqual64:
184 case IR::Opcode::FPUnordLessThanEqual64:
185 case IR::Opcode::FPOrdGreaterThanEqual64:
186 case IR::Opcode::FPUnordGreaterThanEqual64:
187 case IR::Opcode::FPIsNan64:
188 case IR::Opcode::ConvertS16F64:
189 case IR::Opcode::ConvertS32F64:
190 case IR::Opcode::ConvertS64F64:
191 case IR::Opcode::ConvertU16F64:
192 case IR::Opcode::ConvertU32F64:
193 case IR::Opcode::ConvertU64F64:
194 case IR::Opcode::ConvertF32F64:
195 case IR::Opcode::ConvertF64F32:
196 case IR::Opcode::ConvertF64S8:
197 case IR::Opcode::ConvertF64S16:
198 case IR::Opcode::ConvertF64S32:
199 case IR::Opcode::ConvertF64S64:
200 case IR::Opcode::ConvertF64U8:
201 case IR::Opcode::ConvertF64U16:
202 case IR::Opcode::ConvertF64U32:
203 case IR::Opcode::ConvertF64U64:
204 info.uses_fp64 = true;
205 break;
206 default:
207 break;
208 }
209 switch (inst.GetOpcode()) {
210 case IR::Opcode::GetCbufU8:
211 case IR::Opcode::GetCbufS8:
212 case IR::Opcode::UndefU8:
213 case IR::Opcode::LoadGlobalU8:
214 case IR::Opcode::LoadGlobalS8:
215 case IR::Opcode::WriteGlobalU8:
216 case IR::Opcode::WriteGlobalS8:
217 case IR::Opcode::LoadStorageU8:
218 case IR::Opcode::LoadStorageS8:
219 case IR::Opcode::WriteStorageU8:
220 case IR::Opcode::WriteStorageS8:
221 case IR::Opcode::LoadSharedU8:
222 case IR::Opcode::LoadSharedS8:
223 case IR::Opcode::WriteSharedU8:
224 case IR::Opcode::SelectU8:
225 case IR::Opcode::ConvertF16S8:
226 case IR::Opcode::ConvertF16U8:
227 case IR::Opcode::ConvertF32S8:
228 case IR::Opcode::ConvertF32U8:
229 case IR::Opcode::ConvertF64S8:
230 case IR::Opcode::ConvertF64U8:
231 info.uses_int8 = true;
232 break;
233 default:
234 break;
235 }
236 switch (inst.GetOpcode()) {
237 case IR::Opcode::GetCbufU16:
238 case IR::Opcode::GetCbufS16:
239 case IR::Opcode::UndefU16:
240 case IR::Opcode::LoadGlobalU16:
241 case IR::Opcode::LoadGlobalS16:
242 case IR::Opcode::WriteGlobalU16:
243 case IR::Opcode::WriteGlobalS16:
244 case IR::Opcode::LoadStorageU16:
245 case IR::Opcode::LoadStorageS16:
246 case IR::Opcode::WriteStorageU16:
247 case IR::Opcode::WriteStorageS16:
248 case IR::Opcode::LoadSharedU16:
249 case IR::Opcode::LoadSharedS16:
250 case IR::Opcode::WriteSharedU16:
251 case IR::Opcode::SelectU16:
252 case IR::Opcode::BitCastU16F16:
253 case IR::Opcode::BitCastF16U16:
254 case IR::Opcode::ConvertS16F16:
255 case IR::Opcode::ConvertS16F32:
256 case IR::Opcode::ConvertS16F64:
257 case IR::Opcode::ConvertU16F16:
258 case IR::Opcode::ConvertU16F32:
259 case IR::Opcode::ConvertU16F64:
260 case IR::Opcode::ConvertF16S16:
261 case IR::Opcode::ConvertF16U16:
262 case IR::Opcode::ConvertF32S16:
263 case IR::Opcode::ConvertF32U16:
264 case IR::Opcode::ConvertF64S16:
265 case IR::Opcode::ConvertF64U16:
266 info.uses_int16 = true;
267 break;
268 default:
269 break;
270 }
271 switch (inst.GetOpcode()) {
272 case IR::Opcode::UndefU64:
273 case IR::Opcode::LoadGlobalU8:
274 case IR::Opcode::LoadGlobalS8:
275 case IR::Opcode::LoadGlobalU16:
276 case IR::Opcode::LoadGlobalS16:
277 case IR::Opcode::LoadGlobal32:
278 case IR::Opcode::LoadGlobal64:
279 case IR::Opcode::LoadGlobal128:
280 case IR::Opcode::WriteGlobalU8:
281 case IR::Opcode::WriteGlobalS8:
282 case IR::Opcode::WriteGlobalU16:
283 case IR::Opcode::WriteGlobalS16:
284 case IR::Opcode::WriteGlobal32:
285 case IR::Opcode::WriteGlobal64:
286 case IR::Opcode::WriteGlobal128:
287 case IR::Opcode::SelectU64:
288 case IR::Opcode::BitCastU64F64:
289 case IR::Opcode::BitCastF64U64:
290 case IR::Opcode::PackUint2x32:
291 case IR::Opcode::UnpackUint2x32:
292 case IR::Opcode::IAdd64:
293 case IR::Opcode::ISub64:
294 case IR::Opcode::INeg64:
295 case IR::Opcode::ShiftLeftLogical64:
296 case IR::Opcode::ShiftRightLogical64:
297 case IR::Opcode::ShiftRightArithmetic64:
298 case IR::Opcode::ConvertS64F16:
299 case IR::Opcode::ConvertS64F32:
300 case IR::Opcode::ConvertS64F64:
301 case IR::Opcode::ConvertU64F16:
302 case IR::Opcode::ConvertU64F32:
303 case IR::Opcode::ConvertU64F64:
304 case IR::Opcode::ConvertU64U32:
305 case IR::Opcode::ConvertU32U64:
306 case IR::Opcode::ConvertF16U64:
307 case IR::Opcode::ConvertF32U64:
308 case IR::Opcode::ConvertF64U64:
309 case IR::Opcode::SharedAtomicExchange64:
310 case IR::Opcode::GlobalAtomicIAdd64:
311 case IR::Opcode::GlobalAtomicSMin64:
312 case IR::Opcode::GlobalAtomicUMin64:
313 case IR::Opcode::GlobalAtomicSMax64:
314 case IR::Opcode::GlobalAtomicUMax64:
315 case IR::Opcode::GlobalAtomicAnd64:
316 case IR::Opcode::GlobalAtomicOr64:
317 case IR::Opcode::GlobalAtomicXor64:
318 case IR::Opcode::GlobalAtomicExchange64:
319 case IR::Opcode::StorageAtomicIAdd64:
320 case IR::Opcode::StorageAtomicSMin64:
321 case IR::Opcode::StorageAtomicUMin64:
322 case IR::Opcode::StorageAtomicSMax64:
323 case IR::Opcode::StorageAtomicUMax64:
324 case IR::Opcode::StorageAtomicAnd64:
325 case IR::Opcode::StorageAtomicOr64:
326 case IR::Opcode::StorageAtomicXor64:
327 case IR::Opcode::StorageAtomicExchange64:
328 info.uses_int64 = true;
329 break;
330 default:
331 break;
332 }
333 switch (inst.GetOpcode()) {
334 case IR::Opcode::WriteGlobalU8:
335 case IR::Opcode::WriteGlobalS8:
336 case IR::Opcode::WriteGlobalU16:
337 case IR::Opcode::WriteGlobalS16:
338 case IR::Opcode::WriteGlobal32:
339 case IR::Opcode::WriteGlobal64:
340 case IR::Opcode::WriteGlobal128:
341 case IR::Opcode::GlobalAtomicIAdd32:
342 case IR::Opcode::GlobalAtomicSMin32:
343 case IR::Opcode::GlobalAtomicUMin32:
344 case IR::Opcode::GlobalAtomicSMax32:
345 case IR::Opcode::GlobalAtomicUMax32:
346 case IR::Opcode::GlobalAtomicInc32:
347 case IR::Opcode::GlobalAtomicDec32:
348 case IR::Opcode::GlobalAtomicAnd32:
349 case IR::Opcode::GlobalAtomicOr32:
350 case IR::Opcode::GlobalAtomicXor32:
351 case IR::Opcode::GlobalAtomicExchange32:
352 case IR::Opcode::GlobalAtomicIAdd64:
353 case IR::Opcode::GlobalAtomicSMin64:
354 case IR::Opcode::GlobalAtomicUMin64:
355 case IR::Opcode::GlobalAtomicSMax64:
356 case IR::Opcode::GlobalAtomicUMax64:
357 case IR::Opcode::GlobalAtomicAnd64:
358 case IR::Opcode::GlobalAtomicOr64:
359 case IR::Opcode::GlobalAtomicXor64:
360 case IR::Opcode::GlobalAtomicExchange64:
361 case IR::Opcode::GlobalAtomicAddF32:
362 case IR::Opcode::GlobalAtomicAddF16x2:
363 case IR::Opcode::GlobalAtomicAddF32x2:
364 case IR::Opcode::GlobalAtomicMinF16x2:
365 case IR::Opcode::GlobalAtomicMinF32x2:
366 case IR::Opcode::GlobalAtomicMaxF16x2:
367 case IR::Opcode::GlobalAtomicMaxF32x2:
368 info.stores_global_memory = true;
369 [[fallthrough]];
370 case IR::Opcode::LoadGlobalU8:
371 case IR::Opcode::LoadGlobalS8:
372 case IR::Opcode::LoadGlobalU16:
373 case IR::Opcode::LoadGlobalS16:
374 case IR::Opcode::LoadGlobal32:
375 case IR::Opcode::LoadGlobal64:
376 case IR::Opcode::LoadGlobal128:
377 info.uses_int64 = true;
378 info.uses_global_memory = true;
379 info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2;
380 info.used_storage_buffer_types |= IR::Type::U32 | IR::Type::U32x2 | IR::Type::U32x4;
381 break;
382 default:
383 break;
384 }
385 switch (inst.GetOpcode()) {
386 case IR::Opcode::DemoteToHelperInvocation:
387 info.uses_demote_to_helper_invocation = true;
388 break;
389 case IR::Opcode::GetAttribute:
390 info.loads.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true;
391 break;
392 case IR::Opcode::SetAttribute:
393 info.stores.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true;
394 break;
395 case IR::Opcode::GetPatch:
396 GetPatch(info, inst.Arg(0).Patch());
397 break;
398 case IR::Opcode::SetPatch:
399 SetPatch(info, inst.Arg(0).Patch());
400 break;
401 case IR::Opcode::GetAttributeIndexed:
402 info.loads_indexed_attributes = true;
403 break;
404 case IR::Opcode::SetAttributeIndexed:
405 info.stores_indexed_attributes = true;
406 break;
407 case IR::Opcode::SetFragColor:
408 info.stores_frag_color[inst.Arg(0).U32()] = true;
409 break;
410 case IR::Opcode::SetSampleMask:
411 info.stores_sample_mask = true;
412 break;
413 case IR::Opcode::SetFragDepth:
414 info.stores_frag_depth = true;
415 break;
416 case IR::Opcode::WorkgroupId:
417 info.uses_workgroup_id = true;
418 break;
419 case IR::Opcode::LocalInvocationId:
420 info.uses_local_invocation_id = true;
421 break;
422 case IR::Opcode::InvocationId:
423 info.uses_invocation_id = true;
424 break;
425 case IR::Opcode::SampleId:
426 info.uses_sample_id = true;
427 break;
428 case IR::Opcode::IsHelperInvocation:
429 info.uses_is_helper_invocation = true;
430 break;
431 case IR::Opcode::LaneId:
432 info.uses_subgroup_invocation_id = true;
433 break;
434 case IR::Opcode::ShuffleIndex:
435 case IR::Opcode::ShuffleUp:
436 case IR::Opcode::ShuffleDown:
437 case IR::Opcode::ShuffleButterfly:
438 info.uses_subgroup_shuffles = true;
439 break;
440 case IR::Opcode::GetCbufU8:
441 case IR::Opcode::GetCbufS8:
442 case IR::Opcode::GetCbufU16:
443 case IR::Opcode::GetCbufS16:
444 case IR::Opcode::GetCbufU32:
445 case IR::Opcode::GetCbufF32:
446 case IR::Opcode::GetCbufU32x2: {
447 const IR::Value index{inst.Arg(0)};
448 const IR::Value offset{inst.Arg(1)};
449 if (!index.IsImmediate()) {
450 throw NotImplementedException("Constant buffer with non-immediate index");
451 }
452 AddConstantBufferDescriptor(info, index.U32(), 1);
453 u32 element_size{};
454 switch (inst.GetOpcode()) {
455 case IR::Opcode::GetCbufU8:
456 case IR::Opcode::GetCbufS8:
457 info.used_constant_buffer_types |= IR::Type::U8;
458 element_size = 1;
459 break;
460 case IR::Opcode::GetCbufU16:
461 case IR::Opcode::GetCbufS16:
462 info.used_constant_buffer_types |= IR::Type::U16;
463 element_size = 2;
464 break;
465 case IR::Opcode::GetCbufU32:
466 info.used_constant_buffer_types |= IR::Type::U32;
467 element_size = 4;
468 break;
469 case IR::Opcode::GetCbufF32:
470 info.used_constant_buffer_types |= IR::Type::F32;
471 element_size = 4;
472 break;
473 case IR::Opcode::GetCbufU32x2:
474 info.used_constant_buffer_types |= IR::Type::U32x2;
475 element_size = 8;
476 break;
477 default:
478 break;
479 }
480 u32& size{info.constant_buffer_used_sizes[index.U32()]};
481 if (offset.IsImmediate()) {
482 size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u);
483 } else {
484 size = 0x10'000;
485 }
486 break;
487 }
488 case IR::Opcode::BindlessImageSampleImplicitLod:
489 case IR::Opcode::BindlessImageSampleExplicitLod:
490 case IR::Opcode::BindlessImageSampleDrefImplicitLod:
491 case IR::Opcode::BindlessImageSampleDrefExplicitLod:
492 case IR::Opcode::BindlessImageGather:
493 case IR::Opcode::BindlessImageGatherDref:
494 case IR::Opcode::BindlessImageFetch:
495 case IR::Opcode::BindlessImageQueryDimensions:
496 case IR::Opcode::BindlessImageQueryLod:
497 case IR::Opcode::BindlessImageGradient:
498 case IR::Opcode::BoundImageSampleImplicitLod:
499 case IR::Opcode::BoundImageSampleExplicitLod:
500 case IR::Opcode::BoundImageSampleDrefImplicitLod:
501 case IR::Opcode::BoundImageSampleDrefExplicitLod:
502 case IR::Opcode::BoundImageGather:
503 case IR::Opcode::BoundImageGatherDref:
504 case IR::Opcode::BoundImageFetch:
505 case IR::Opcode::BoundImageQueryDimensions:
506 case IR::Opcode::BoundImageQueryLod:
507 case IR::Opcode::BoundImageGradient:
508 case IR::Opcode::ImageGather:
509 case IR::Opcode::ImageGatherDref:
510 case IR::Opcode::ImageFetch:
511 case IR::Opcode::ImageQueryDimensions:
512 case IR::Opcode::ImageGradient: {
513 const TextureType type{inst.Flags<IR::TextureInstInfo>().type};
514 info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D;
515 info.uses_sparse_residency |=
516 inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
517 break;
518 }
519 case IR::Opcode::ImageSampleImplicitLod:
520 case IR::Opcode::ImageSampleExplicitLod:
521 case IR::Opcode::ImageSampleDrefImplicitLod:
522 case IR::Opcode::ImageSampleDrefExplicitLod:
523 case IR::Opcode::ImageQueryLod: {
524 const auto flags{inst.Flags<IR::TextureInstInfo>()};
525 const TextureType type{flags.type};
526 info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D;
527 info.uses_shadow_lod |= flags.is_depth != 0;
528 info.uses_sparse_residency |=
529 inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
530 break;
531 }
532 case IR::Opcode::ImageRead: {
533 const auto flags{inst.Flags<IR::TextureInstInfo>()};
534 info.uses_typeless_image_reads |= flags.image_format == ImageFormat::Typeless;
535 info.uses_sparse_residency |=
536 inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
537 break;
538 }
539 case IR::Opcode::ImageWrite: {
540 const auto flags{inst.Flags<IR::TextureInstInfo>()};
541 info.uses_typeless_image_writes |= flags.image_format == ImageFormat::Typeless;
542 info.uses_image_buffers |= flags.type == TextureType::Buffer;
543 break;
544 }
545 case IR::Opcode::SubgroupEqMask:
546 case IR::Opcode::SubgroupLtMask:
547 case IR::Opcode::SubgroupLeMask:
548 case IR::Opcode::SubgroupGtMask:
549 case IR::Opcode::SubgroupGeMask:
550 info.uses_subgroup_mask = true;
551 break;
552 case IR::Opcode::VoteAll:
553 case IR::Opcode::VoteAny:
554 case IR::Opcode::VoteEqual:
555 case IR::Opcode::SubgroupBallot:
556 info.uses_subgroup_vote = true;
557 break;
558 case IR::Opcode::FSwizzleAdd:
559 info.uses_fswzadd = true;
560 break;
561 case IR::Opcode::DPdxFine:
562 case IR::Opcode::DPdyFine:
563 case IR::Opcode::DPdxCoarse:
564 case IR::Opcode::DPdyCoarse:
565 info.uses_derivatives = true;
566 break;
567 case IR::Opcode::LoadStorageU8:
568 case IR::Opcode::LoadStorageS8:
569 case IR::Opcode::WriteStorageU8:
570 case IR::Opcode::WriteStorageS8:
571 info.used_storage_buffer_types |= IR::Type::U8;
572 break;
573 case IR::Opcode::LoadStorageU16:
574 case IR::Opcode::LoadStorageS16:
575 case IR::Opcode::WriteStorageU16:
576 case IR::Opcode::WriteStorageS16:
577 info.used_storage_buffer_types |= IR::Type::U16;
578 break;
579 case IR::Opcode::LoadStorage32:
580 case IR::Opcode::WriteStorage32:
581 case IR::Opcode::StorageAtomicIAdd32:
582 case IR::Opcode::StorageAtomicUMin32:
583 case IR::Opcode::StorageAtomicUMax32:
584 case IR::Opcode::StorageAtomicAnd32:
585 case IR::Opcode::StorageAtomicOr32:
586 case IR::Opcode::StorageAtomicXor32:
587 case IR::Opcode::StorageAtomicExchange32:
588 info.used_storage_buffer_types |= IR::Type::U32;
589 break;
590 case IR::Opcode::LoadStorage64:
591 case IR::Opcode::WriteStorage64:
592 info.used_storage_buffer_types |= IR::Type::U32x2;
593 break;
594 case IR::Opcode::LoadStorage128:
595 case IR::Opcode::WriteStorage128:
596 info.used_storage_buffer_types |= IR::Type::U32x4;
597 break;
598 case IR::Opcode::SharedAtomicSMin32:
599 info.uses_atomic_s32_min = true;
600 break;
601 case IR::Opcode::SharedAtomicSMax32:
602 info.uses_atomic_s32_max = true;
603 break;
604 case IR::Opcode::SharedAtomicInc32:
605 info.uses_shared_increment = true;
606 break;
607 case IR::Opcode::SharedAtomicDec32:
608 info.uses_shared_decrement = true;
609 break;
610 case IR::Opcode::SharedAtomicExchange64:
611 info.uses_int64_bit_atomics = true;
612 break;
613 case IR::Opcode::GlobalAtomicInc32:
614 case IR::Opcode::StorageAtomicInc32:
615 info.used_storage_buffer_types |= IR::Type::U32;
616 info.uses_global_increment = true;
617 break;
618 case IR::Opcode::GlobalAtomicDec32:
619 case IR::Opcode::StorageAtomicDec32:
620 info.used_storage_buffer_types |= IR::Type::U32;
621 info.uses_global_decrement = true;
622 break;
623 case IR::Opcode::GlobalAtomicAddF32:
624 case IR::Opcode::StorageAtomicAddF32:
625 info.used_storage_buffer_types |= IR::Type::U32;
626 info.uses_atomic_f32_add = true;
627 break;
628 case IR::Opcode::GlobalAtomicAddF16x2:
629 case IR::Opcode::StorageAtomicAddF16x2:
630 info.used_storage_buffer_types |= IR::Type::U32;
631 info.uses_atomic_f16x2_add = true;
632 break;
633 case IR::Opcode::GlobalAtomicAddF32x2:
634 case IR::Opcode::StorageAtomicAddF32x2:
635 info.used_storage_buffer_types |= IR::Type::U32;
636 info.uses_atomic_f32x2_add = true;
637 break;
638 case IR::Opcode::GlobalAtomicMinF16x2:
639 case IR::Opcode::StorageAtomicMinF16x2:
640 info.used_storage_buffer_types |= IR::Type::U32;
641 info.uses_atomic_f16x2_min = true;
642 break;
643 case IR::Opcode::GlobalAtomicMinF32x2:
644 case IR::Opcode::StorageAtomicMinF32x2:
645 info.used_storage_buffer_types |= IR::Type::U32;
646 info.uses_atomic_f32x2_min = true;
647 break;
648 case IR::Opcode::GlobalAtomicMaxF16x2:
649 case IR::Opcode::StorageAtomicMaxF16x2:
650 info.used_storage_buffer_types |= IR::Type::U32;
651 info.uses_atomic_f16x2_max = true;
652 break;
653 case IR::Opcode::GlobalAtomicMaxF32x2:
654 case IR::Opcode::StorageAtomicMaxF32x2:
655 info.used_storage_buffer_types |= IR::Type::U32;
656 info.uses_atomic_f32x2_max = true;
657 break;
658 case IR::Opcode::StorageAtomicSMin32:
659 info.used_storage_buffer_types |= IR::Type::U32;
660 info.uses_atomic_s32_min = true;
661 break;
662 case IR::Opcode::StorageAtomicSMax32:
663 info.used_storage_buffer_types |= IR::Type::U32;
664 info.uses_atomic_s32_max = true;
665 break;
666 case IR::Opcode::GlobalAtomicIAdd64:
667 case IR::Opcode::GlobalAtomicSMin64:
668 case IR::Opcode::GlobalAtomicUMin64:
669 case IR::Opcode::GlobalAtomicSMax64:
670 case IR::Opcode::GlobalAtomicUMax64:
671 case IR::Opcode::GlobalAtomicAnd64:
672 case IR::Opcode::GlobalAtomicOr64:
673 case IR::Opcode::GlobalAtomicXor64:
674 case IR::Opcode::GlobalAtomicExchange64:
675 case IR::Opcode::StorageAtomicIAdd64:
676 case IR::Opcode::StorageAtomicSMin64:
677 case IR::Opcode::StorageAtomicUMin64:
678 case IR::Opcode::StorageAtomicSMax64:
679 case IR::Opcode::StorageAtomicUMax64:
680 case IR::Opcode::StorageAtomicAnd64:
681 case IR::Opcode::StorageAtomicOr64:
682 case IR::Opcode::StorageAtomicXor64:
683 info.used_storage_buffer_types |= IR::Type::U64;
684 info.uses_int64_bit_atomics = true;
685 break;
686 case IR::Opcode::BindlessImageAtomicIAdd32:
687 case IR::Opcode::BindlessImageAtomicSMin32:
688 case IR::Opcode::BindlessImageAtomicUMin32:
689 case IR::Opcode::BindlessImageAtomicSMax32:
690 case IR::Opcode::BindlessImageAtomicUMax32:
691 case IR::Opcode::BindlessImageAtomicInc32:
692 case IR::Opcode::BindlessImageAtomicDec32:
693 case IR::Opcode::BindlessImageAtomicAnd32:
694 case IR::Opcode::BindlessImageAtomicOr32:
695 case IR::Opcode::BindlessImageAtomicXor32:
696 case IR::Opcode::BindlessImageAtomicExchange32:
697 case IR::Opcode::BoundImageAtomicIAdd32:
698 case IR::Opcode::BoundImageAtomicSMin32:
699 case IR::Opcode::BoundImageAtomicUMin32:
700 case IR::Opcode::BoundImageAtomicSMax32:
701 case IR::Opcode::BoundImageAtomicUMax32:
702 case IR::Opcode::BoundImageAtomicInc32:
703 case IR::Opcode::BoundImageAtomicDec32:
704 case IR::Opcode::BoundImageAtomicAnd32:
705 case IR::Opcode::BoundImageAtomicOr32:
706 case IR::Opcode::BoundImageAtomicXor32:
707 case IR::Opcode::BoundImageAtomicExchange32:
708 case IR::Opcode::ImageAtomicIAdd32:
709 case IR::Opcode::ImageAtomicSMin32:
710 case IR::Opcode::ImageAtomicUMin32:
711 case IR::Opcode::ImageAtomicSMax32:
712 case IR::Opcode::ImageAtomicUMax32:
713 case IR::Opcode::ImageAtomicInc32:
714 case IR::Opcode::ImageAtomicDec32:
715 case IR::Opcode::ImageAtomicAnd32:
716 case IR::Opcode::ImageAtomicOr32:
717 case IR::Opcode::ImageAtomicXor32:
718 case IR::Opcode::ImageAtomicExchange32:
719 info.uses_atomic_image_u32 = true;
720 break;
721 default:
722 break;
723 }
724}
725
726void VisitFpModifiers(Info& info, IR::Inst& inst) {
727 switch (inst.GetOpcode()) {
728 case IR::Opcode::FPAdd16:
729 case IR::Opcode::FPFma16:
730 case IR::Opcode::FPMul16:
731 case IR::Opcode::FPRoundEven16:
732 case IR::Opcode::FPFloor16:
733 case IR::Opcode::FPCeil16:
734 case IR::Opcode::FPTrunc16: {
735 const auto control{inst.Flags<IR::FpControl>()};
736 switch (control.fmz_mode) {
737 case IR::FmzMode::DontCare:
738 break;
739 case IR::FmzMode::FTZ:
740 case IR::FmzMode::FMZ:
741 info.uses_fp16_denorms_flush = true;
742 break;
743 case IR::FmzMode::None:
744 info.uses_fp16_denorms_preserve = true;
745 break;
746 }
747 break;
748 }
749 case IR::Opcode::FPAdd32:
750 case IR::Opcode::FPFma32:
751 case IR::Opcode::FPMul32:
752 case IR::Opcode::FPRoundEven32:
753 case IR::Opcode::FPFloor32:
754 case IR::Opcode::FPCeil32:
755 case IR::Opcode::FPTrunc32:
756 case IR::Opcode::FPOrdEqual32:
757 case IR::Opcode::FPUnordEqual32:
758 case IR::Opcode::FPOrdNotEqual32:
759 case IR::Opcode::FPUnordNotEqual32:
760 case IR::Opcode::FPOrdLessThan32:
761 case IR::Opcode::FPUnordLessThan32:
762 case IR::Opcode::FPOrdGreaterThan32:
763 case IR::Opcode::FPUnordGreaterThan32:
764 case IR::Opcode::FPOrdLessThanEqual32:
765 case IR::Opcode::FPUnordLessThanEqual32:
766 case IR::Opcode::FPOrdGreaterThanEqual32:
767 case IR::Opcode::FPUnordGreaterThanEqual32:
768 case IR::Opcode::ConvertF16F32:
769 case IR::Opcode::ConvertF64F32: {
770 const auto control{inst.Flags<IR::FpControl>()};
771 switch (control.fmz_mode) {
772 case IR::FmzMode::DontCare:
773 break;
774 case IR::FmzMode::FTZ:
775 case IR::FmzMode::FMZ:
776 info.uses_fp32_denorms_flush = true;
777 break;
778 case IR::FmzMode::None:
779 info.uses_fp32_denorms_preserve = true;
780 break;
781 }
782 break;
783 }
784 default:
785 break;
786 }
787}
788
789void VisitCbufs(Info& info, IR::Inst& inst) {
790 switch (inst.GetOpcode()) {
791 case IR::Opcode::GetCbufU8:
792 case IR::Opcode::GetCbufS8:
793 case IR::Opcode::GetCbufU16:
794 case IR::Opcode::GetCbufS16:
795 case IR::Opcode::GetCbufU32:
796 case IR::Opcode::GetCbufF32:
797 case IR::Opcode::GetCbufU32x2: {
798 CheckCBufNVN(info, inst);
799 break;
800 }
801 default:
802 break;
803 }
804}
805
806void Visit(Info& info, IR::Inst& inst) {
807 VisitUsages(info, inst);
808 VisitFpModifiers(info, inst);
809 VisitCbufs(info, inst);
810}
811
812void GatherInfoFromHeader(Environment& env, Info& info) {
813 Stage stage{env.ShaderStage()};
814 if (stage == Stage::Compute) {
815 return;
816 }
817 const auto& header{env.SPH()};
818 if (stage == Stage::Fragment) {
819 if (!info.loads_indexed_attributes) {
820 return;
821 }
822 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
823 const size_t offset{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4};
824 const auto vector{header.ps.imap_generic_vector[index]};
825 info.loads.mask[offset + 0] = vector.x != PixelImap::Unused;
826 info.loads.mask[offset + 1] = vector.y != PixelImap::Unused;
827 info.loads.mask[offset + 2] = vector.z != PixelImap::Unused;
828 info.loads.mask[offset + 3] = vector.w != PixelImap::Unused;
829 }
830 return;
831 }
832 if (info.loads_indexed_attributes) {
833 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
834 const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4};
835 const auto mask = header.vtg.InputGeneric(index);
836 for (size_t i = 0; i < 4; ++i) {
837 info.loads.Set(attribute + i, mask[i]);
838 }
839 }
840 for (size_t index = 0; index < 8; ++index) {
841 const u16 mask{header.vtg.clip_distances};
842 info.loads.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0);
843 }
844 info.loads.Set(IR::Attribute::PrimitiveId, header.vtg.imap_systemb.primitive_array_id != 0);
845 info.loads.Set(IR::Attribute::Layer, header.vtg.imap_systemb.rt_array_index != 0);
846 info.loads.Set(IR::Attribute::ViewportIndex, header.vtg.imap_systemb.viewport_index != 0);
847 info.loads.Set(IR::Attribute::PointSize, header.vtg.imap_systemb.point_size != 0);
848 info.loads.Set(IR::Attribute::PositionX, header.vtg.imap_systemb.position_x != 0);
849 info.loads.Set(IR::Attribute::PositionY, header.vtg.imap_systemb.position_y != 0);
850 info.loads.Set(IR::Attribute::PositionZ, header.vtg.imap_systemb.position_z != 0);
851 info.loads.Set(IR::Attribute::PositionW, header.vtg.imap_systemb.position_w != 0);
852 info.loads.Set(IR::Attribute::PointSpriteS, header.vtg.point_sprite_s != 0);
853 info.loads.Set(IR::Attribute::PointSpriteT, header.vtg.point_sprite_t != 0);
854 info.loads.Set(IR::Attribute::FogCoordinate, header.vtg.fog_coordinate != 0);
855 info.loads.Set(IR::Attribute::TessellationEvaluationPointU,
856 header.vtg.tessellation_eval_point_u != 0);
857 info.loads.Set(IR::Attribute::TessellationEvaluationPointV,
858 header.vtg.tessellation_eval_point_v != 0);
859 info.loads.Set(IR::Attribute::InstanceId, header.vtg.instance_id != 0);
860 info.loads.Set(IR::Attribute::VertexId, header.vtg.vertex_id != 0);
861 // TODO: Legacy varyings
862 }
863 if (info.stores_indexed_attributes) {
864 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
865 const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4};
866 const auto mask{header.vtg.OutputGeneric(index)};
867 for (size_t i = 0; i < 4; ++i) {
868 info.stores.Set(attribute + i, mask[i]);
869 }
870 }
871 for (size_t index = 0; index < 8; ++index) {
872 const u16 mask{header.vtg.omap_systemc.clip_distances};
873 info.stores.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0);
874 }
875 info.stores.Set(IR::Attribute::PrimitiveId,
876 header.vtg.omap_systemb.primitive_array_id != 0);
877 info.stores.Set(IR::Attribute::Layer, header.vtg.omap_systemb.rt_array_index != 0);
878 info.stores.Set(IR::Attribute::ViewportIndex, header.vtg.omap_systemb.viewport_index != 0);
879 info.stores.Set(IR::Attribute::PointSize, header.vtg.omap_systemb.point_size != 0);
880 info.stores.Set(IR::Attribute::PositionX, header.vtg.omap_systemb.position_x != 0);
881 info.stores.Set(IR::Attribute::PositionY, header.vtg.omap_systemb.position_y != 0);
882 info.stores.Set(IR::Attribute::PositionZ, header.vtg.omap_systemb.position_z != 0);
883 info.stores.Set(IR::Attribute::PositionW, header.vtg.omap_systemb.position_w != 0);
884 info.stores.Set(IR::Attribute::PointSpriteS, header.vtg.omap_systemc.point_sprite_s != 0);
885 info.stores.Set(IR::Attribute::PointSpriteT, header.vtg.omap_systemc.point_sprite_t != 0);
886 info.stores.Set(IR::Attribute::FogCoordinate, header.vtg.omap_systemc.fog_coordinate != 0);
887 info.stores.Set(IR::Attribute::TessellationEvaluationPointU,
888 header.vtg.omap_systemc.tessellation_eval_point_u != 0);
889 info.stores.Set(IR::Attribute::TessellationEvaluationPointV,
890 header.vtg.omap_systemc.tessellation_eval_point_v != 0);
891 info.stores.Set(IR::Attribute::InstanceId, header.vtg.omap_systemc.instance_id != 0);
892 info.stores.Set(IR::Attribute::VertexId, header.vtg.omap_systemc.vertex_id != 0);
893 // TODO: Legacy varyings
894 }
895}
896} // Anonymous namespace
897
898void CollectShaderInfoPass(Environment& env, IR::Program& program) {
899 Info& info{program.info};
900 const u32 base{[&] {
901 switch (program.stage) {
902 case Stage::VertexA:
903 case Stage::VertexB:
904 return 0x110u;
905 case Stage::TessellationControl:
906 return 0x210u;
907 case Stage::TessellationEval:
908 return 0x310u;
909 case Stage::Geometry:
910 return 0x410u;
911 case Stage::Fragment:
912 return 0x510u;
913 case Stage::Compute:
914 return 0x310u;
915 }
916 throw InvalidArgument("Invalid stage {}", program.stage);
917 }()};
918 info.nvn_buffer_base = base;
919
920 for (IR::Block* const block : program.post_order_blocks) {
921 for (IR::Inst& inst : block->Instructions()) {
922 Visit(info, inst);
923 }
924 }
925 GatherInfoFromHeader(env, info);
926}
927
928} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
new file mode 100644
index 000000000..8dd6d6c2c
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -0,0 +1,610 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <tuple>
7#include <type_traits>
8
9#include "common/bit_cast.h"
10#include "common/bit_util.h"
11#include "shader_recompiler/exception.h"
12#include "shader_recompiler/frontend/ir/ir_emitter.h"
13#include "shader_recompiler/frontend/ir/value.h"
14#include "shader_recompiler/ir_opt/passes.h"
15
16namespace Shader::Optimization {
17namespace {
18// Metaprogramming stuff to get arguments information out of a lambda
19template <typename Func>
20struct LambdaTraits : LambdaTraits<decltype(&std::remove_reference_t<Func>::operator())> {};
21
22template <typename ReturnType, typename LambdaType, typename... Args>
23struct LambdaTraits<ReturnType (LambdaType::*)(Args...) const> {
24 template <size_t I>
25 using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
26
27 static constexpr size_t NUM_ARGS{sizeof...(Args)};
28};
29
30template <typename T>
31[[nodiscard]] T Arg(const IR::Value& value) {
32 if constexpr (std::is_same_v<T, bool>) {
33 return value.U1();
34 } else if constexpr (std::is_same_v<T, u32>) {
35 return value.U32();
36 } else if constexpr (std::is_same_v<T, s32>) {
37 return static_cast<s32>(value.U32());
38 } else if constexpr (std::is_same_v<T, f32>) {
39 return value.F32();
40 } else if constexpr (std::is_same_v<T, u64>) {
41 return value.U64();
42 }
43}
44
45template <typename T, typename ImmFn>
46bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
47 const IR::Value lhs{inst.Arg(0)};
48 const IR::Value rhs{inst.Arg(1)};
49
50 const bool is_lhs_immediate{lhs.IsImmediate()};
51 const bool is_rhs_immediate{rhs.IsImmediate()};
52
53 if (is_lhs_immediate && is_rhs_immediate) {
54 const auto result{imm_fn(Arg<T>(lhs), Arg<T>(rhs))};
55 inst.ReplaceUsesWith(IR::Value{result});
56 return false;
57 }
58 if (is_lhs_immediate && !is_rhs_immediate) {
59 IR::Inst* const rhs_inst{rhs.InstRecursive()};
60 if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) {
61 const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))};
62 inst.SetArg(0, rhs_inst->Arg(0));
63 inst.SetArg(1, IR::Value{combined});
64 } else {
65 // Normalize
66 inst.SetArg(0, rhs);
67 inst.SetArg(1, lhs);
68 }
69 }
70 if (!is_lhs_immediate && is_rhs_immediate) {
71 const IR::Inst* const lhs_inst{lhs.InstRecursive()};
72 if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) {
73 const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))};
74 inst.SetArg(0, lhs_inst->Arg(0));
75 inst.SetArg(1, IR::Value{combined});
76 }
77 }
78 return true;
79}
80
81template <typename Func>
82bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
83 if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
84 return false;
85 }
86 using Indices = std::make_index_sequence<LambdaTraits<decltype(func)>::NUM_ARGS>;
87 inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{}));
88 return true;
89}
90
91void FoldGetRegister(IR::Inst& inst) {
92 if (inst.Arg(0).Reg() == IR::Reg::RZ) {
93 inst.ReplaceUsesWith(IR::Value{u32{0}});
94 }
95}
96
97void FoldGetPred(IR::Inst& inst) {
98 if (inst.Arg(0).Pred() == IR::Pred::PT) {
99 inst.ReplaceUsesWith(IR::Value{true});
100 }
101}
102
103/// Replaces the pattern generated by two XMAD multiplications
104bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
105 /*
106 * We are looking for this pattern:
107 * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16
108 * %rhs_mul = IMul32 %rhs_bfe, %factor_b
109 * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16
110 * %lhs_mul = IMul32 %lhs_bfe, %factor_b
111 * %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16
112 * %result = IAdd32 %lhs_shl, %rhs_mul
113 *
114 * And replacing it with
115 * %result = IMul32 %factor_a, %factor_b
116 *
117 * This optimization has been proven safe by LLVM and MSVC.
118 */
119 const IR::Value lhs_arg{inst.Arg(0)};
120 const IR::Value rhs_arg{inst.Arg(1)};
121 if (lhs_arg.IsImmediate() || rhs_arg.IsImmediate()) {
122 return false;
123 }
124 IR::Inst* const lhs_shl{lhs_arg.InstRecursive()};
125 if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
126 lhs_shl->Arg(1) != IR::Value{16U}) {
127 return false;
128 }
129 if (lhs_shl->Arg(0).IsImmediate()) {
130 return false;
131 }
132 IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()};
133 IR::Inst* const rhs_mul{rhs_arg.InstRecursive()};
134 if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) {
135 return false;
136 }
137 if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) {
138 return false;
139 }
140 const IR::U32 factor_b{lhs_mul->Arg(1)};
141 if (lhs_mul->Arg(0).IsImmediate() || rhs_mul->Arg(0).IsImmediate()) {
142 return false;
143 }
144 IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()};
145 IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()};
146 if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
147 return false;
148 }
149 if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
150 return false;
151 }
152 if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) {
153 return false;
154 }
155 if (rhs_bfe->Arg(1) != IR::Value{0U} || rhs_bfe->Arg(2) != IR::Value{16U}) {
156 return false;
157 }
158 if (lhs_bfe->Arg(0).Resolve() != rhs_bfe->Arg(0).Resolve()) {
159 return false;
160 }
161 const IR::U32 factor_a{lhs_bfe->Arg(0)};
162 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
163 inst.ReplaceUsesWith(ir.IMul(factor_a, factor_b));
164 return true;
165}
166
167template <typename T>
168void FoldAdd(IR::Block& block, IR::Inst& inst) {
169 if (inst.HasAssociatedPseudoOperation()) {
170 return;
171 }
172 if (!FoldCommutative<T>(inst, [](T a, T b) { return a + b; })) {
173 return;
174 }
175 const IR::Value rhs{inst.Arg(1)};
176 if (rhs.IsImmediate() && Arg<T>(rhs) == 0) {
177 inst.ReplaceUsesWith(inst.Arg(0));
178 return;
179 }
180 if constexpr (std::is_same_v<T, u32>) {
181 if (FoldXmadMultiply(block, inst)) {
182 return;
183 }
184 }
185}
186
187void FoldISub32(IR::Inst& inst) {
188 if (FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a - b; })) {
189 return;
190 }
191 if (inst.Arg(0).IsImmediate() || inst.Arg(1).IsImmediate()) {
192 return;
193 }
194 // ISub32 is generally used to subtract two constant buffer reads; compare them and replace
195 // the result with zero when they are equal.
196 const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) {
197 return a->GetOpcode() == IR::Opcode::GetCbufU32 &&
198 b->GetOpcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) &&
199 a->Arg(1) == b->Arg(1);
200 }};
201 IR::Inst* op_a{inst.Arg(0).InstRecursive()};
202 IR::Inst* op_b{inst.Arg(1).InstRecursive()};
203 if (equal_cbuf(op_a, op_b)) {
204 inst.ReplaceUsesWith(IR::Value{u32{0}});
205 return;
206 }
207 // It's also possible a value is being added to a cbuf and then subtracted
208 if (op_b->GetOpcode() == IR::Opcode::IAdd32) {
209 // Canonicalize local variables to simplify the following logic
210 std::swap(op_a, op_b);
211 }
212 if (op_b->GetOpcode() != IR::Opcode::GetCbufU32) {
213 return;
214 }
215 IR::Inst* const inst_cbuf{op_b};
216 if (op_a->GetOpcode() != IR::Opcode::IAdd32) {
217 return;
218 }
219 IR::Value add_op_a{op_a->Arg(0)};
220 IR::Value add_op_b{op_a->Arg(1)};
221 if (add_op_b.IsImmediate()) {
222 // Canonicalize
223 std::swap(add_op_a, add_op_b);
224 }
225 if (add_op_b.IsImmediate()) {
226 return;
227 }
228 IR::Inst* const add_cbuf{add_op_b.InstRecursive()};
229 if (equal_cbuf(add_cbuf, inst_cbuf)) {
230 inst.ReplaceUsesWith(add_op_a);
231 }
232}
233
234void FoldSelect(IR::Inst& inst) {
235 const IR::Value cond{inst.Arg(0)};
236 if (cond.IsImmediate()) {
237 inst.ReplaceUsesWith(cond.U1() ? inst.Arg(1) : inst.Arg(2));
238 }
239}
240
241void FoldFPMul32(IR::Inst& inst) {
242 const auto control{inst.Flags<IR::FpControl>()};
243 if (control.no_contraction) {
244 return;
245 }
246 // Fold interpolation operations
247 const IR::Value lhs_value{inst.Arg(0)};
248 const IR::Value rhs_value{inst.Arg(1)};
249 if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
250 return;
251 }
252 IR::Inst* const lhs_op{lhs_value.InstRecursive()};
253 IR::Inst* const rhs_op{rhs_value.InstRecursive()};
254 if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 ||
255 rhs_op->GetOpcode() != IR::Opcode::FPRecip32) {
256 return;
257 }
258 const IR::Value recip_source{rhs_op->Arg(0)};
259 const IR::Value lhs_mul_source{lhs_op->Arg(1).Resolve()};
260 if (recip_source.IsImmediate() || lhs_mul_source.IsImmediate()) {
261 return;
262 }
263 IR::Inst* const attr_a{recip_source.InstRecursive()};
264 IR::Inst* const attr_b{lhs_mul_source.InstRecursive()};
265 if (attr_a->GetOpcode() != IR::Opcode::GetAttribute ||
266 attr_b->GetOpcode() != IR::Opcode::GetAttribute) {
267 return;
268 }
269 if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) {
270 inst.ReplaceUsesWith(lhs_op->Arg(0));
271 }
272}
273
274void FoldLogicalAnd(IR::Inst& inst) {
275 if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a && b; })) {
276 return;
277 }
278 const IR::Value rhs{inst.Arg(1)};
279 if (rhs.IsImmediate()) {
280 if (rhs.U1()) {
281 inst.ReplaceUsesWith(inst.Arg(0));
282 } else {
283 inst.ReplaceUsesWith(IR::Value{false});
284 }
285 }
286}
287
288void FoldLogicalOr(IR::Inst& inst) {
289 if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a || b; })) {
290 return;
291 }
292 const IR::Value rhs{inst.Arg(1)};
293 if (rhs.IsImmediate()) {
294 if (rhs.U1()) {
295 inst.ReplaceUsesWith(IR::Value{true});
296 } else {
297 inst.ReplaceUsesWith(inst.Arg(0));
298 }
299 }
300}
301
302void FoldLogicalNot(IR::Inst& inst) {
303 const IR::U1 value{inst.Arg(0)};
304 if (value.IsImmediate()) {
305 inst.ReplaceUsesWith(IR::Value{!value.U1()});
306 return;
307 }
308 IR::Inst* const arg{value.InstRecursive()};
309 if (arg->GetOpcode() == IR::Opcode::LogicalNot) {
310 inst.ReplaceUsesWith(arg->Arg(0));
311 }
312}
313
314template <IR::Opcode op, typename Dest, typename Source>
315void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
316 const IR::Value value{inst.Arg(0)};
317 if (value.IsImmediate()) {
318 inst.ReplaceUsesWith(IR::Value{Common::BitCast<Dest>(Arg<Source>(value))});
319 return;
320 }
321 IR::Inst* const arg_inst{value.InstRecursive()};
322 if (arg_inst->GetOpcode() == reverse) {
323 inst.ReplaceUsesWith(arg_inst->Arg(0));
324 return;
325 }
326 if constexpr (op == IR::Opcode::BitCastF32U32) {
327 if (arg_inst->GetOpcode() == IR::Opcode::GetCbufU32) {
328 // Replace the bitcast with a typed constant buffer read
329 inst.ReplaceOpcode(IR::Opcode::GetCbufF32);
330 inst.SetArg(0, arg_inst->Arg(0));
331 inst.SetArg(1, arg_inst->Arg(1));
332 return;
333 }
334 }
335}
336
337void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
338 const IR::Value value{inst.Arg(0)};
339 if (value.IsImmediate()) {
340 return;
341 }
342 IR::Inst* const arg_inst{value.InstRecursive()};
343 if (arg_inst->GetOpcode() == reverse) {
344 inst.ReplaceUsesWith(arg_inst->Arg(0));
345 return;
346 }
347}
348
349template <typename Func, size_t... I>
350IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<I...>) {
351 using Traits = LambdaTraits<decltype(func)>;
352 return IR::Value{func(Arg<typename Traits::template ArgType<I>>(inst.Arg(I))...)};
353}
354
355std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert,
356 IR::Opcode construct, u32 first_index) {
357 IR::Inst* const inst{inst_value.InstRecursive()};
358 if (inst->GetOpcode() == construct) {
359 return inst->Arg(first_index);
360 }
361 if (inst->GetOpcode() != insert) {
362 return std::nullopt;
363 }
364 IR::Value value_index{inst->Arg(2)};
365 if (!value_index.IsImmediate()) {
366 return std::nullopt;
367 }
368 const u32 second_index{value_index.U32()};
369 if (first_index != second_index) {
370 IR::Value value_composite{inst->Arg(0)};
371 if (value_composite.IsImmediate()) {
372 return std::nullopt;
373 }
374 return FoldCompositeExtractImpl(value_composite, insert, construct, first_index);
375 }
376 return inst->Arg(1);
377}
378
379void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode insert) {
380 const IR::Value value_1{inst.Arg(0)};
381 const IR::Value value_2{inst.Arg(1)};
382 if (value_1.IsImmediate()) {
383 return;
384 }
385 if (!value_2.IsImmediate()) {
386 return;
387 }
388 const u32 first_index{value_2.U32()};
389 const std::optional result{FoldCompositeExtractImpl(value_1, insert, construct, first_index)};
390 if (!result) {
391 return;
392 }
393 inst.ReplaceUsesWith(*result);
394}
395
396IR::Value GetThroughCast(IR::Value value, IR::Opcode expected_cast) {
397 if (value.IsImmediate()) {
398 return value;
399 }
400 IR::Inst* const inst{value.InstRecursive()};
401 if (inst->GetOpcode() == expected_cast) {
402 return inst->Arg(0).Resolve();
403 }
404 return value;
405}
406
407void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
408 const IR::Value swizzle{inst.Arg(2)};
409 if (!swizzle.IsImmediate()) {
410 return;
411 }
412 const IR::Value value_1{GetThroughCast(inst.Arg(0).Resolve(), IR::Opcode::BitCastF32U32)};
413 const IR::Value value_2{GetThroughCast(inst.Arg(1).Resolve(), IR::Opcode::BitCastF32U32)};
414 if (value_1.IsImmediate()) {
415 return;
416 }
417 const u32 swizzle_value{swizzle.U32()};
418 if (swizzle_value != 0x99 && swizzle_value != 0xA5) {
419 return;
420 }
421 IR::Inst* const inst2{value_1.InstRecursive()};
422 if (inst2->GetOpcode() != IR::Opcode::ShuffleButterfly) {
423 return;
424 }
425 const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)};
426 if (value_2 != value_3) {
427 return;
428 }
429 const IR::Value index{inst2->Arg(1)};
430 const IR::Value clamp{inst2->Arg(2)};
431 const IR::Value segmentation_mask{inst2->Arg(3)};
432 if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) {
433 return;
434 }
435 if (clamp.U32() != 3 || segmentation_mask.U32() != 28) {
436 return;
437 }
438 if (swizzle_value == 0x99) {
439 // DPdxFine
440 if (index.U32() == 1) {
441 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
442 inst.ReplaceUsesWith(ir.DPdxFine(IR::F32{inst.Arg(1)}));
443 }
444 } else if (swizzle_value == 0xA5) {
445 // DPdyFine
446 if (index.U32() == 2) {
447 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
448 inst.ReplaceUsesWith(ir.DPdyFine(IR::F32{inst.Arg(1)}));
449 }
450 }
451}
452
453void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
454 switch (inst.GetOpcode()) {
455 case IR::Opcode::GetRegister:
456 return FoldGetRegister(inst);
457 case IR::Opcode::GetPred:
458 return FoldGetPred(inst);
459 case IR::Opcode::IAdd32:
460 return FoldAdd<u32>(block, inst);
461 case IR::Opcode::ISub32:
462 return FoldISub32(inst);
463 case IR::Opcode::IMul32:
464 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
465 return;
466 case IR::Opcode::ShiftRightArithmetic32:
467 FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast<u32>(a >> b); });
468 return;
469 case IR::Opcode::BitCastF32U32:
470 return FoldBitCast<IR::Opcode::BitCastF32U32, f32, u32>(inst, IR::Opcode::BitCastU32F32);
471 case IR::Opcode::BitCastU32F32:
472 return FoldBitCast<IR::Opcode::BitCastU32F32, u32, f32>(inst, IR::Opcode::BitCastF32U32);
473 case IR::Opcode::IAdd64:
474 return FoldAdd<u64>(block, inst);
475 case IR::Opcode::PackHalf2x16:
476 return FoldInverseFunc(inst, IR::Opcode::UnpackHalf2x16);
477 case IR::Opcode::UnpackHalf2x16:
478 return FoldInverseFunc(inst, IR::Opcode::PackHalf2x16);
479 case IR::Opcode::SelectU1:
480 case IR::Opcode::SelectU8:
481 case IR::Opcode::SelectU16:
482 case IR::Opcode::SelectU32:
483 case IR::Opcode::SelectU64:
484 case IR::Opcode::SelectF16:
485 case IR::Opcode::SelectF32:
486 case IR::Opcode::SelectF64:
487 return FoldSelect(inst);
488 case IR::Opcode::FPMul32:
489 return FoldFPMul32(inst);
490 case IR::Opcode::LogicalAnd:
491 return FoldLogicalAnd(inst);
492 case IR::Opcode::LogicalOr:
493 return FoldLogicalOr(inst);
494 case IR::Opcode::LogicalNot:
495 return FoldLogicalNot(inst);
496 case IR::Opcode::SLessThan:
497 FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; });
498 return;
499 case IR::Opcode::ULessThan:
500 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; });
501 return;
502 case IR::Opcode::SLessThanEqual:
503 FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; });
504 return;
505 case IR::Opcode::ULessThanEqual:
506 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; });
507 return;
508 case IR::Opcode::SGreaterThan:
509 FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; });
510 return;
511 case IR::Opcode::UGreaterThan:
512 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; });
513 return;
514 case IR::Opcode::SGreaterThanEqual:
515 FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; });
516 return;
517 case IR::Opcode::UGreaterThanEqual:
518 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; });
519 return;
520 case IR::Opcode::IEqual:
521 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; });
522 return;
523 case IR::Opcode::INotEqual:
524 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; });
525 return;
526 case IR::Opcode::BitwiseAnd32:
527 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a & b; });
528 return;
529 case IR::Opcode::BitwiseOr32:
530 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a | b; });
531 return;
532 case IR::Opcode::BitwiseXor32:
533 FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a ^ b; });
534 return;
535 case IR::Opcode::BitFieldUExtract:
536 FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) {
537 if (static_cast<size_t>(shift) + static_cast<size_t>(count) > 32) {
538 throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract,
539 base, shift, count);
540 }
541 return (base >> shift) & ((1U << count) - 1);
542 });
543 return;
544 case IR::Opcode::BitFieldSExtract:
545 FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) {
546 const size_t back_shift{static_cast<size_t>(shift) + static_cast<size_t>(count)};
547 const size_t left_shift{32 - back_shift};
548 const size_t right_shift{static_cast<size_t>(32 - count)};
549 if (back_shift > 32 || left_shift >= 32 || right_shift >= 32) {
550 throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
551 base, shift, count);
552 }
553 return static_cast<u32>((base << left_shift) >> right_shift);
554 });
555 return;
556 case IR::Opcode::BitFieldInsert:
557 FoldWhenAllImmediates(inst, [](u32 base, u32 insert, u32 offset, u32 bits) {
558 if (bits >= 32 || offset >= 32) {
559 throw LogicError("Undefined result in {}({}, {}, {}, {})",
560 IR::Opcode::BitFieldInsert, base, insert, offset, bits);
561 }
562 return (base & ~(~(~0u << bits) << offset)) | (insert << offset);
563 });
564 return;
565 case IR::Opcode::CompositeExtractU32x2:
566 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x2,
567 IR::Opcode::CompositeInsertU32x2);
568 case IR::Opcode::CompositeExtractU32x3:
569 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x3,
570 IR::Opcode::CompositeInsertU32x3);
571 case IR::Opcode::CompositeExtractU32x4:
572 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x4,
573 IR::Opcode::CompositeInsertU32x4);
574 case IR::Opcode::CompositeExtractF32x2:
575 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2,
576 IR::Opcode::CompositeInsertF32x2);
577 case IR::Opcode::CompositeExtractF32x3:
578 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x3,
579 IR::Opcode::CompositeInsertF32x3);
580 case IR::Opcode::CompositeExtractF32x4:
581 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x4,
582 IR::Opcode::CompositeInsertF32x4);
583 case IR::Opcode::CompositeExtractF16x2:
584 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x2,
585 IR::Opcode::CompositeInsertF16x2);
586 case IR::Opcode::CompositeExtractF16x3:
587 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x3,
588 IR::Opcode::CompositeInsertF16x3);
589 case IR::Opcode::CompositeExtractF16x4:
590 return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4,
591 IR::Opcode::CompositeInsertF16x4);
592 case IR::Opcode::FSwizzleAdd:
593 return FoldFSwizzleAdd(block, inst);
594 default:
595 break;
596 }
597}
598} // Anonymous namespace
599
600void ConstantPropagationPass(IR::Program& program) {
601 const auto end{program.post_order_blocks.rend()};
602 for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
603 IR::Block* const block{*it};
604 for (IR::Inst& inst : block->Instructions()) {
605 ConstantPropagation(*block, inst);
606 }
607 }
608}
609
610} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp
new file mode 100644
index 000000000..400836301
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp
@@ -0,0 +1,26 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/ir/basic_block.h"
6#include "shader_recompiler/frontend/ir/value.h"
7#include "shader_recompiler/ir_opt/passes.h"
8
9namespace Shader::Optimization {
10
11void DeadCodeEliminationPass(IR::Program& program) {
12 // We iterate over the instructions in reverse order.
13 // This is because removing an instruction reduces the number of uses for earlier instructions.
14 for (IR::Block* const block : program.post_order_blocks) {
15 auto it{block->end()};
16 while (it != block->begin()) {
17 --it;
18 if (!it->HasUses() && !it->MayHaveSideEffects()) {
19 it->Invalidate();
20 it = block->Instructions().erase(it);
21 }
22 }
23 }
24}
25
26} // namespace Shader::Optimization
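
The comment in the pass above explains why a single backwards sweep is enough. A toy analogue, assuming a made-up ToyInst type rather than the real IR classes: removing a later, unused instruction releases the use it held on an earlier instruction, so that earlier instruction is also removable within the same reverse pass.

    // Sketch: dead-code removal of a two-instruction chain in one reverse sweep.
    #include <cassert>
    #include <vector>

    struct ToyInst {
        int uses = 0;
        ToyInst* operand = nullptr; // single operand, for simplicity
        bool side_effects = false;
        bool dead = false;
    };

    int main() {
        ToyInst a{};      // %a = ...
        ToyInst b{0, &a}; // %b = op %a   (the only user of %a)
        a.uses = 1;
        std::vector<ToyInst*> block{&a, &b};

        for (auto it = block.rbegin(); it != block.rend(); ++it) {
            ToyInst* const inst = *it;
            if (inst->uses == 0 && !inst->side_effects) {
                if (inst->operand) {
                    --inst->operand->uses; // analogous to Invalidate() releasing argument uses
                }
                inst->dead = true;
            }
        }
        assert(a.dead && b.dead); // both removed in a single sweep
        return 0;
    }
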
diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp
new file mode 100644
index 000000000..055ba9c54
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp
@@ -0,0 +1,30 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/ir/ir_emitter.h"
6#include "shader_recompiler/ir_opt/passes.h"
7
8namespace Shader::Optimization {
9
10void VertexATransformPass(IR::Program& program) {
11 for (IR::Block* const block : program.blocks) {
12 for (IR::Inst& inst : block->Instructions()) {
13 if (inst.GetOpcode() == IR::Opcode::Epilogue) {
14 return inst.Invalidate();
15 }
16 }
17 }
18}
19
20void VertexBTransformPass(IR::Program& program) {
21 for (IR::Block* const block : program.blocks) {
22 for (IR::Inst& inst : block->Instructions()) {
23 if (inst.GetOpcode() == IR::Opcode::Prologue) {
24 return inst.Invalidate();
25 }
26 }
27 }
28}
29
30} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
new file mode 100644
index 000000000..4197b0095
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -0,0 +1,526 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <compare>
7#include <optional>
8#include <queue>
9
10#include <boost/container/flat_set.hpp>
11#include <boost/container/small_vector.hpp>
12
13#include "common/alignment.h"
14#include "shader_recompiler/frontend/ir/basic_block.h"
15#include "shader_recompiler/frontend/ir/breadth_first_search.h"
16#include "shader_recompiler/frontend/ir/ir_emitter.h"
17#include "shader_recompiler/frontend/ir/value.h"
18#include "shader_recompiler/ir_opt/passes.h"
19
20namespace Shader::Optimization {
21namespace {
22/// Address in constant buffers to the storage buffer descriptor
23struct StorageBufferAddr {
24 auto operator<=>(const StorageBufferAddr&) const noexcept = default;
25
26 u32 index;
27 u32 offset;
28};
29
30/// Block iterator to a global memory instruction and the storage buffer it uses
31struct StorageInst {
32 StorageBufferAddr storage_buffer;
33 IR::Inst* inst;
34 IR::Block* block;
35};
36
37/// Bias towards a certain range of constant buffers when looking for storage buffers
38struct Bias {
39 u32 index;
40 u32 offset_begin;
41 u32 offset_end;
42};
43
44using boost::container::flat_set;
45using boost::container::small_vector;
46using StorageBufferSet =
47 flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
48using StorageInstVector = small_vector<StorageInst, 24>;
49using StorageWritesSet =
50 flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
51
52struct StorageInfo {
53 StorageBufferSet set;
54 StorageInstVector to_replace;
55 StorageWritesSet writes;
56};
57
58/// Returns true when the instruction is a global memory instruction
59bool IsGlobalMemory(const IR::Inst& inst) {
60 switch (inst.GetOpcode()) {
61 case IR::Opcode::LoadGlobalS8:
62 case IR::Opcode::LoadGlobalU8:
63 case IR::Opcode::LoadGlobalS16:
64 case IR::Opcode::LoadGlobalU16:
65 case IR::Opcode::LoadGlobal32:
66 case IR::Opcode::LoadGlobal64:
67 case IR::Opcode::LoadGlobal128:
68 case IR::Opcode::WriteGlobalS8:
69 case IR::Opcode::WriteGlobalU8:
70 case IR::Opcode::WriteGlobalS16:
71 case IR::Opcode::WriteGlobalU16:
72 case IR::Opcode::WriteGlobal32:
73 case IR::Opcode::WriteGlobal64:
74 case IR::Opcode::WriteGlobal128:
75 case IR::Opcode::GlobalAtomicIAdd32:
76 case IR::Opcode::GlobalAtomicSMin32:
77 case IR::Opcode::GlobalAtomicUMin32:
78 case IR::Opcode::GlobalAtomicSMax32:
79 case IR::Opcode::GlobalAtomicUMax32:
80 case IR::Opcode::GlobalAtomicInc32:
81 case IR::Opcode::GlobalAtomicDec32:
82 case IR::Opcode::GlobalAtomicAnd32:
83 case IR::Opcode::GlobalAtomicOr32:
84 case IR::Opcode::GlobalAtomicXor32:
85 case IR::Opcode::GlobalAtomicExchange32:
86 case IR::Opcode::GlobalAtomicIAdd64:
87 case IR::Opcode::GlobalAtomicSMin64:
88 case IR::Opcode::GlobalAtomicUMin64:
89 case IR::Opcode::GlobalAtomicSMax64:
90 case IR::Opcode::GlobalAtomicUMax64:
91 case IR::Opcode::GlobalAtomicAnd64:
92 case IR::Opcode::GlobalAtomicOr64:
93 case IR::Opcode::GlobalAtomicXor64:
94 case IR::Opcode::GlobalAtomicExchange64:
95 case IR::Opcode::GlobalAtomicAddF32:
96 case IR::Opcode::GlobalAtomicAddF16x2:
97 case IR::Opcode::GlobalAtomicAddF32x2:
98 case IR::Opcode::GlobalAtomicMinF16x2:
99 case IR::Opcode::GlobalAtomicMinF32x2:
100 case IR::Opcode::GlobalAtomicMaxF16x2:
101 case IR::Opcode::GlobalAtomicMaxF32x2:
102 return true;
103 default:
104 return false;
105 }
106}
107
108/// Returns true when the instruction is a global memory write instruction
109bool IsGlobalMemoryWrite(const IR::Inst& inst) {
110 switch (inst.GetOpcode()) {
111 case IR::Opcode::WriteGlobalS8:
112 case IR::Opcode::WriteGlobalU8:
113 case IR::Opcode::WriteGlobalS16:
114 case IR::Opcode::WriteGlobalU16:
115 case IR::Opcode::WriteGlobal32:
116 case IR::Opcode::WriteGlobal64:
117 case IR::Opcode::WriteGlobal128:
118 case IR::Opcode::GlobalAtomicIAdd32:
119 case IR::Opcode::GlobalAtomicSMin32:
120 case IR::Opcode::GlobalAtomicUMin32:
121 case IR::Opcode::GlobalAtomicSMax32:
122 case IR::Opcode::GlobalAtomicUMax32:
123 case IR::Opcode::GlobalAtomicInc32:
124 case IR::Opcode::GlobalAtomicDec32:
125 case IR::Opcode::GlobalAtomicAnd32:
126 case IR::Opcode::GlobalAtomicOr32:
127 case IR::Opcode::GlobalAtomicXor32:
128 case IR::Opcode::GlobalAtomicExchange32:
129 case IR::Opcode::GlobalAtomicIAdd64:
130 case IR::Opcode::GlobalAtomicSMin64:
131 case IR::Opcode::GlobalAtomicUMin64:
132 case IR::Opcode::GlobalAtomicSMax64:
133 case IR::Opcode::GlobalAtomicUMax64:
134 case IR::Opcode::GlobalAtomicAnd64:
135 case IR::Opcode::GlobalAtomicOr64:
136 case IR::Opcode::GlobalAtomicXor64:
137 case IR::Opcode::GlobalAtomicExchange64:
138 case IR::Opcode::GlobalAtomicAddF32:
139 case IR::Opcode::GlobalAtomicAddF16x2:
140 case IR::Opcode::GlobalAtomicAddF32x2:
141 case IR::Opcode::GlobalAtomicMinF16x2:
142 case IR::Opcode::GlobalAtomicMinF32x2:
143 case IR::Opcode::GlobalAtomicMaxF16x2:
144 case IR::Opcode::GlobalAtomicMaxF32x2:
145 return true;
146 default:
147 return false;
148 }
149}
150
151/// Converts a global memory opcode to its storage buffer equivalent
152IR::Opcode GlobalToStorage(IR::Opcode opcode) {
153 switch (opcode) {
154 case IR::Opcode::LoadGlobalS8:
155 return IR::Opcode::LoadStorageS8;
156 case IR::Opcode::LoadGlobalU8:
157 return IR::Opcode::LoadStorageU8;
158 case IR::Opcode::LoadGlobalS16:
159 return IR::Opcode::LoadStorageS16;
160 case IR::Opcode::LoadGlobalU16:
161 return IR::Opcode::LoadStorageU16;
162 case IR::Opcode::LoadGlobal32:
163 return IR::Opcode::LoadStorage32;
164 case IR::Opcode::LoadGlobal64:
165 return IR::Opcode::LoadStorage64;
166 case IR::Opcode::LoadGlobal128:
167 return IR::Opcode::LoadStorage128;
168 case IR::Opcode::WriteGlobalS8:
169 return IR::Opcode::WriteStorageS8;
170 case IR::Opcode::WriteGlobalU8:
171 return IR::Opcode::WriteStorageU8;
172 case IR::Opcode::WriteGlobalS16:
173 return IR::Opcode::WriteStorageS16;
174 case IR::Opcode::WriteGlobalU16:
175 return IR::Opcode::WriteStorageU16;
176 case IR::Opcode::WriteGlobal32:
177 return IR::Opcode::WriteStorage32;
178 case IR::Opcode::WriteGlobal64:
179 return IR::Opcode::WriteStorage64;
180 case IR::Opcode::WriteGlobal128:
181 return IR::Opcode::WriteStorage128;
182 case IR::Opcode::GlobalAtomicIAdd32:
183 return IR::Opcode::StorageAtomicIAdd32;
184 case IR::Opcode::GlobalAtomicSMin32:
185 return IR::Opcode::StorageAtomicSMin32;
186 case IR::Opcode::GlobalAtomicUMin32:
187 return IR::Opcode::StorageAtomicUMin32;
188 case IR::Opcode::GlobalAtomicSMax32:
189 return IR::Opcode::StorageAtomicSMax32;
190 case IR::Opcode::GlobalAtomicUMax32:
191 return IR::Opcode::StorageAtomicUMax32;
192 case IR::Opcode::GlobalAtomicInc32:
193 return IR::Opcode::StorageAtomicInc32;
194 case IR::Opcode::GlobalAtomicDec32:
195 return IR::Opcode::StorageAtomicDec32;
196 case IR::Opcode::GlobalAtomicAnd32:
197 return IR::Opcode::StorageAtomicAnd32;
198 case IR::Opcode::GlobalAtomicOr32:
199 return IR::Opcode::StorageAtomicOr32;
200 case IR::Opcode::GlobalAtomicXor32:
201 return IR::Opcode::StorageAtomicXor32;
202 case IR::Opcode::GlobalAtomicIAdd64:
203 return IR::Opcode::StorageAtomicIAdd64;
204 case IR::Opcode::GlobalAtomicSMin64:
205 return IR::Opcode::StorageAtomicSMin64;
206 case IR::Opcode::GlobalAtomicUMin64:
207 return IR::Opcode::StorageAtomicUMin64;
208 case IR::Opcode::GlobalAtomicSMax64:
209 return IR::Opcode::StorageAtomicSMax64;
210 case IR::Opcode::GlobalAtomicUMax64:
211 return IR::Opcode::StorageAtomicUMax64;
212 case IR::Opcode::GlobalAtomicAnd64:
213 return IR::Opcode::StorageAtomicAnd64;
214 case IR::Opcode::GlobalAtomicOr64:
215 return IR::Opcode::StorageAtomicOr64;
216 case IR::Opcode::GlobalAtomicXor64:
217 return IR::Opcode::StorageAtomicXor64;
218 case IR::Opcode::GlobalAtomicExchange32:
219 return IR::Opcode::StorageAtomicExchange32;
220 case IR::Opcode::GlobalAtomicExchange64:
221 return IR::Opcode::StorageAtomicExchange64;
222 case IR::Opcode::GlobalAtomicAddF32:
223 return IR::Opcode::StorageAtomicAddF32;
224 case IR::Opcode::GlobalAtomicAddF16x2:
225 return IR::Opcode::StorageAtomicAddF16x2;
226 case IR::Opcode::GlobalAtomicMinF16x2:
227 return IR::Opcode::StorageAtomicMinF16x2;
228 case IR::Opcode::GlobalAtomicMaxF16x2:
229 return IR::Opcode::StorageAtomicMaxF16x2;
230 case IR::Opcode::GlobalAtomicAddF32x2:
231 return IR::Opcode::StorageAtomicAddF32x2;
232 case IR::Opcode::GlobalAtomicMinF32x2:
233 return IR::Opcode::StorageAtomicMinF32x2;
234 case IR::Opcode::GlobalAtomicMaxF32x2:
235 return IR::Opcode::StorageAtomicMaxF32x2;
236 default:
237 throw InvalidArgument("Invalid global memory opcode {}", opcode);
238 }
239}
240
241/// Returns true when a storage buffer address satisfies a bias
242bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
243 return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
244 storage_buffer.offset < bias.offset_end;
245}
246
247struct LowAddrInfo {
248 IR::U32 value;
249 s32 imm_offset;
250};
251
252/// Tries to track the low 32 bits of a global memory instruction's address
253std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
254 // The first argument is the low level GPU pointer to the global memory instruction
255 const IR::Value addr{inst->Arg(0)};
256 if (addr.IsImmediate()) {
257 // Not much we can do if it's an immediate
258 return std::nullopt;
259 }
260 // This address is expected to be either a PackUint2x32, an IAdd64, or a CompositeConstructU32x2
261 IR::Inst* addr_inst{addr.InstRecursive()};
262 s32 imm_offset{0};
263 if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) {
264 // If it's an IAdd64, get the immediate offset it is applying and grab the address
265 // instruction. This expects the instruction to be canonicalized, with the address as the
266 // first argument and the immediate offset as the second.
267 const IR::U64 imm_offset_value{addr_inst->Arg(1)};
268 if (!imm_offset_value.IsImmediate()) {
269 return std::nullopt;
270 }
271 imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64()));
272 const IR::U64 iadd_addr{addr_inst->Arg(0)};
273 if (iadd_addr.IsImmediate()) {
274 return std::nullopt;
275 }
276 addr_inst = iadd_addr.InstRecursive();
277 }
278 // With IAdd64 handled, now PackUint2x32 is expected
279 if (addr_inst->GetOpcode() == IR::Opcode::PackUint2x32) {
280 // PackUint2x32 is expected to be generated from a vector
281 const IR::Value vector{addr_inst->Arg(0)};
282 if (vector.IsImmediate()) {
283 return std::nullopt;
284 }
285 addr_inst = vector.InstRecursive();
286 }
287 // The vector is expected to be a CompositeConstructU32x2
288 if (addr_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) {
289 return std::nullopt;
290 }
291 // Grab the first argument from the CompositeConstructU32x2, this is the low address.
292 return LowAddrInfo{
293 .value{IR::U32{addr_inst->Arg(0)}},
294 .imm_offset = imm_offset,
295 };
296}
297
298/// Tries to track the storage buffer address used by a global memory instruction
299std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
300 const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> {
301 if (inst->GetOpcode() != IR::Opcode::GetCbufU32) {
302 return std::nullopt;
303 }
304 const IR::Value index{inst->Arg(0)};
305 const IR::Value offset{inst->Arg(1)};
306 if (!index.IsImmediate()) {
307 // Definitely not a storage buffer if it's read from a
308 // non-immediate index
309 return std::nullopt;
310 }
311 if (!offset.IsImmediate()) {
312 // TODO: Support SSBO arrays
313 return std::nullopt;
314 }
315 const StorageBufferAddr storage_buffer{
316 .index = index.U32(),
317 .offset = offset.U32(),
318 };
319 if (!Common::IsAligned(storage_buffer.offset, 16)) {
320 // The SSBO pointer has to be aligned
321 return std::nullopt;
322 }
323 if (bias && !MeetsBias(storage_buffer, *bias)) {
324 // Reject addresses that do not meet the bias; they are
325 // likely false positives
326 return std::nullopt;
327 }
328 return storage_buffer;
329 }};
330 return BreadthFirstSearch(value, pred);
331}
332
333/// Collects the storage buffer used by a global memory instruction and the instruction itself
334void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) {
335 // NVN puts storage buffers in a specific range; bias towards these addresses to avoid
336 // false positives
337 static constexpr Bias nvn_bias{
338 .index = 0,
339 .offset_begin = 0x110,
340 .offset_end = 0x610,
341 };
342 // Track the low address of the instruction
343 const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
344 if (!low_addr_info) {
345 // Failed to track the low address, use NVN fallbacks
346 return;
347 }
348 // First try to find storage buffers in the NVN address
349 const IR::U32 low_addr{low_addr_info->value};
350 std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
351 if (!storage_buffer) {
352 // If it fails, track without a bias
353 storage_buffer = Track(low_addr, nullptr);
354 if (!storage_buffer) {
355 // If that also fails, use NVN fallbacks
356 return;
357 }
358 }
359 // Collect storage buffer and the instruction
360 if (IsGlobalMemoryWrite(inst)) {
361 info.writes.insert(*storage_buffer);
362 }
363 info.set.insert(*storage_buffer);
364 info.to_replace.push_back(StorageInst{
365 .storage_buffer{*storage_buffer},
366 .inst = &inst,
367 .block = &block,
368 });
369}
370
371/// Returns the offset in bytes for an equivalent storage instruction
372IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
373 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
374 IR::U32 offset;
375 if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
376 offset = low_addr->value;
377 if (low_addr->imm_offset != 0) {
378 offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset));
379 }
380 } else {
381 offset = ir.UConvert(32, IR::U64{inst.Arg(0)});
382 }
383 // Subtract the low 32 bits of the storage buffer GPU address (read from the constant
384 // buffer) from the guest address. The result is the storage buffer offset in bytes.
385 const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
386 return ir.ISub(offset, low_cbuf);
387}
388
389/// Replace a global memory load instruction with its storage buffer equivalent
390void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
391 const IR::U32& offset) {
392 const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
393 const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
394 const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})};
395 inst.ReplaceUsesWith(value);
396}
397
398/// Replace a global memory write instruction with its storage buffer equivalent
399void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
400 const IR::U32& offset) {
401 const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
402 const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
403 block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)});
404 inst.Invalidate();
405}
406
407/// Replace an atomic operation on global memory instruction with its storage buffer equivalent
408void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
409 const IR::U32& offset) {
410 const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
411 const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
412 const IR::Value value{
413 &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})};
414 inst.ReplaceUsesWith(value);
415}
416
417/// Replace a global memory instruction with its storage buffer equivalent
418void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
419 const IR::U32& offset) {
420 switch (inst.GetOpcode()) {
421 case IR::Opcode::LoadGlobalS8:
422 case IR::Opcode::LoadGlobalU8:
423 case IR::Opcode::LoadGlobalS16:
424 case IR::Opcode::LoadGlobalU16:
425 case IR::Opcode::LoadGlobal32:
426 case IR::Opcode::LoadGlobal64:
427 case IR::Opcode::LoadGlobal128:
428 return ReplaceLoad(block, inst, storage_index, offset);
429 case IR::Opcode::WriteGlobalS8:
430 case IR::Opcode::WriteGlobalU8:
431 case IR::Opcode::WriteGlobalS16:
432 case IR::Opcode::WriteGlobalU16:
433 case IR::Opcode::WriteGlobal32:
434 case IR::Opcode::WriteGlobal64:
435 case IR::Opcode::WriteGlobal128:
436 return ReplaceWrite(block, inst, storage_index, offset);
437 case IR::Opcode::GlobalAtomicIAdd32:
438 case IR::Opcode::GlobalAtomicSMin32:
439 case IR::Opcode::GlobalAtomicUMin32:
440 case IR::Opcode::GlobalAtomicSMax32:
441 case IR::Opcode::GlobalAtomicUMax32:
442 case IR::Opcode::GlobalAtomicInc32:
443 case IR::Opcode::GlobalAtomicDec32:
444 case IR::Opcode::GlobalAtomicAnd32:
445 case IR::Opcode::GlobalAtomicOr32:
446 case IR::Opcode::GlobalAtomicXor32:
447 case IR::Opcode::GlobalAtomicExchange32:
448 case IR::Opcode::GlobalAtomicIAdd64:
449 case IR::Opcode::GlobalAtomicSMin64:
450 case IR::Opcode::GlobalAtomicUMin64:
451 case IR::Opcode::GlobalAtomicSMax64:
452 case IR::Opcode::GlobalAtomicUMax64:
453 case IR::Opcode::GlobalAtomicAnd64:
454 case IR::Opcode::GlobalAtomicOr64:
455 case IR::Opcode::GlobalAtomicXor64:
456 case IR::Opcode::GlobalAtomicExchange64:
457 case IR::Opcode::GlobalAtomicAddF32:
458 case IR::Opcode::GlobalAtomicAddF16x2:
459 case IR::Opcode::GlobalAtomicAddF32x2:
460 case IR::Opcode::GlobalAtomicMinF16x2:
461 case IR::Opcode::GlobalAtomicMinF32x2:
462 case IR::Opcode::GlobalAtomicMaxF16x2:
463 case IR::Opcode::GlobalAtomicMaxF32x2:
464 return ReplaceAtomic(block, inst, storage_index, offset);
465 default:
466 throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode());
467 }
468}
469} // Anonymous namespace
470
471void GlobalMemoryToStorageBufferPass(IR::Program& program) {
472 StorageInfo info;
473 for (IR::Block* const block : program.post_order_blocks) {
474 for (IR::Inst& inst : block->Instructions()) {
475 if (!IsGlobalMemory(inst)) {
476 continue;
477 }
478 CollectStorageBuffers(*block, inst, info);
479 }
480 }
481 for (const StorageBufferAddr& storage_buffer : info.set) {
482 program.info.storage_buffers_descriptors.push_back({
483 .cbuf_index = storage_buffer.index,
484 .cbuf_offset = storage_buffer.offset,
485 .count = 1,
486 .is_written = info.writes.contains(storage_buffer),
487 });
488 }
489 for (const StorageInst& storage_inst : info.to_replace) {
490 const StorageBufferAddr storage_buffer{storage_inst.storage_buffer};
491 const auto it{info.set.find(storage_inst.storage_buffer)};
492 const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
493 IR::Block* const block{storage_inst.block};
494 IR::Inst* const inst{storage_inst.inst};
495 const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
496 Replace(*block, *inst, index, offset);
497 }
498}
499
500template <typename Descriptors, typename Descriptor, typename Func>
501static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
502 // TODO: Handle arrays
503 const auto it{std::ranges::find_if(descriptors, pred)};
504 if (it != descriptors.end()) {
505 return static_cast<u32>(std::distance(descriptors.begin(), it));
506 }
507 descriptors.push_back(desc);
508 return static_cast<u32>(descriptors.size()) - 1;
509}
510
511void JoinStorageInfo(Info& base, Info& source) {
512 auto& descriptors = base.storage_buffers_descriptors;
513 for (auto& desc : source.storage_buffers_descriptors) {
514 auto it{std::ranges::find_if(descriptors, [&desc](const auto& existing) {
515 return desc.cbuf_index == existing.cbuf_index &&
516 desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count;
517 })};
518 if (it != descriptors.end()) {
519 it->is_written |= desc.is_written;
520 continue;
521 }
522 descriptors.push_back(desc);
523 }
524}
525
526} // namespace Shader::Optimization
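
A worked example of the StorageOffset arithmetic above, assuming only standard C++; the GPU address and constant-buffer value below are invented for illustration. The byte offset handed to the Storage* replacement is the low 32 bits of the guest address (plus any immediate folded by TrackLowAddress) minus the storage buffer base address held in the constant buffer:

    // Sketch: the subtraction performed by StorageOffset, on made-up values.
    #include <cassert>
    #include <cstdint>

    int main() {
        const std::uint64_t ssbo_base_gpu_addr = 0x0000'0002'4010'0000ULL; // hypothetical SSBO base
        const std::uint64_t guest_addr = ssbo_base_gpu_addr + 0x30;        // LoadGlobal32 target
        const std::int32_t imm_offset = 0x10;                              // folded IAdd64 immediate

        const std::uint32_t low_addr = static_cast<std::uint32_t>(guest_addr);          // CompositeConstructU32x2 arg 0
        const std::uint32_t low_cbuf = static_cast<std::uint32_t>(ssbo_base_gpu_addr);  // what GetCbuf would return

        const std::uint32_t storage_offset = (low_addr + imm_offset) - low_cbuf;
        assert(storage_offset == 0x40); // byte offset used by the Storage* replacement
        return 0;
    }
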
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
new file mode 100644
index 000000000..e9b55f835
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp
@@ -0,0 +1,38 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <vector>
6
7#include "shader_recompiler/frontend/ir/basic_block.h"
8#include "shader_recompiler/frontend/ir/value.h"
9#include "shader_recompiler/ir_opt/passes.h"
10
11namespace Shader::Optimization {
12
13void IdentityRemovalPass(IR::Program& program) {
14 std::vector<IR::Inst*> to_invalidate;
15 for (IR::Block* const block : program.blocks) {
16 for (auto inst = block->begin(); inst != block->end();) {
17 const size_t num_args{inst->NumArgs()};
18 for (size_t i = 0; i < num_args; ++i) {
19 IR::Value arg;
20 while ((arg = inst->Arg(i)).IsIdentity()) {
21 inst->SetArg(i, arg.Inst()->Arg(0));
22 }
23 }
24 if (inst->GetOpcode() == IR::Opcode::Identity ||
25 inst->GetOpcode() == IR::Opcode::Void) {
26 to_invalidate.push_back(&*inst);
27 inst = block->Instructions().erase(inst);
28 } else {
29 ++inst;
30 }
31 }
32 }
33 for (IR::Inst* const inst : to_invalidate) {
34 inst->Invalidate();
35 }
36}
37
38} // namespace Shader::Optimization
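
The argument-rewriting loop above chases chains of identities until a concrete value is reached, and only then erases the identity instructions themselves. A plain-pointer analogue, assuming a made-up ToyValue type in place of IR::Value:

    // Sketch: collapsing an identity chain to its underlying value.
    #include <cassert>

    struct ToyValue {
        bool is_identity = false;
        ToyValue* forwarded = nullptr; // stands in for Arg(0) of an Identity
    };

    static ToyValue* Chase(ToyValue* arg) {
        while (arg->is_identity) {
            arg = arg->forwarded; // same idea as inst->SetArg(i, arg.Inst()->Arg(0))
        }
        return arg;
    }

    int main() {
        ToyValue real{};           // a concrete value
        ToyValue id1{true, &real}; // Identity(real)
        ToyValue id2{true, &id1};  // Identity(Identity(real))
        assert(Chase(&id2) == &real); // the chain collapses to the underlying value
        return 0;
    }
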
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
new file mode 100644
index 000000000..773e1f961
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
@@ -0,0 +1,143 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6
7#include "shader_recompiler/frontend/ir/ir_emitter.h"
8#include "shader_recompiler/frontend/ir/value.h"
9#include "shader_recompiler/ir_opt/passes.h"
10
11namespace Shader::Optimization {
12namespace {
13IR::Opcode Replace(IR::Opcode op) {
14 switch (op) {
15 case IR::Opcode::FPAbs16:
16 return IR::Opcode::FPAbs32;
17 case IR::Opcode::FPAdd16:
18 return IR::Opcode::FPAdd32;
19 case IR::Opcode::FPCeil16:
20 return IR::Opcode::FPCeil32;
21 case IR::Opcode::FPFloor16:
22 return IR::Opcode::FPFloor32;
23 case IR::Opcode::FPFma16:
24 return IR::Opcode::FPFma32;
25 case IR::Opcode::FPMul16:
26 return IR::Opcode::FPMul32;
27 case IR::Opcode::FPNeg16:
28 return IR::Opcode::FPNeg32;
29 case IR::Opcode::FPRoundEven16:
30 return IR::Opcode::FPRoundEven32;
31 case IR::Opcode::FPSaturate16:
32 return IR::Opcode::FPSaturate32;
33 case IR::Opcode::FPClamp16:
34 return IR::Opcode::FPClamp32;
35 case IR::Opcode::FPTrunc16:
36 return IR::Opcode::FPTrunc32;
37 case IR::Opcode::CompositeConstructF16x2:
38 return IR::Opcode::CompositeConstructF32x2;
39 case IR::Opcode::CompositeConstructF16x3:
40 return IR::Opcode::CompositeConstructF32x3;
41 case IR::Opcode::CompositeConstructF16x4:
42 return IR::Opcode::CompositeConstructF32x4;
43 case IR::Opcode::CompositeExtractF16x2:
44 return IR::Opcode::CompositeExtractF32x2;
45 case IR::Opcode::CompositeExtractF16x3:
46 return IR::Opcode::CompositeExtractF32x3;
47 case IR::Opcode::CompositeExtractF16x4:
48 return IR::Opcode::CompositeExtractF32x4;
49 case IR::Opcode::CompositeInsertF16x2:
50 return IR::Opcode::CompositeInsertF32x2;
51 case IR::Opcode::CompositeInsertF16x3:
52 return IR::Opcode::CompositeInsertF32x3;
53 case IR::Opcode::CompositeInsertF16x4:
54 return IR::Opcode::CompositeInsertF32x4;
55 case IR::Opcode::FPOrdEqual16:
56 return IR::Opcode::FPOrdEqual32;
57 case IR::Opcode::FPUnordEqual16:
58 return IR::Opcode::FPUnordEqual32;
59 case IR::Opcode::FPOrdNotEqual16:
60 return IR::Opcode::FPOrdNotEqual32;
61 case IR::Opcode::FPUnordNotEqual16:
62 return IR::Opcode::FPUnordNotEqual32;
63 case IR::Opcode::FPOrdLessThan16:
64 return IR::Opcode::FPOrdLessThan32;
65 case IR::Opcode::FPUnordLessThan16:
66 return IR::Opcode::FPUnordLessThan32;
67 case IR::Opcode::FPOrdGreaterThan16:
68 return IR::Opcode::FPOrdGreaterThan32;
69 case IR::Opcode::FPUnordGreaterThan16:
70 return IR::Opcode::FPUnordGreaterThan32;
71 case IR::Opcode::FPOrdLessThanEqual16:
72 return IR::Opcode::FPOrdLessThanEqual32;
73 case IR::Opcode::FPUnordLessThanEqual16:
74 return IR::Opcode::FPUnordLessThanEqual32;
75 case IR::Opcode::FPOrdGreaterThanEqual16:
76 return IR::Opcode::FPOrdGreaterThanEqual32;
77 case IR::Opcode::FPUnordGreaterThanEqual16:
78 return IR::Opcode::FPUnordGreaterThanEqual32;
79 case IR::Opcode::FPIsNan16:
80 return IR::Opcode::FPIsNan32;
81 case IR::Opcode::ConvertS16F16:
82 return IR::Opcode::ConvertS16F32;
83 case IR::Opcode::ConvertS32F16:
84 return IR::Opcode::ConvertS32F32;
85 case IR::Opcode::ConvertS64F16:
86 return IR::Opcode::ConvertS64F32;
87 case IR::Opcode::ConvertU16F16:
88 return IR::Opcode::ConvertU16F32;
89 case IR::Opcode::ConvertU32F16:
90 return IR::Opcode::ConvertU32F32;
91 case IR::Opcode::ConvertU64F16:
92 return IR::Opcode::ConvertU64F32;
93 case IR::Opcode::PackFloat2x16:
94 return IR::Opcode::PackHalf2x16;
95 case IR::Opcode::UnpackFloat2x16:
96 return IR::Opcode::UnpackHalf2x16;
97 case IR::Opcode::ConvertF32F16:
98 return IR::Opcode::Identity;
99 case IR::Opcode::ConvertF16F32:
100 return IR::Opcode::Identity;
101 case IR::Opcode::ConvertF16S8:
102 return IR::Opcode::ConvertF32S8;
103 case IR::Opcode::ConvertF16S16:
104 return IR::Opcode::ConvertF32S16;
105 case IR::Opcode::ConvertF16S32:
106 return IR::Opcode::ConvertF32S32;
107 case IR::Opcode::ConvertF16S64:
108 return IR::Opcode::ConvertF32S64;
109 case IR::Opcode::ConvertF16U8:
110 return IR::Opcode::ConvertF32U8;
111 case IR::Opcode::ConvertF16U16:
112 return IR::Opcode::ConvertF32U16;
113 case IR::Opcode::ConvertF16U32:
114 return IR::Opcode::ConvertF32U32;
115 case IR::Opcode::ConvertF16U64:
116 return IR::Opcode::ConvertF32U64;
117 case IR::Opcode::GlobalAtomicAddF16x2:
118 return IR::Opcode::GlobalAtomicAddF32x2;
119 case IR::Opcode::StorageAtomicAddF16x2:
120 return IR::Opcode::StorageAtomicAddF32x2;
121 case IR::Opcode::GlobalAtomicMinF16x2:
122 return IR::Opcode::GlobalAtomicMinF32x2;
123 case IR::Opcode::StorageAtomicMinF16x2:
124 return IR::Opcode::StorageAtomicMinF32x2;
125 case IR::Opcode::GlobalAtomicMaxF16x2:
126 return IR::Opcode::GlobalAtomicMaxF32x2;
127 case IR::Opcode::StorageAtomicMaxF16x2:
128 return IR::Opcode::StorageAtomicMaxF32x2;
129 default:
130 return op;
131 }
132}
133} // Anonymous namespace
134
135void LowerFp16ToFp32(IR::Program& program) {
136 for (IR::Block* const block : program.blocks) {
137 for (IR::Inst& inst : block->Instructions()) {
138 inst.ReplaceOpcode(Replace(inst.GetOpcode()));
139 }
140 }
141}
142
143} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
new file mode 100644
index 000000000..e80d3d1d9
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
@@ -0,0 +1,218 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <utility>
6
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9#include "shader_recompiler/frontend/ir/ir_emitter.h"
10#include "shader_recompiler/frontend/ir/program.h"
11#include "shader_recompiler/frontend/ir/value.h"
12#include "shader_recompiler/ir_opt/passes.h"
13
14namespace Shader::Optimization {
15namespace {
16std::pair<IR::U32, IR::U32> Unpack(IR::IREmitter& ir, const IR::Value& packed) {
17 if (packed.IsImmediate()) {
18 const u64 value{packed.U64()};
19 return {
20 ir.Imm32(static_cast<u32>(value)),
21 ir.Imm32(static_cast<u32>(value >> 32)),
22 };
23 } else {
24 return std::pair<IR::U32, IR::U32>{
25 ir.CompositeExtract(packed, 0u),
26 ir.CompositeExtract(packed, 1u),
27 };
28 }
29}
30
31void IAdd64To32(IR::Block& block, IR::Inst& inst) {
32 if (inst.HasAssociatedPseudoOperation()) {
33 throw NotImplementedException("IAdd64 emulation with pseudo instructions");
34 }
35 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
36 const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))};
37 const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))};
38
39 const IR::U32 ret_lo{ir.IAdd(a_lo, b_lo)};
40 const IR::U32 carry{ir.Select(ir.GetCarryFromOp(ret_lo), ir.Imm32(1u), ir.Imm32(0u))};
41
42 const IR::U32 ret_hi{ir.IAdd(ir.IAdd(a_hi, b_hi), carry)};
43 inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
44}
45
46void ISub64To32(IR::Block& block, IR::Inst& inst) {
47 if (inst.HasAssociatedPseudoOperation()) {
48 throw NotImplementedException("ISub64 emulation with pseudo instructions");
49 }
50 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
51 const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))};
52 const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))};
53
54 const IR::U32 ret_lo{ir.ISub(a_lo, b_lo)};
55 const IR::U1 underflow{ir.IGreaterThan(ret_lo, a_lo, false)};
56 const IR::U32 underflow_bit{ir.Select(underflow, ir.Imm32(1u), ir.Imm32(0u))};
57
58 const IR::U32 ret_hi{ir.ISub(ir.ISub(a_hi, b_hi), underflow_bit)};
59 inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
60}
61
62void INeg64To32(IR::Block& block, IR::Inst& inst) {
63 if (inst.HasAssociatedPseudoOperation()) {
64 throw NotImplementedException("INeg64 emulation with pseudo instructions");
65 }
66 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
67 auto [lo, hi]{Unpack(ir, inst.Arg(0))};
68 lo = ir.BitwiseNot(lo);
69 hi = ir.BitwiseNot(hi);
70
71 lo = ir.IAdd(lo, ir.Imm32(1));
72
73 const IR::U32 carry{ir.Select(ir.GetCarryFromOp(lo), ir.Imm32(1u), ir.Imm32(0u))};
74 hi = ir.IAdd(hi, carry);
75
76 inst.ReplaceUsesWith(ir.CompositeConstruct(lo, hi));
77}
78
79void ShiftLeftLogical64To32(IR::Block& block, IR::Inst& inst) {
80 if (inst.HasAssociatedPseudoOperation()) {
81 throw NotImplementedException("ShiftLeftLogical64 emulation with pseudo instructions");
82 }
83 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
84 const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
85 const IR::U32 shift{inst.Arg(1)};
86
87 const IR::U32 shifted_lo{ir.ShiftLeftLogical(lo, shift)};
88 const IR::U32 shifted_hi{ir.ShiftLeftLogical(hi, shift)};
89
90 const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
91 const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
92 const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
93
94 const IR::U32 long_ret_lo{ir.Imm32(0)};
95 const IR::U32 long_ret_hi{ir.ShiftLeftLogical(lo, inv_shift)};
96
97 const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
98 const IR::U32 lo_extract{ir.BitFieldExtract(lo, shift_complement, shift, false)};
99 const IR::U32 short_ret_lo{shifted_lo};
100 const IR::U32 short_ret_hi{ir.BitwiseOr(shifted_hi, lo_extract)};
101
102 const IR::U32 zero_ret_lo{lo};
103 const IR::U32 zero_ret_hi{hi};
104
105 const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
106 const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
107
108 const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
109 const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
110 inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
111}
112
113void ShiftRightLogical64To32(IR::Block& block, IR::Inst& inst) {
114 if (inst.HasAssociatedPseudoOperation()) {
115 throw NotImplementedException("ShiftRightLogical64 emulation with pseudo instructions");
116 }
117 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
118 const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
119 const IR::U32 shift{inst.Arg(1)};
120
121 const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)};
122 const IR::U32 shifted_hi{ir.ShiftRightLogical(hi, shift)};
123
124 const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
125 const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
126 const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
127
128 const IR::U32 long_ret_hi{ir.Imm32(0)};
129 const IR::U32 long_ret_lo{ir.ShiftRightLogical(hi, inv_shift)};
130
131 const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
132 const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)};
133 const IR::U32 short_ret_hi{shifted_hi};
134 const IR::U32 short_ret_lo{
135 ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)};
136
137 const IR::U32 zero_ret_lo{lo};
138 const IR::U32 zero_ret_hi{hi};
139
140 const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
141 const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
142
143 const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
144 const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
145 inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
146}
147
148void ShiftRightArithmetic64To32(IR::Block& block, IR::Inst& inst) {
149 if (inst.HasAssociatedPseudoOperation()) {
150 throw NotImplementedException("ShiftRightArithmetic64 emulation with pseudo instructions");
151 }
152 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
153 const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
154 const IR::U32 shift{inst.Arg(1)};
155
156 const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)};
157 const IR::U32 shifted_hi{ir.ShiftRightArithmetic(hi, shift)};
158
159 const IR::U32 sign_extension{ir.ShiftRightArithmetic(hi, ir.Imm32(31))};
160
161 const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
162 const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
163 const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
164
165 const IR::U32 long_ret_hi{sign_extension};
166 const IR::U32 long_ret_lo{ir.ShiftRightArithmetic(hi, inv_shift)};
167
168 const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
169 const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)};
170 const IR::U32 short_ret_hi{shifted_hi};
171 const IR::U32 short_ret_lo{
172 ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)};
173
174 const IR::U32 zero_ret_lo{lo};
175 const IR::U32 zero_ret_hi{hi};
176
177 const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
178 const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
179
180 const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
181 const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
182 inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
183}
184
185void Lower(IR::Block& block, IR::Inst& inst) {
186 switch (inst.GetOpcode()) {
187 case IR::Opcode::PackUint2x32:
188 case IR::Opcode::UnpackUint2x32:
189 return inst.ReplaceOpcode(IR::Opcode::Identity);
190 case IR::Opcode::IAdd64:
191 return IAdd64To32(block, inst);
192 case IR::Opcode::ISub64:
193 return ISub64To32(block, inst);
194 case IR::Opcode::INeg64:
195 return INeg64To32(block, inst);
196 case IR::Opcode::ShiftLeftLogical64:
197 return ShiftLeftLogical64To32(block, inst);
198 case IR::Opcode::ShiftRightLogical64:
199 return ShiftRightLogical64To32(block, inst);
200 case IR::Opcode::ShiftRightArithmetic64:
201 return ShiftRightArithmetic64To32(block, inst);
202 default:
203 break;
204 }
205}
206} // Anonymous namespace
207
208void LowerInt64ToInt32(IR::Program& program) {
209 const auto end{program.post_order_blocks.rend()};
210 for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
211 IR::Block* const block{*it};
212 for (IR::Inst& inst : block->Instructions()) {
213 Lower(*block, inst);
214 }
215 }
216}
217
218} // namespace Shader::Optimization
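
A standalone sanity check of the IAdd64 lowering above, assuming only standard C++: a 64-bit addition is the 32-bit sum of the low halves plus the 32-bit sum of the high halves, with a carry of 1 whenever the low sum wrapped around (the role of GetCarryFromOp in the pass).

    // Sketch: 64-bit addition emulated with two 32-bit additions and a carry.
    #include <cassert>
    #include <cstdint>

    static std::uint64_t Add64Via32(std::uint64_t a, std::uint64_t b) {
        const std::uint32_t a_lo = static_cast<std::uint32_t>(a);
        const std::uint32_t a_hi = static_cast<std::uint32_t>(a >> 32);
        const std::uint32_t b_lo = static_cast<std::uint32_t>(b);
        const std::uint32_t b_hi = static_cast<std::uint32_t>(b >> 32);
        const std::uint32_t ret_lo = a_lo + b_lo;
        const std::uint32_t carry = ret_lo < a_lo ? 1u : 0u; // low sum wrapped -> carry into the high half
        const std::uint32_t ret_hi = a_hi + b_hi + carry;
        return (static_cast<std::uint64_t>(ret_hi) << 32) | ret_lo;
    }

    int main() {
        assert(Add64Via32(0xFFFF'FFFFULL, 1) == 0x1'0000'0000ULL); // carry propagates
        assert(Add64Via32(0x1234'5678'9ABC'DEF0ULL, 0x1111'1111'1111'1111ULL) ==
               0x1234'5678'9ABC'DEF0ULL + 0x1111'1111'1111'1111ULL);
        return 0;
    }
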
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
new file mode 100644
index 000000000..2f89b1ea0
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -0,0 +1,32 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <span>
8
9#include "shader_recompiler/environment.h"
10#include "shader_recompiler/frontend/ir/basic_block.h"
11#include "shader_recompiler/frontend/ir/program.h"
12
13namespace Shader::Optimization {
14
15void CollectShaderInfoPass(Environment& env, IR::Program& program);
16void ConstantPropagationPass(IR::Program& program);
17void DeadCodeEliminationPass(IR::Program& program);
18void GlobalMemoryToStorageBufferPass(IR::Program& program);
19void IdentityRemovalPass(IR::Program& program);
20void LowerFp16ToFp32(IR::Program& program);
21void LowerInt64ToInt32(IR::Program& program);
22void SsaRewritePass(IR::Program& program);
23void TexturePass(Environment& env, IR::Program& program);
24void VerificationPass(const IR::Program& program);
25
26// Dual Vertex
27void VertexATransformPass(IR::Program& program);
28void VertexBTransformPass(IR::Program& program);
29void JoinTextureInfo(Info& base, Info& source);
30void JoinStorageInfo(Info& base, Info& source);
31
32} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
new file mode 100644
index 000000000..53145fb5e
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp
@@ -0,0 +1,383 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5// This file implements the SSA rewriting algorithm proposed in
6//
7// Simple and Efficient Construction of Static Single Assignment Form.
8// Braun M., Buchwald S., Hack S., Leißa R., Mallon C., Zwinkau A. (2013)
9// In: Jhala R., De Bosschere K. (eds)
10// Compiler Construction. CC 2013.
11// Lecture Notes in Computer Science, vol 7791.
12// Springer, Berlin, Heidelberg
13//
14// https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6
15//
16
17#include <span>
18#include <variant>
19#include <vector>
20
21#include <boost/container/flat_map.hpp>
22#include <boost/container/flat_set.hpp>
23
24#include "shader_recompiler/frontend/ir/basic_block.h"
25#include "shader_recompiler/frontend/ir/opcodes.h"
26#include "shader_recompiler/frontend/ir/pred.h"
27#include "shader_recompiler/frontend/ir/reg.h"
28#include "shader_recompiler/frontend/ir/value.h"
29#include "shader_recompiler/ir_opt/passes.h"
30
31namespace Shader::Optimization {
32namespace {
33struct FlagTag {
34 auto operator<=>(const FlagTag&) const noexcept = default;
35};
36struct ZeroFlagTag : FlagTag {};
37struct SignFlagTag : FlagTag {};
38struct CarryFlagTag : FlagTag {};
39struct OverflowFlagTag : FlagTag {};
40
41struct GotoVariable : FlagTag {
42 GotoVariable() = default;
43 explicit GotoVariable(u32 index_) : index{index_} {}
44
45 auto operator<=>(const GotoVariable&) const noexcept = default;
46
47 u32 index;
48};
49
50struct IndirectBranchVariable {
51 auto operator<=>(const IndirectBranchVariable&) const noexcept = default;
52};
53
54using Variant = std::variant<IR::Reg, IR::Pred, ZeroFlagTag, SignFlagTag, CarryFlagTag,
55 OverflowFlagTag, GotoVariable, IndirectBranchVariable>;
56using ValueMap = boost::container::flat_map<IR::Block*, IR::Value>;
57
58struct DefTable {
59 const IR::Value& Def(IR::Block* block, IR::Reg variable) {
60 return block->SsaRegValue(variable);
61 }
62 void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) {
63 block->SetSsaRegValue(variable, value);
64 }
65
66 const IR::Value& Def(IR::Block* block, IR::Pred variable) {
67 return preds[IR::PredIndex(variable)][block];
68 }
69 void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) {
70 preds[IR::PredIndex(variable)].insert_or_assign(block, value);
71 }
72
73 const IR::Value& Def(IR::Block* block, GotoVariable variable) {
74 return goto_vars[variable.index][block];
75 }
76 void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) {
77 goto_vars[variable.index].insert_or_assign(block, value);
78 }
79
80 const IR::Value& Def(IR::Block* block, IndirectBranchVariable) {
81 return indirect_branch_var[block];
82 }
83 void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) {
84 indirect_branch_var.insert_or_assign(block, value);
85 }
86
87 const IR::Value& Def(IR::Block* block, ZeroFlagTag) {
88 return zero_flag[block];
89 }
90 void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) {
91 zero_flag.insert_or_assign(block, value);
92 }
93
94 const IR::Value& Def(IR::Block* block, SignFlagTag) {
95 return sign_flag[block];
96 }
97 void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) {
98 sign_flag.insert_or_assign(block, value);
99 }
100
101 const IR::Value& Def(IR::Block* block, CarryFlagTag) {
102 return carry_flag[block];
103 }
104 void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) {
105 carry_flag.insert_or_assign(block, value);
106 }
107
108 const IR::Value& Def(IR::Block* block, OverflowFlagTag) {
109 return overflow_flag[block];
110 }
111 void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) {
112 overflow_flag.insert_or_assign(block, value);
113 }
114
115 std::array<ValueMap, IR::NUM_USER_PREDS> preds;
116 boost::container::flat_map<u32, ValueMap> goto_vars;
117 ValueMap indirect_branch_var;
118 ValueMap zero_flag;
119 ValueMap sign_flag;
120 ValueMap carry_flag;
121 ValueMap overflow_flag;
122};
123
124IR::Opcode UndefOpcode(IR::Reg) noexcept {
125 return IR::Opcode::UndefU32;
126}
127
128IR::Opcode UndefOpcode(IR::Pred) noexcept {
129 return IR::Opcode::UndefU1;
130}
131
132IR::Opcode UndefOpcode(const FlagTag&) noexcept {
133 return IR::Opcode::UndefU1;
134}
135
136IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept {
137 return IR::Opcode::UndefU32;
138}
139
140enum class Status {
141 Start,
142 SetValue,
143 PreparePhiArgument,
144 PushPhiArgument,
145};
146
147template <typename Type>
148struct ReadState {
149 ReadState(IR::Block* block_) : block{block_} {}
150 ReadState() = default;
151
152 IR::Block* block{};
153 IR::Value result{};
154 IR::Inst* phi{};
155 IR::Block* const* pred_it{};
156 IR::Block* const* pred_end{};
157 Status pc{Status::Start};
158};
159
160class Pass {
161public:
162 template <typename Type>
163 void WriteVariable(Type variable, IR::Block* block, const IR::Value& value) {
164 current_def.SetDef(block, variable, value);
165 }
166
167 template <typename Type>
168 IR::Value ReadVariable(Type variable, IR::Block* root_block) {
169 boost::container::small_vector<ReadState<Type>, 64> stack{
170 ReadState<Type>(nullptr),
171 ReadState<Type>(root_block),
172 };
173 const auto prepare_phi_operand{[&] {
174 if (stack.back().pred_it == stack.back().pred_end) {
175 IR::Inst* const phi{stack.back().phi};
176 IR::Block* const block{stack.back().block};
177 const IR::Value result{TryRemoveTrivialPhi(*phi, block, UndefOpcode(variable))};
178 stack.pop_back();
179 stack.back().result = result;
180 WriteVariable(variable, block, result);
181 } else {
182 IR::Block* const imm_pred{*stack.back().pred_it};
183 stack.back().pc = Status::PushPhiArgument;
184 stack.emplace_back(imm_pred);
185 }
186 }};
187 do {
188 IR::Block* const block{stack.back().block};
189 switch (stack.back().pc) {
190 case Status::Start: {
191 if (const IR::Value& def = current_def.Def(block, variable); !def.IsEmpty()) {
192 stack.back().result = def;
193 } else if (!block->IsSsaSealed()) {
194 // Incomplete CFG
195 IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
196 phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
197
198 incomplete_phis[block].insert_or_assign(variable, phi);
199 stack.back().result = IR::Value{&*phi};
200 } else if (const std::span imm_preds = block->ImmPredecessors();
201 imm_preds.size() == 1) {
202 // Optimize the common case of one predecessor: no phi needed
203 stack.back().pc = Status::SetValue;
204 stack.emplace_back(imm_preds.front());
205 break;
206 } else {
207 // Break potential cycles with operandless phi
208 IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
209 phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
210
211 WriteVariable(variable, block, IR::Value{phi});
212
213 stack.back().phi = phi;
214 stack.back().pred_it = imm_preds.data();
215 stack.back().pred_end = imm_preds.data() + imm_preds.size();
216 prepare_phi_operand();
217 break;
218 }
219 }
220 [[fallthrough]];
221 case Status::SetValue: {
222 const IR::Value result{stack.back().result};
223 WriteVariable(variable, block, result);
224 stack.pop_back();
225 stack.back().result = result;
226 break;
227 }
228 case Status::PushPhiArgument: {
229 IR::Inst* const phi{stack.back().phi};
230 phi->AddPhiOperand(*stack.back().pred_it, stack.back().result);
231 ++stack.back().pred_it;
232 }
233 [[fallthrough]];
234 case Status::PreparePhiArgument:
235 prepare_phi_operand();
236 break;
237 }
238 } while (stack.size() > 1);
239 return stack.back().result;
240 }
241
242 void SealBlock(IR::Block* block) {
243 const auto it{incomplete_phis.find(block)};
244 if (it != incomplete_phis.end()) {
245 for (auto& pair : it->second) {
246 auto& variant{pair.first};
247 auto& phi{pair.second};
248 std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant);
249 }
250 }
251 block->SsaSeal();
252 }
253
254private:
255 template <typename Type>
256 IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) {
257 for (IR::Block* const imm_pred : block->ImmPredecessors()) {
258 phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred));
259 }
260 return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable));
261 }
262
263 IR::Value TryRemoveTrivialPhi(IR::Inst& phi, IR::Block* block, IR::Opcode undef_opcode) {
264 IR::Value same;
265 const size_t num_args{phi.NumArgs()};
266 for (size_t arg_index = 0; arg_index < num_args; ++arg_index) {
267 const IR::Value& op{phi.Arg(arg_index)};
268 if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) {
269 // Unique value or self-reference
270 continue;
271 }
272 if (!same.IsEmpty()) {
273 // The phi merges at least two values: not trivial
274 return IR::Value{&phi};
275 }
276 same = op;
277 }
278 // Remove the phi node from the block; it will be reinserted
279 IR::Block::InstructionList& list{block->Instructions()};
280 list.erase(IR::Block::InstructionList::s_iterator_to(phi));
281
282 // Find the first non-phi instruction and use it as an insertion point
283 IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IR::IsPhi)};
284 if (same.IsEmpty()) {
285 // The phi is unreachable or in the start block
286 // Insert an undefined instruction and make it the phi node replacement
287 // The "phi" node reinsertion point is specified after this instruction
288 reinsert_point = block->PrependNewInst(reinsert_point, undef_opcode);
289 same = IR::Value{&*reinsert_point};
290 ++reinsert_point;
291 }
292 // Reinsert the phi node and reroute all its uses to the "same" value
293 list.insert(reinsert_point, phi);
294 phi.ReplaceUsesWith(same);
295 // TODO: Try to recursively remove all phi users, which might have become trivial
296 return same;
297 }
298
299 boost::container::flat_map<IR::Block*, boost::container::flat_map<Variant, IR::Inst*>>
300 incomplete_phis;
301 DefTable current_def;
302};
303
304void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
305 switch (inst.GetOpcode()) {
306 case IR::Opcode::SetRegister:
307 if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
308 pass.WriteVariable(reg, block, inst.Arg(1));
309 }
310 break;
311 case IR::Opcode::SetPred:
312 if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
313 pass.WriteVariable(pred, block, inst.Arg(1));
314 }
315 break;
316 case IR::Opcode::SetGotoVariable:
317 pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1));
318 break;
319 case IR::Opcode::SetIndirectBranchVariable:
320 pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0));
321 break;
322 case IR::Opcode::SetZFlag:
323 pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0));
324 break;
325 case IR::Opcode::SetSFlag:
326 pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0));
327 break;
328 case IR::Opcode::SetCFlag:
329 pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0));
330 break;
331 case IR::Opcode::SetOFlag:
332 pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0));
333 break;
334 case IR::Opcode::GetRegister:
335 if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) {
336 inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
337 }
338 break;
339 case IR::Opcode::GetPred:
340 if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) {
341 inst.ReplaceUsesWith(pass.ReadVariable(pred, block));
342 }
343 break;
344 case IR::Opcode::GetGotoVariable:
345 inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block));
346 break;
347 case IR::Opcode::GetIndirectBranchVariable:
348 inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block));
349 break;
350 case IR::Opcode::GetZFlag:
351 inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block));
352 break;
353 case IR::Opcode::GetSFlag:
354 inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block));
355 break;
356 case IR::Opcode::GetCFlag:
357 inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block));
358 break;
359 case IR::Opcode::GetOFlag:
360 inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block));
361 break;
362 default:
363 break;
364 }
365}
366
367void VisitBlock(Pass& pass, IR::Block* block) {
368 for (IR::Inst& inst : block->Instructions()) {
369 VisitInst(pass, block, inst);
370 }
371 pass.SealBlock(block);
372}
373} // Anonymous namespace
374
375void SsaRewritePass(IR::Program& program) {
376 Pass pass;
377 const auto end{program.post_order_blocks.rend()};
378 for (auto block = program.post_order_blocks.rbegin(); block != end; ++block) {
379 VisitBlock(pass, *block);
380 }
381}
382
383} // namespace Shader::Optimization
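
The core of TryRemoveTrivialPhi above is the test that a phi is trivial when every operand is either the phi itself or one and the same value. A plain-pointer analogue, assuming raw pointer equality stands in for the IR::Value::Resolve() comparison used by the pass:

    // Sketch: trivial-phi detection from the Braun et al. algorithm, on plain pointers.
    #include <cassert>
    #include <vector>

    static const void* TryRemoveTrivialPhiRef(const void* phi, const std::vector<const void*>& ops) {
        const void* same = nullptr;
        for (const void* op : ops) {
            if (op == same || op == phi) {
                continue; // same value or self-reference
            }
            if (same != nullptr) {
                return phi; // the phi merges at least two distinct values: not trivial
            }
            same = op;
        }
        return same; // nullptr corresponds to the undefined/start-block case
    }

    int main() {
        int v{};
        int w{};
        int phi_node{};
        const void* const phi = &phi_node;
        assert(TryRemoveTrivialPhiRef(phi, {&v, phi, &v}) == &v); // trivial: every operand is v or the phi
        assert(TryRemoveTrivialPhiRef(phi, {&v, &w}) == phi);     // merges v and w: kept as-is
        return 0;
    }
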
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
new file mode 100644
index 000000000..44ad10d43
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -0,0 +1,523 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <bit>
7#include <optional>
8
9#include <boost/container/small_vector.hpp>
10
11#include "shader_recompiler/environment.h"
12#include "shader_recompiler/frontend/ir/basic_block.h"
13#include "shader_recompiler/frontend/ir/breadth_first_search.h"
14#include "shader_recompiler/frontend/ir/ir_emitter.h"
15#include "shader_recompiler/ir_opt/passes.h"
16#include "shader_recompiler/shader_info.h"
17
18namespace Shader::Optimization {
19namespace {
20struct ConstBufferAddr {
21 u32 index;
22 u32 offset;
23 u32 secondary_index;
24 u32 secondary_offset;
25 IR::U32 dynamic_offset;
26 u32 count;
27 bool has_secondary;
28};
29
30struct TextureInst {
31 ConstBufferAddr cbuf;
32 IR::Inst* inst;
33 IR::Block* block;
34};
35
36using TextureInstVector = boost::container::small_vector<TextureInst, 24>;
37
38constexpr u32 DESCRIPTOR_SIZE = 8;
39constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast<u32>(std::countr_zero(DESCRIPTOR_SIZE));
40
41IR::Opcode IndexedInstruction(const IR::Inst& inst) {
42 switch (inst.GetOpcode()) {
43 case IR::Opcode::BindlessImageSampleImplicitLod:
44 case IR::Opcode::BoundImageSampleImplicitLod:
45 return IR::Opcode::ImageSampleImplicitLod;
46 case IR::Opcode::BoundImageSampleExplicitLod:
47 case IR::Opcode::BindlessImageSampleExplicitLod:
48 return IR::Opcode::ImageSampleExplicitLod;
49 case IR::Opcode::BoundImageSampleDrefImplicitLod:
50 case IR::Opcode::BindlessImageSampleDrefImplicitLod:
51 return IR::Opcode::ImageSampleDrefImplicitLod;
52 case IR::Opcode::BoundImageSampleDrefExplicitLod:
53 case IR::Opcode::BindlessImageSampleDrefExplicitLod:
54 return IR::Opcode::ImageSampleDrefExplicitLod;
55 case IR::Opcode::BindlessImageGather:
56 case IR::Opcode::BoundImageGather:
57 return IR::Opcode::ImageGather;
58 case IR::Opcode::BindlessImageGatherDref:
59 case IR::Opcode::BoundImageGatherDref:
60 return IR::Opcode::ImageGatherDref;
61 case IR::Opcode::BindlessImageFetch:
62 case IR::Opcode::BoundImageFetch:
63 return IR::Opcode::ImageFetch;
64 case IR::Opcode::BoundImageQueryDimensions:
65 case IR::Opcode::BindlessImageQueryDimensions:
66 return IR::Opcode::ImageQueryDimensions;
67 case IR::Opcode::BoundImageQueryLod:
68 case IR::Opcode::BindlessImageQueryLod:
69 return IR::Opcode::ImageQueryLod;
70 case IR::Opcode::BoundImageGradient:
71 case IR::Opcode::BindlessImageGradient:
72 return IR::Opcode::ImageGradient;
73 case IR::Opcode::BoundImageRead:
74 case IR::Opcode::BindlessImageRead:
75 return IR::Opcode::ImageRead;
76 case IR::Opcode::BoundImageWrite:
77 case IR::Opcode::BindlessImageWrite:
78 return IR::Opcode::ImageWrite;
79 case IR::Opcode::BoundImageAtomicIAdd32:
80 case IR::Opcode::BindlessImageAtomicIAdd32:
81 return IR::Opcode::ImageAtomicIAdd32;
82 case IR::Opcode::BoundImageAtomicSMin32:
83 case IR::Opcode::BindlessImageAtomicSMin32:
84 return IR::Opcode::ImageAtomicSMin32;
85 case IR::Opcode::BoundImageAtomicUMin32:
86 case IR::Opcode::BindlessImageAtomicUMin32:
87 return IR::Opcode::ImageAtomicUMin32;
88 case IR::Opcode::BoundImageAtomicSMax32:
89 case IR::Opcode::BindlessImageAtomicSMax32:
90 return IR::Opcode::ImageAtomicSMax32;
91 case IR::Opcode::BoundImageAtomicUMax32:
92 case IR::Opcode::BindlessImageAtomicUMax32:
93 return IR::Opcode::ImageAtomicUMax32;
94 case IR::Opcode::BoundImageAtomicInc32:
95 case IR::Opcode::BindlessImageAtomicInc32:
96 return IR::Opcode::ImageAtomicInc32;
97 case IR::Opcode::BoundImageAtomicDec32:
98 case IR::Opcode::BindlessImageAtomicDec32:
99 return IR::Opcode::ImageAtomicDec32;
100 case IR::Opcode::BoundImageAtomicAnd32:
101 case IR::Opcode::BindlessImageAtomicAnd32:
102 return IR::Opcode::ImageAtomicAnd32;
103 case IR::Opcode::BoundImageAtomicOr32:
104 case IR::Opcode::BindlessImageAtomicOr32:
105 return IR::Opcode::ImageAtomicOr32;
106 case IR::Opcode::BoundImageAtomicXor32:
107 case IR::Opcode::BindlessImageAtomicXor32:
108 return IR::Opcode::ImageAtomicXor32;
109 case IR::Opcode::BoundImageAtomicExchange32:
110 case IR::Opcode::BindlessImageAtomicExchange32:
111 return IR::Opcode::ImageAtomicExchange32;
112 default:
113 return IR::Opcode::Void;
114 }
115}
116
117bool IsBindless(const IR::Inst& inst) {
118 switch (inst.GetOpcode()) {
119 case IR::Opcode::BindlessImageSampleImplicitLod:
120 case IR::Opcode::BindlessImageSampleExplicitLod:
121 case IR::Opcode::BindlessImageSampleDrefImplicitLod:
122 case IR::Opcode::BindlessImageSampleDrefExplicitLod:
123 case IR::Opcode::BindlessImageGather:
124 case IR::Opcode::BindlessImageGatherDref:
125 case IR::Opcode::BindlessImageFetch:
126 case IR::Opcode::BindlessImageQueryDimensions:
127 case IR::Opcode::BindlessImageQueryLod:
128 case IR::Opcode::BindlessImageGradient:
129 case IR::Opcode::BindlessImageRead:
130 case IR::Opcode::BindlessImageWrite:
131 case IR::Opcode::BindlessImageAtomicIAdd32:
132 case IR::Opcode::BindlessImageAtomicSMin32:
133 case IR::Opcode::BindlessImageAtomicUMin32:
134 case IR::Opcode::BindlessImageAtomicSMax32:
135 case IR::Opcode::BindlessImageAtomicUMax32:
136 case IR::Opcode::BindlessImageAtomicInc32:
137 case IR::Opcode::BindlessImageAtomicDec32:
138 case IR::Opcode::BindlessImageAtomicAnd32:
139 case IR::Opcode::BindlessImageAtomicOr32:
140 case IR::Opcode::BindlessImageAtomicXor32:
141 case IR::Opcode::BindlessImageAtomicExchange32:
142 return true;
143 case IR::Opcode::BoundImageSampleImplicitLod:
144 case IR::Opcode::BoundImageSampleExplicitLod:
145 case IR::Opcode::BoundImageSampleDrefImplicitLod:
146 case IR::Opcode::BoundImageSampleDrefExplicitLod:
147 case IR::Opcode::BoundImageGather:
148 case IR::Opcode::BoundImageGatherDref:
149 case IR::Opcode::BoundImageFetch:
150 case IR::Opcode::BoundImageQueryDimensions:
151 case IR::Opcode::BoundImageQueryLod:
152 case IR::Opcode::BoundImageGradient:
153 case IR::Opcode::BoundImageRead:
154 case IR::Opcode::BoundImageWrite:
155 case IR::Opcode::BoundImageAtomicIAdd32:
156 case IR::Opcode::BoundImageAtomicSMin32:
157 case IR::Opcode::BoundImageAtomicUMin32:
158 case IR::Opcode::BoundImageAtomicSMax32:
159 case IR::Opcode::BoundImageAtomicUMax32:
160 case IR::Opcode::BoundImageAtomicInc32:
161 case IR::Opcode::BoundImageAtomicDec32:
162 case IR::Opcode::BoundImageAtomicAnd32:
163 case IR::Opcode::BoundImageAtomicOr32:
164 case IR::Opcode::BoundImageAtomicXor32:
165 case IR::Opcode::BoundImageAtomicExchange32:
166 return false;
167 default:
168 throw InvalidArgument("Invalid opcode {}", inst.GetOpcode());
169 }
170}
171
172bool IsTextureInstruction(const IR::Inst& inst) {
173 return IndexedInstruction(inst) != IR::Opcode::Void;
174}
175
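// Track() walks a value's definition chain breadth-first, looking for the constant buffer
// read(s) that produce the texture handle.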
176std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst);
177
178std::optional<ConstBufferAddr> Track(const IR::Value& value) {
179 return IR::BreadthFirstSearch(value, TryGetConstBuffer);
180}
181
182std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) {
183 switch (inst->GetOpcode()) {
184 default:
185 return std::nullopt;
186 case IR::Opcode::BitwiseOr32: {
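        // A handle formed by OR-ing two constant buffer reads is treated as a separate
        // texture/sampler pair: track both reads and record one as the secondary constant buffer.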
187 std::optional lhs{Track(inst->Arg(0))};
188 std::optional rhs{Track(inst->Arg(1))};
189 if (!lhs || !rhs) {
190 return std::nullopt;
191 }
192 if (lhs->has_secondary || rhs->has_secondary) {
193 return std::nullopt;
194 }
195 if (lhs->count > 1 || rhs->count > 1) {
196 return std::nullopt;
197 }
198 if (lhs->index > rhs->index || lhs->offset > rhs->offset) {
199 std::swap(lhs, rhs);
200 }
201 return ConstBufferAddr{
202 .index = lhs->index,
203 .offset = lhs->offset,
204 .secondary_index = rhs->index,
205 .secondary_offset = rhs->offset,
206 .dynamic_offset = {},
207 .count = 1,
208 .has_secondary = true,
209 };
210 }
211 case IR::Opcode::GetCbufU32x2:
212 case IR::Opcode::GetCbufU32:
213 break;
214 }
215 const IR::Value index{inst->Arg(0)};
216 const IR::Value offset{inst->Arg(1)};
217 if (!index.IsImmediate()) {
218        // Reading a bindless texture handle from a variable constant buffer index is valid,
219        // but it is not supported here at the moment
220 return std::nullopt;
221 }
222 if (offset.IsImmediate()) {
223 return ConstBufferAddr{
224 .index = index.U32(),
225 .offset = offset.U32(),
226 .secondary_index = 0,
227 .secondary_offset = 0,
228 .dynamic_offset = {},
229 .count = 1,
230 .has_secondary = false,
231 };
232 }
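    // Non-immediate offsets are only tracked when they decompose into an immediate base plus a
    // dynamic index (a dynamically indexed handle).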
233 IR::Inst* const offset_inst{offset.InstRecursive()};
234 if (offset_inst->GetOpcode() != IR::Opcode::IAdd32) {
235 return std::nullopt;
236 }
237 u32 base_offset{};
238 IR::U32 dynamic_offset;
239 if (offset_inst->Arg(0).IsImmediate()) {
240 base_offset = offset_inst->Arg(0).U32();
241 dynamic_offset = IR::U32{offset_inst->Arg(1)};
242 } else if (offset_inst->Arg(1).IsImmediate()) {
243 base_offset = offset_inst->Arg(1).U32();
244 dynamic_offset = IR::U32{offset_inst->Arg(0)};
245 } else {
246 return std::nullopt;
247 }
248 return ConstBufferAddr{
249 .index = index.U32(),
250 .offset = base_offset,
251 .secondary_index = 0,
252 .secondary_offset = 0,
253 .dynamic_offset = dynamic_offset,
254 .count = 8,
255 .has_secondary = false,
256 };
257}
258
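// Resolves the constant buffer address of the instruction's texture handle: bindless handles are
// tracked back to their constant buffer read, bound handles use the environment's bound texture
// constant buffer together with the immediate handle offset.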
259TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
260 ConstBufferAddr addr;
261 if (IsBindless(inst)) {
262 const std::optional<ConstBufferAddr> track_addr{Track(inst.Arg(0))};
263 if (!track_addr) {
264 throw NotImplementedException("Failed to track bindless texture constant buffer");
265 }
266 addr = *track_addr;
267 } else {
268 addr = ConstBufferAddr{
269 .index = env.TextureBoundBuffer(),
270 .offset = inst.Arg(0).U32(),
271 .secondary_index = 0,
272 .secondary_offset = 0,
273 .dynamic_offset = {},
274 .count = 1,
275 .has_secondary = false,
276 };
277 }
278 return TextureInst{
279 .cbuf = addr,
280 .inst = &inst,
281 .block = block,
282 };
283}
284
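// Reads the raw handle from the constant buffer, OR-ing in the secondary half when present, and
// queries the environment for that handle's texture type.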
285TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) {
286 const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index};
287 const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset};
288 const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)};
289 const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)};
290 return env.ReadTextureType(lhs_raw | rhs_raw);
291}
292
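// Collects texture and image (buffer) descriptors into the shader info arrays, deduplicating
// identical entries and returning the index of each descriptor.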
293class Descriptors {
294public:
295 explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_,
296 ImageBufferDescriptors& image_buffer_descriptors_,
297 TextureDescriptors& texture_descriptors_,
298 ImageDescriptors& image_descriptors_)
299 : texture_buffer_descriptors{texture_buffer_descriptors_},
300 image_buffer_descriptors{image_buffer_descriptors_},
301 texture_descriptors{texture_descriptors_}, image_descriptors{image_descriptors_} {}
302
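    // Each Add returns the index of an equivalent existing descriptor or appends a new one; the
    // image variants also merge the is_written/is_read flags into the existing entry.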
303 u32 Add(const TextureBufferDescriptor& desc) {
304 return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) {
305 return desc.cbuf_index == existing.cbuf_index &&
306 desc.cbuf_offset == existing.cbuf_offset &&
307 desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
308 desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
309 desc.count == existing.count && desc.size_shift == existing.size_shift &&
310 desc.has_secondary == existing.has_secondary;
311 });
312 }
313
314 u32 Add(const ImageBufferDescriptor& desc) {
315 const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) {
316 return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index &&
317 desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
318 desc.size_shift == existing.size_shift;
319 })};
320 image_buffer_descriptors[index].is_written |= desc.is_written;
321 image_buffer_descriptors[index].is_read |= desc.is_read;
322 return index;
323 }
324
325 u32 Add(const TextureDescriptor& desc) {
326 return Add(texture_descriptors, desc, [&desc](const auto& existing) {
327 return desc.type == existing.type && desc.is_depth == existing.is_depth &&
328 desc.has_secondary == existing.has_secondary &&
329 desc.cbuf_index == existing.cbuf_index &&
330 desc.cbuf_offset == existing.cbuf_offset &&
331 desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
332 desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
333 desc.count == existing.count && desc.size_shift == existing.size_shift;
334 });
335 }
336
337 u32 Add(const ImageDescriptor& desc) {
338 const u32 index{Add(image_descriptors, desc, [&desc](const auto& existing) {
339 return desc.type == existing.type && desc.format == existing.format &&
340 desc.cbuf_index == existing.cbuf_index &&
341 desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
342 desc.size_shift == existing.size_shift;
343 })};
344 image_descriptors[index].is_written |= desc.is_written;
345 image_descriptors[index].is_read |= desc.is_read;
346 return index;
347 }
348
349private:
350 template <typename Descriptors, typename Descriptor, typename Func>
351 static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
352 // TODO: Handle arrays
353 const auto it{std::ranges::find_if(descriptors, pred)};
354 if (it != descriptors.end()) {
355 return static_cast<u32>(std::distance(descriptors.begin(), it));
356 }
357 descriptors.push_back(desc);
358 return static_cast<u32>(descriptors.size()) - 1;
359 }
360
361 TextureBufferDescriptors& texture_buffer_descriptors;
362 ImageBufferDescriptors& image_buffer_descriptors;
363 TextureDescriptors& texture_descriptors;
364 ImageDescriptors& image_descriptors;
365};
366} // Anonymous namespace
367
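// Replaces bound and bindless image instructions with their indexed equivalents and fills the
// texture/image descriptor tables in the program info.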
368void TexturePass(Environment& env, IR::Program& program) {
369 TextureInstVector to_replace;
370 for (IR::Block* const block : program.post_order_blocks) {
371 for (IR::Inst& inst : block->Instructions()) {
372 if (!IsTextureInstruction(inst)) {
373 continue;
374 }
375 to_replace.push_back(MakeInst(env, block, inst));
376 }
377 }
378    // Sort instructions so textures are visited by constant buffer index first, then by offset:
       // an offset sort followed by a stable sort on the index yields exactly that order
379 std::ranges::sort(to_replace, [](const auto& lhs, const auto& rhs) {
380 return lhs.cbuf.offset < rhs.cbuf.offset;
381 });
382 std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) {
383 return lhs.cbuf.index < rhs.cbuf.index;
384 });
385 Descriptors descriptors{
386 program.info.texture_buffer_descriptors,
387 program.info.image_buffer_descriptors,
388 program.info.texture_descriptors,
389 program.info.image_descriptors,
390 };
391 for (TextureInst& texture_inst : to_replace) {
392 // TODO: Handle arrays
393 IR::Inst* const inst{texture_inst.inst};
394 inst->ReplaceOpcode(IndexedInstruction(*inst));
395
396 const auto& cbuf{texture_inst.cbuf};
397 auto flags{inst->Flags<IR::TextureInstInfo>()};
398 switch (inst->GetOpcode()) {
399 case IR::Opcode::ImageQueryDimensions:
400 flags.type.Assign(ReadTextureType(env, cbuf));
401 inst->SetFlags(flags);
402 break;
403 case IR::Opcode::ImageFetch:
404 if (flags.type != TextureType::Color1D) {
405 break;
406 }
407 if (ReadTextureType(env, cbuf) == TextureType::Buffer) {
408                // Replace the type with the bound texture type only when it is a texture buffer.
409                // If the instruction is 1D and the bound type is 2D, don't change the code and
410                // let the rasterizer's robustness behavior handle it.
411                // This happens on Fire Emblem: Three Houses.
412 flags.type.Assign(TextureType::Buffer);
413 }
414 break;
415 default:
416 break;
417 }
418 u32 index;
419 switch (inst->GetOpcode()) {
420 case IR::Opcode::ImageRead:
421 case IR::Opcode::ImageAtomicIAdd32:
422 case IR::Opcode::ImageAtomicSMin32:
423 case IR::Opcode::ImageAtomicUMin32:
424 case IR::Opcode::ImageAtomicSMax32:
425 case IR::Opcode::ImageAtomicUMax32:
426 case IR::Opcode::ImageAtomicInc32:
427 case IR::Opcode::ImageAtomicDec32:
428 case IR::Opcode::ImageAtomicAnd32:
429 case IR::Opcode::ImageAtomicOr32:
430 case IR::Opcode::ImageAtomicXor32:
431 case IR::Opcode::ImageAtomicExchange32:
432 case IR::Opcode::ImageWrite: {
433 if (cbuf.has_secondary) {
434 throw NotImplementedException("Unexpected separate sampler");
435 }
436 const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead};
437 const bool is_read{inst->GetOpcode() != IR::Opcode::ImageWrite};
438 if (flags.type == TextureType::Buffer) {
439 index = descriptors.Add(ImageBufferDescriptor{
440 .format = flags.image_format,
441 .is_written = is_written,
442 .is_read = is_read,
443 .cbuf_index = cbuf.index,
444 .cbuf_offset = cbuf.offset,
445 .count = cbuf.count,
446 .size_shift = DESCRIPTOR_SIZE_SHIFT,
447 });
448 } else {
449 index = descriptors.Add(ImageDescriptor{
450 .type = flags.type,
451 .format = flags.image_format,
452 .is_written = is_written,
453 .is_read = is_read,
454 .cbuf_index = cbuf.index,
455 .cbuf_offset = cbuf.offset,
456 .count = cbuf.count,
457 .size_shift = DESCRIPTOR_SIZE_SHIFT,
458 });
459 }
460 break;
461 }
462 default:
463 if (flags.type == TextureType::Buffer) {
464 index = descriptors.Add(TextureBufferDescriptor{
465 .has_secondary = cbuf.has_secondary,
466 .cbuf_index = cbuf.index,
467 .cbuf_offset = cbuf.offset,
468 .secondary_cbuf_index = cbuf.secondary_index,
469 .secondary_cbuf_offset = cbuf.secondary_offset,
470 .count = cbuf.count,
471 .size_shift = DESCRIPTOR_SIZE_SHIFT,
472 });
473 } else {
474 index = descriptors.Add(TextureDescriptor{
475 .type = flags.type,
476 .is_depth = flags.is_depth != 0,
477 .has_secondary = cbuf.has_secondary,
478 .cbuf_index = cbuf.index,
479 .cbuf_offset = cbuf.offset,
480 .secondary_cbuf_index = cbuf.secondary_index,
481 .secondary_cbuf_offset = cbuf.secondary_offset,
482 .count = cbuf.count,
483 .size_shift = DESCRIPTOR_SIZE_SHIFT,
484 });
485 }
486 break;
487 }
488 flags.descriptor_index.Assign(index);
489 inst->SetFlags(flags);
490
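        // For dynamically indexed handles, replace the handle argument with the dynamic offset
        // divided by the descriptor size (the run-time index into the descriptor array);
        // otherwise the handle argument is no longer needed.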
491 if (cbuf.count > 1) {
492 const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)};
493 IR::IREmitter ir{*texture_inst.block, insert_point};
494 const IR::U32 shift{ir.Imm32(std::countr_zero(DESCRIPTOR_SIZE))};
495 inst->SetArg(0, ir.ShiftRightArithmetic(cbuf.dynamic_offset, shift));
496 } else {
497 inst->SetArg(0, IR::Value{});
498 }
499 }
500}
501
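// Merges the texture and image descriptors of `source` into `base`, deduplicating entries through
// the same Descriptors helper used by TexturePass.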
502void JoinTextureInfo(Info& base, Info& source) {
503 Descriptors descriptors{
504 base.texture_buffer_descriptors,
505 base.image_buffer_descriptors,
506 base.texture_descriptors,
507 base.image_descriptors,
508 };
509 for (auto& desc : source.texture_buffer_descriptors) {
510 descriptors.Add(desc);
511 }
512 for (auto& desc : source.image_buffer_descriptors) {
513 descriptors.Add(desc);
514 }
515 for (auto& desc : source.texture_descriptors) {
516 descriptors.Add(desc);
517 }
518 for (auto& desc : source.image_descriptors) {
519 descriptors.Add(desc);
520 }
521}
522
523} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp
new file mode 100644
index 000000000..975d5aadf
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/verification_pass.cpp
@@ -0,0 +1,98 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <map>
6#include <set>
7
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/ir/basic_block.h"
10#include "shader_recompiler/frontend/ir/value.h"
11#include "shader_recompiler/ir_opt/passes.h"
12
13namespace Shader::Optimization {
14
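// Checks that every non-phi argument has a type compatible with what its opcode expects.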
15static void ValidateTypes(const IR::Program& program) {
16 for (const auto& block : program.blocks) {
17 for (const IR::Inst& inst : *block) {
18 if (inst.GetOpcode() == IR::Opcode::Phi) {
19 // Skip validation on phi nodes
20 continue;
21 }
22 const size_t num_args{inst.NumArgs()};
23 for (size_t i = 0; i < num_args; ++i) {
24 const IR::Type t1{inst.Arg(i).Type()};
25 const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)};
26 if (!IR::AreTypesCompatible(t1, t2)) {
27 throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block));
28 }
29 }
30 }
31 }
32}
33
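// Recounts how many times each non-immediate value is referenced and compares the result against
// every instruction's cached use count.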
34static void ValidateUses(const IR::Program& program) {
35 std::map<IR::Inst*, int> actual_uses;
36 for (const auto& block : program.blocks) {
37 for (const IR::Inst& inst : *block) {
38 const size_t num_args{inst.NumArgs()};
39 for (size_t i = 0; i < num_args; ++i) {
40 const IR::Value arg{inst.Arg(i)};
41 if (!arg.IsImmediate()) {
42 ++actual_uses[arg.Inst()];
43 }
44 }
45 }
46 }
47 for (const auto [inst, uses] : actual_uses) {
48 if (inst->UseCount() != uses) {
49            throw LogicError("Invalid uses in program:\n{}", IR::DumpProgram(program));
50 }
51 }
52}
53
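// Ensures that non-phi instructions only reference values defined earlier in the program's block
// order.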
54static void ValidateForwardDeclarations(const IR::Program& program) {
55 std::set<const IR::Inst*> definitions;
56 for (const IR::Block* const block : program.blocks) {
57 for (const IR::Inst& inst : *block) {
58 definitions.emplace(&inst);
59 if (inst.GetOpcode() == IR::Opcode::Phi) {
60 // Phi nodes can have forward declarations
61 continue;
62 }
63 const size_t num_args{inst.NumArgs()};
64 for (size_t arg = 0; arg < num_args; ++arg) {
65 if (inst.Arg(arg).IsImmediate()) {
66 continue;
67 }
68 if (!definitions.contains(inst.Arg(arg).Inst())) {
69 throw LogicError("Forward declaration in block: {}", IR::DumpBlock(*block));
70 }
71 }
72 }
73 }
74}
75
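// Phi nodes must form a contiguous run at the start of each block; a phi appearing after a
// non-phi instruction is an error.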
76static void ValidatePhiNodes(const IR::Program& program) {
77 for (const IR::Block* const block : program.blocks) {
78 bool no_more_phis{false};
79 for (const IR::Inst& inst : *block) {
80 if (inst.GetOpcode() == IR::Opcode::Phi) {
81 if (no_more_phis) {
82 throw LogicError("Interleaved phi nodes: {}", IR::DumpBlock(*block));
83 }
84 } else {
85 no_more_phis = true;
86 }
87 }
88 }
89}
90
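// Entry point: runs every structural validation over the IR program, throwing LogicError on the
// first violation found.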
91void VerificationPass(const IR::Program& program) {
92 ValidateTypes(program);
93 ValidateUses(program);
94 ValidateForwardDeclarations(program);
95 ValidatePhiNodes(program);
96}
97
98} // namespace Shader::Optimization