Diffstat (limited to 'src/shader_recompiler')
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_instructions.h                           31
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp                              107
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp                                142
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_instructions.h                             31
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv.h                                          2
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp                               119
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp                       59
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_instructions.h                            30
-rw-r--r--  src/shader_recompiler/backend/spirv/spirv_emit_context.cpp                               59
-rw-r--r--  src/shader_recompiler/backend/spirv/spirv_emit_context.h                                  8
-rw-r--r--  src/shader_recompiler/frontend/ir/microinstruction.cpp                                   19
-rw-r--r--  src/shader_recompiler/frontend/ir/opcodes.inc                                            19
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp                  14
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp   580
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py  92
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate_program.cpp                              4
-rw-r--r--  src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp                               104
-rw-r--r--  src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp                    52
-rw-r--r--  src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp                                    20
-rw-r--r--  src/shader_recompiler/ir_opt/rescaling_pass.cpp                                          29
-rw-r--r--  src/shader_recompiler/shader_info.h                                                       2
21 files changed, 1397 insertions, 126 deletions
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
index b48007856..5efbe4e6f 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -372,6 +372,8 @@ void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 poin
                                 ScalarU32 value);
 void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
                                 Register value);
+void EmitSharedAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                                  Register value);
 void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              ScalarU32 offset, ScalarU32 value);
 void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
@@ -412,6 +414,24 @@ void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& b
                              ScalarU32 offset, Register value);
 void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                                  ScalarU32 offset, Register value);
+void EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value);
+void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value);
+void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value);
+void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value);
+void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value);
+void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                              ScalarU32 offset, Register value);
+void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, Register value);
+void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                              ScalarU32 offset, Register value);
+void EmitStorageAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                                   ScalarU32 offset, Register value);
 void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              ScalarU32 offset, ScalarF32 value);
 void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
@@ -448,6 +468,17 @@ void EmitGlobalAtomicAnd64(EmitContext& ctx);
 void EmitGlobalAtomicOr64(EmitContext& ctx);
 void EmitGlobalAtomicXor64(EmitContext& ctx);
 void EmitGlobalAtomicExchange64(EmitContext& ctx);
+void EmitGlobalAtomicIAdd32x2(EmitContext& ctx);
+void EmitGlobalAtomicSMin32x2(EmitContext& ctx);
+void EmitGlobalAtomicUMin32x2(EmitContext& ctx);
+void EmitGlobalAtomicSMax32x2(EmitContext& ctx);
+void EmitGlobalAtomicUMax32x2(EmitContext& ctx);
+void EmitGlobalAtomicInc32x2(EmitContext& ctx);
+void EmitGlobalAtomicDec32x2(EmitContext& ctx);
+void EmitGlobalAtomicAnd32x2(EmitContext& ctx);
+void EmitGlobalAtomicOr32x2(EmitContext& ctx);
+void EmitGlobalAtomicXor32x2(EmitContext& ctx);
+void EmitGlobalAtomicExchange32x2(EmitContext& ctx);
 void EmitGlobalAtomicAddF32(EmitContext& ctx);
 void EmitGlobalAtomicAddF16x2(EmitContext& ctx);
 void EmitGlobalAtomicAddF32x2(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
index f135b67f5..f0fd94a28 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
@@ -311,6 +311,13 @@ void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 poin
     ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset);
 }
 
+void EmitSharedAtomicExchange32x2([[maybe_unused]] EmitContext& ctx,
+                                  [[maybe_unused]] IR::Inst& inst,
+                                  [[maybe_unused]] ScalarU32 pointer_offset,
+                                  [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
 void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              ScalarU32 offset, ScalarU32 value) {
     Atom(ctx, inst, binding, offset, value, "ADD", "U32");
@@ -411,6 +418,62 @@ void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Val
     Atom(ctx, inst, binding, offset, value, "EXCH", "U64");
 }
 
+void EmitStorageAtomicIAdd32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                               [[maybe_unused]] const IR::Value& binding,
+                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicSMin32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                               [[maybe_unused]] const IR::Value& binding,
+                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicUMin32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                               [[maybe_unused]] const IR::Value& binding,
+                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicSMax32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                               [[maybe_unused]] const IR::Value& binding,
+                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicUMax32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                               [[maybe_unused]] const IR::Value& binding,
+                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicAnd32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                              [[maybe_unused]] const IR::Value& binding,
+                              [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicOr32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                             [[maybe_unused]] const IR::Value& binding,
+                             [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicXor32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                              [[maybe_unused]] const IR::Value& binding,
+                              [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicExchange32x2([[maybe_unused]] EmitContext& ctx,
+                                   [[maybe_unused]] IR::Inst& inst,
+                                   [[maybe_unused]] const IR::Value& binding,
+                                   [[maybe_unused]] ScalarU32 offset,
+                                   [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
 void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              ScalarU32 offset, ScalarF32 value) {
     Atom(ctx, inst, binding, offset, value, "ADD", "F32");
@@ -537,6 +600,50 @@ void EmitGlobalAtomicExchange64(EmitContext&) {
     throw NotImplementedException("GLASM instruction");
 }
 
+void EmitGlobalAtomicIAdd32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMin32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMin32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMax32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMax32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicInc32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicDec32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAnd32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicOr32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicXor32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicExchange32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
 void EmitGlobalAtomicAddF32(EmitContext&) {
     throw NotImplementedException("GLASM instruction");
 }
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
index dc377b053..a409a7ab3 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
@@ -105,6 +105,13 @@ void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_vi
             pointer_offset, value, pointer_offset, value);
 }
 
+void EmitSharedAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+                                  std::string_view value) {
+    LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+    ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, pointer_offset, pointer_offset);
+    ctx.Add("smem[{}>>2]={}.x;smem[({}+4)>>2]={}.y;", pointer_offset, value, pointer_offset, value);
+}
+
 void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              const IR::Value& offset, std::string_view value) {
     ctx.AddU32("{}=atomicAdd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(),
@@ -265,6 +272,97 @@ void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Val
                ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
 }
 
+void EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               const IR::Value& offset, std::string_view value) {
+    LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+    ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
+                 binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
+                 ctx.var_alloc.Consume(offset));
+    ctx.Add("{}_ssbo{}[{}>>2]+={}.x;{}_ssbo{}[({}>>2)+1]+={}.y;", ctx.stage_name, binding.U32(),
+            ctx.var_alloc.Consume(offset), value, ctx.stage_name, binding.U32(),
+            ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               const IR::Value& offset, std::string_view value) {
+    LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+    ctx.AddU32x2("{}=ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
+                 binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
+                 ctx.var_alloc.Consume(offset));
+    ctx.Add("for(int "
+            "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),int({}[i])));}}",
+            ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+            binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               const IR::Value& offset, std::string_view value) {
+    LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+    ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
+                 binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
+                 ctx.var_alloc.Consume(offset));
+    ctx.Add("for(int i=0;i<2;++i){{ "
+            "{}_ssbo{}[({}>>2)+i]=min({}_ssbo{}[({}>>2)+i],{}[i]);}}",
+            ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+            binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               const IR::Value& offset, std::string_view value) {
+    LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+    ctx.AddU32x2("{}=ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
+                 binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
+                 ctx.var_alloc.Consume(offset));
+    ctx.Add("for(int "
+            "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),int({}[i])));}}",
+            ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+            binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               const IR::Value& offset, std::string_view value) {
+    LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic");
+    ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name,
+                 binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(),
+                 ctx.var_alloc.Consume(offset));
+    ctx.Add("for(int i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],{}[i]);}}",
+            ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name,
+            binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                              const IR::Value& offset, std::string_view value) {
+    LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to 32x2");
+    ctx.AddU32x2("{}=uvec2(atomicAnd({}_ssbo{}[{}>>2],{}.x),atomicAnd({}_ssbo{}[({}>>2)+1],{}.y));",
+                 inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
+                 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             const IR::Value& offset, std::string_view value) {
+    LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to 32x2");
+    ctx.AddU32x2("{}=uvec2(atomicOr({}_ssbo{}[{}>>2],{}.x),atomicOr({}_ssbo{}[({}>>2)+1],{}.y));",
+                 inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
+                 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                              const IR::Value& offset, std::string_view value) {
+    LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to 32x2");
+    ctx.AddU32x2("{}=uvec2(atomicXor({}_ssbo{}[{}>>2],{}.x),atomicXor({}_ssbo{}[({}>>2)+1],{}.y));",
+                 inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
+                 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
+void EmitStorageAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                                   const IR::Value& offset, std::string_view value) {
+    LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to 32x2");
+    ctx.AddU32x2("{}=uvec2(atomicExchange({}_ssbo{}[{}>>2],{}.x),atomicExchange({}_ssbo{}[({}>>2)+"
+                 "1],{}.y));",
+                 inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value,
+                 ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value);
+}
+
 void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              const IR::Value& offset, std::string_view value) {
     SsboCasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd");
@@ -388,6 +486,50 @@ void EmitGlobalAtomicExchange64(EmitContext&) {
     throw NotImplementedException("GLSL Instrucion");
 }
 
+void EmitGlobalAtomicIAdd32x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instrucion");
+}
+
+void EmitGlobalAtomicSMin32x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instrucion");
+}
+
+void EmitGlobalAtomicUMin32x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instrucion");
+}
+
+void EmitGlobalAtomicSMax32x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instrucion");
+}
+
+void EmitGlobalAtomicUMax32x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instrucion");
+}
+
+void EmitGlobalAtomicInc32x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instrucion");
+}
+
+void EmitGlobalAtomicDec32x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instrucion");
+}
+
+void EmitGlobalAtomicAnd32x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instrucion");
+}
+
+void EmitGlobalAtomicOr32x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instrucion");
+}
+
+void EmitGlobalAtomicXor32x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instrucion");
+}
+
+void EmitGlobalAtomicExchange32x2(EmitContext&) {
+    throw NotImplementedException("GLSL Instrucion");
+}
+
 void EmitGlobalAtomicAddF32(EmitContext&) {
     throw NotImplementedException("GLSL Instrucion");
 }
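For reference, a rough sketch of the GLSL that the EmitStorageAtomicIAdd32x2 fallback above produces once its format strings are filled in. The buffer name cs_ssbo0, the byte offset and the addend below are illustrative stand-ins, not values taken from the emitter:

#version 450
layout(local_size_x = 1) in;

// Illustrative SSBO standing in for the emitter's {stage}_ssbo{binding} buffer.
layout(std430, binding = 0) buffer ssbo0_block {
    uint cs_ssbo0[];
};

void main() {
    uint off = 16u;               // byte offset into the buffer
    uvec2 addend = uvec2(1u, 0u); // the 64-bit operand as two 32-bit words
    // Read the previous 64-bit value as two consecutive 32-bit words
    // (this becomes the result of the IR instruction)...
    uvec2 old_value = uvec2(cs_ssbo0[off >> 2], cs_ssbo0[(off >> 2) + 1]);
    // ...then add per word. Note that this read-modify-write is not atomic.
    cs_ssbo0[off >> 2] += addend.x;
    cs_ssbo0[(off >> 2) + 1] += addend.y;
}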
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
index 6cabbc717..704baddc9 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
@@ -442,6 +442,8 @@ void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_vi
                                 std::string_view value);
 void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
                                 std::string_view value);
+void EmitSharedAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset,
+                                  std::string_view value);
 void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              const IR::Value& offset, std::string_view value);
 void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
@@ -482,6 +484,24 @@ void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& b
                             const IR::Value& offset, std::string_view value);
 void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                                  const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                              const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                              const IR::Value& offset, std::string_view value);
+void EmitStorageAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                                   const IR::Value& offset, std::string_view value);
 void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                              const IR::Value& offset, std::string_view value);
 void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
@@ -518,6 +538,17 @@ void EmitGlobalAtomicAnd64(EmitContext& ctx);
 void EmitGlobalAtomicOr64(EmitContext& ctx);
 void EmitGlobalAtomicXor64(EmitContext& ctx);
 void EmitGlobalAtomicExchange64(EmitContext& ctx);
+void EmitGlobalAtomicIAdd32x2(EmitContext& ctx);
+void EmitGlobalAtomicSMin32x2(EmitContext& ctx);
+void EmitGlobalAtomicUMin32x2(EmitContext& ctx);
+void EmitGlobalAtomicSMax32x2(EmitContext& ctx);
+void EmitGlobalAtomicUMax32x2(EmitContext& ctx);
+void EmitGlobalAtomicInc32x2(EmitContext& ctx);
+void EmitGlobalAtomicDec32x2(EmitContext& ctx);
+void EmitGlobalAtomicAnd32x2(EmitContext& ctx);
+void EmitGlobalAtomicOr32x2(EmitContext& ctx);
+void EmitGlobalAtomicXor32x2(EmitContext& ctx);
+void EmitGlobalAtomicExchange32x2(EmitContext& ctx);
 void EmitGlobalAtomicAddF32(EmitContext& ctx);
 void EmitGlobalAtomicAddF16x2(EmitContext& ctx);
 void EmitGlobalAtomicAddF32x2(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index b412957c7..2b360e073 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -22,7 +22,7 @@ constexpr u32 NUM_TEXTURE_AND_IMAGE_SCALING_WORDS =
 struct RescalingLayout {
     alignas(16) std::array<u32, NUM_TEXTURE_SCALING_WORDS> rescaling_textures;
     alignas(16) std::array<u32, NUM_IMAGE_SCALING_WORDS> rescaling_images;
-    alignas(16) u32 down_factor;
+    u32 down_factor;
 };
 constexpr u32 RESCALING_LAYOUT_WORDS_OFFSET = offsetof(RescalingLayout, rescaling_textures);
 constexpr u32 RESCALING_LAYOUT_DOWN_FACTOR_OFFSET = offsetof(RescalingLayout, down_factor);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index 46ba52a25..d3cbb14a9 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -82,6 +82,17 @@ Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value&
     ctx.OpStore(pointer, ctx.OpBitcast(ctx.U32[2], result));
     return original_value;
 }
+
+Id StorageAtomicU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
+                      Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) {
+    LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
+    const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
+                                    binding, offset, sizeof(u32[2]))};
+    const Id original_value{ctx.OpLoad(ctx.U32[2], pointer)};
+    const Id result{(ctx.*non_atomic_func)(ctx.U32[2], value, original_value)};
+    ctx.OpStore(pointer, result);
+    return original_value;
+}
 } // Anonymous namespace
 
 Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) {
@@ -141,7 +152,7 @@ Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) {
         const auto [scope, semantics]{AtomicArgs(ctx)};
         return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value);
     }
-    LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
+    LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
     const Id pointer_1{SharedPointer(ctx, offset, 0)};
     const Id pointer_2{SharedPointer(ctx, offset, 1)};
     const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)};
@@ -152,6 +163,18 @@ Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) {
     return ctx.OpBitcast(ctx.U64, ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2));
 }
 
+Id EmitSharedAtomicExchange32x2(EmitContext& ctx, Id offset, Id value) {
+    LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
+    const Id pointer_1{SharedPointer(ctx, offset, 0)};
+    const Id pointer_2{SharedPointer(ctx, offset, 1)};
+    const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)};
+    const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)};
+    const Id new_vector{ctx.OpBitcast(ctx.U32[2], value)};
+    ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 0U));
+    ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 1U));
+    return ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2);
+}
+
 Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                            Id value) {
     return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd);
@@ -275,6 +298,56 @@ Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const
     return original;
 }
 
+Id EmitStorageAtomicIAdd32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value) {
+    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpIAdd);
+}
+
+Id EmitStorageAtomicSMin32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value) {
+    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpSMin);
+}
+
+Id EmitStorageAtomicUMin32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value) {
+    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpUMin);
+}
+
+Id EmitStorageAtomicSMax32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value) {
+    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpSMax);
+}
+
+Id EmitStorageAtomicUMax32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value) {
+    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpUMax);
+}
+
+Id EmitStorageAtomicAnd32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                            Id value) {
+    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpBitwiseAnd);
+}
+
+Id EmitStorageAtomicOr32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value) {
+    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpBitwiseOr);
+}
+
+Id EmitStorageAtomicXor32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                            Id value) {
+    return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpBitwiseXor);
+}
+
+Id EmitStorageAtomicExchange32x2(EmitContext& ctx, const IR::Value& binding,
+                                 const IR::Value& offset, Id value) {
+    LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
+    const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
+                                    binding, offset, sizeof(u32[2]))};
+    const Id original{ctx.OpLoad(ctx.U32[2], pointer)};
+    ctx.OpStore(pointer, value);
+    return original;
+}
+
 Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                            Id value) {
     const Id ssbo{ctx.ssbos[binding.U32()].U32};
@@ -418,6 +491,50 @@ Id EmitGlobalAtomicExchange64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
+Id EmitGlobalAtomicIAdd32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicSMin32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicUMin32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicSMax32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicUMax32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicInc32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicDec32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicAnd32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicOr32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicXor32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
+Id EmitGlobalAtomicExchange32x2(EmitContext&) {
+    throw NotImplementedException("SPIR-V Instruction");
+}
+
 Id EmitGlobalAtomicAddF32(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 8ea730c80..80b4bbd27 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -123,34 +123,36 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
 }
 
 Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size,
-           const IR::Value& binding, const IR::Value& offset) {
+           const IR::Value& binding, const IR::Value& offset, const Id indirect_func) {
+    Id buffer_offset;
+    const Id uniform_type{ctx.uniform_types.*member_ptr};
+    if (offset.IsImmediate()) {
+        // Hardware been proved to read the aligned offset (e.g. LDC.U32 at 6 will read offset 4)
+        const Id imm_offset{ctx.Const(offset.U32() / element_size)};
+        buffer_offset = imm_offset;
+    } else if (element_size > 1) {
+        const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))};
+        const Id shift{ctx.Const(log2_element_size)};
+        buffer_offset = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift);
+    } else {
+        buffer_offset = ctx.Def(offset);
+    }
     if (!binding.IsImmediate()) {
-        throw NotImplementedException("Constant buffer indexing");
+        return ctx.OpFunctionCall(result_type, indirect_func, ctx.Def(binding), buffer_offset);
     }
     const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr};
-    const Id uniform_type{ctx.uniform_types.*member_ptr};
-    if (!offset.IsImmediate()) {
-        Id index{ctx.Def(offset)};
-        if (element_size > 1) {
-            const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))};
-            const Id shift{ctx.Const(log2_element_size)};
-            index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift);
-        }
-        const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)};
-        return ctx.OpLoad(result_type, access_chain);
-    }
-    // Hardware been proved to read the aligned offset (e.g. LDC.U32 at 6 will read offset 4)
-    const Id imm_offset{ctx.Const(offset.U32() / element_size)};
-    const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)};
+    const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, buffer_offset)};
     return ctx.OpLoad(result_type, access_chain);
 }
 
 Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
-    return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset);
+    return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset,
+                   ctx.load_const_func_u32);
 }
 
 Id GetCbufU32x4(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
-    return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset);
+    return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset,
+                   ctx.load_const_func_u32x4);
 }
 
 Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 index_offset) {
@@ -201,7 +203,8 @@ void EmitGetIndirectBranchVariable(EmitContext&) {
 
 Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
     if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) {
-        const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)};
+        const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset,
+                              ctx.load_const_func_u8)};
         return ctx.OpUConvert(ctx.U32[1], load);
     }
     Id element{};
@@ -217,7 +220,8 @@ Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& of
 
 Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
     if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) {
-        const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset)};
+        const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset,
+                              ctx.load_const_func_u8)};
         return ctx.OpSConvert(ctx.U32[1], load);
     }
     Id element{};
@@ -233,8 +237,8 @@ Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& of
 
 Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
     if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) {
-        const Id load{
-            GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset)};
+        const Id load{GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset,
+                              ctx.load_const_func_u16)};
         return ctx.OpUConvert(ctx.U32[1], load);
     }
     Id element{};
@@ -250,8 +254,8 @@ Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
 
 Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
     if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) {
-        const Id load{
-            GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset)};
+        const Id load{GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset,
+                              ctx.load_const_func_u16)};
         return ctx.OpSConvert(ctx.U32[1], load);
     }
     Id element{};
@@ -276,7 +280,8 @@ Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
 
 Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
     if (ctx.profile.support_descriptor_aliasing) {
-        return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset);
+        return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset,
+                       ctx.load_const_func_f32);
     } else {
         const Id vector{GetCbufU32x4(ctx, binding, offset)};
         return ctx.OpBitcast(ctx.F32[1], GetCbufElement(ctx, vector, offset, 0u));
@@ -285,8 +290,8 @@ Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
 
 Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
     if (ctx.profile.support_descriptor_aliasing) {
-        return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding,
-                       offset);
+        return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding, offset,
+                       ctx.load_const_func_u32x2);
     } else {
         const Id vector{GetCbufU32x4(ctx, binding, offset)};
         return ctx.OpCompositeConstruct(ctx.U32[2], GetCbufElement(ctx, vector, offset, 0u),
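As a side note, the immediate-offset path kept in GetCbuf above divides the byte offset by the element size, which is what produces the aligned read described in its comment. A minimal GLSL sketch of that indexing, using an SSBO only so the example stays self-contained (the real emitter reads a uniform block through an access chain, and all names here are assumptions):

#version 450
layout(local_size_x = 1) in;

// Illustrative stand-in for a constant buffer; name and binding are assumptions.
layout(std430, binding = 0) readonly buffer cbuf0_block {
    uint cbuf0[];
};

// An immediate byte offset is divided by the element size first, so a 32-bit
// load at byte offset 6 reads the aligned word at byte offset 4 (6 / 4 == 1).
uint load_const_u32(uint byte_offset) {
    return cbuf0[byte_offset / 4u];
}

void main() {
    uint at_4 = load_const_u32(4u);
    uint at_6 = load_const_u32(6u); // same word as at_4
}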
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 887112deb..f263b41b0 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -335,6 +335,7 @@ Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value);
 Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value);
 Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value);
 Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value);
+Id EmitSharedAtomicExchange32x2(EmitContext& ctx, Id pointer_offset, Id value);
 Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                            Id value);
 Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
@@ -375,6 +376,24 @@ Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::
                            Id value);
 Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                                Id value);
+Id EmitStorageAtomicIAdd32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitStorageAtomicSMin32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitStorageAtomicUMin32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitStorageAtomicSMax32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitStorageAtomicUMax32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                             Id value);
+Id EmitStorageAtomicAnd32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                            Id value);
+Id EmitStorageAtomicOr32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                           Id value);
+Id EmitStorageAtomicXor32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
+                            Id value);
+Id EmitStorageAtomicExchange32x2(EmitContext& ctx, const IR::Value& binding,
+                                 const IR::Value& offset, Id value);
 Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                            Id value);
 Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
@@ -411,6 +430,17 @@ Id EmitGlobalAtomicAnd64(EmitContext& ctx);
 Id EmitGlobalAtomicOr64(EmitContext& ctx);
 Id EmitGlobalAtomicXor64(EmitContext& ctx);
 Id EmitGlobalAtomicExchange64(EmitContext& ctx);
+Id EmitGlobalAtomicIAdd32x2(EmitContext& ctx);
+Id EmitGlobalAtomicSMin32x2(EmitContext& ctx);
+Id EmitGlobalAtomicUMin32x2(EmitContext& ctx);
+Id EmitGlobalAtomicSMax32x2(EmitContext& ctx);
+Id EmitGlobalAtomicUMax32x2(EmitContext& ctx);
+Id EmitGlobalAtomicInc32x2(EmitContext& ctx);
+Id EmitGlobalAtomicDec32x2(EmitContext& ctx);
+Id EmitGlobalAtomicAnd32x2(EmitContext& ctx);
+Id EmitGlobalAtomicOr32x2(EmitContext& ctx);
+Id EmitGlobalAtomicXor32x2(EmitContext& ctx);
+Id EmitGlobalAtomicExchange32x2(EmitContext& ctx);
 Id EmitGlobalAtomicAddF32(EmitContext& ctx);
 Id EmitGlobalAtomicAddF16x2(EmitContext& ctx);
 Id EmitGlobalAtomicAddF32x2(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index cd90c084a..aa5b6c9b7 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -464,6 +464,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
     DefineSharedMemory(program);
     DefineSharedMemoryFunctions(program);
     DefineConstantBuffers(program.info, uniform_binding);
+    DefineConstantBufferIndirectFunctions(program.info);
     DefineStorageBuffers(program.info, storage_binding);
     DefineTextureBuffers(program.info, texture_binding);
     DefineImageBuffers(program.info, image_binding);
@@ -993,7 +994,7 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
         }
         return;
     }
-    IR::Type types{info.used_constant_buffer_types};
+    IR::Type types{info.used_constant_buffer_types | info.used_indirect_cbuf_types};
     if (True(types & IR::Type::U8)) {
         if (profile.support_int8) {
             DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8));
@@ -1027,6 +1028,62 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
     binding += static_cast<u32>(info.constant_buffer_descriptors.size());
 }
 
+void EmitContext::DefineConstantBufferIndirectFunctions(const Info& info) {
+    if (!info.uses_cbuf_indirect) {
+        return;
+    }
+    const auto make_accessor{[&](Id buffer_type, Id UniformDefinitions::*member_ptr) {
+        const Id func_type{TypeFunction(buffer_type, U32[1], U32[1])};
+        const Id func{OpFunction(buffer_type, spv::FunctionControlMask::MaskNone, func_type)};
+        const Id binding{OpFunctionParameter(U32[1])};
+        const Id offset{OpFunctionParameter(U32[1])};
+
+        AddLabel();
+
+        const Id merge_label{OpLabel()};
+        const Id uniform_type{uniform_types.*member_ptr};
+
+        std::array<Id, Info::MAX_CBUFS> buf_labels;
+        std::array<Sirit::Literal, Info::MAX_CBUFS> buf_literals;
+        for (u32 i = 0; i < Info::MAX_CBUFS; i++) {
+            buf_labels[i] = OpLabel();
+            buf_literals[i] = Sirit::Literal{i};
+        }
+        OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
+        OpSwitch(binding, buf_labels[0], buf_literals, buf_labels);
+        for (u32 i = 0; i < Info::MAX_CBUFS; i++) {
+            AddLabel(buf_labels[i]);
+            const Id cbuf{cbufs[i].*member_ptr};
+            const Id access_chain{OpAccessChain(uniform_type, cbuf, u32_zero_value, offset)};
+            const Id result{OpLoad(buffer_type, access_chain)};
+            OpReturnValue(result);
+        }
+        AddLabel(merge_label);
+        OpUnreachable();
+        OpFunctionEnd();
+        return func;
+    }};
+    IR::Type types{info.used_indirect_cbuf_types};
+    if (True(types & IR::Type::U8)) {
+        load_const_func_u8 = make_accessor(U8, &UniformDefinitions::U8);
+    }
+    if (True(types & IR::Type::U16)) {
+        load_const_func_u16 = make_accessor(U16, &UniformDefinitions::U16);
+    }
+    if (True(types & IR::Type::F32)) {
+        load_const_func_f32 = make_accessor(F32[1], &UniformDefinitions::F32);
+    }
+    if (True(types & IR::Type::U32)) {
+        load_const_func_u32 = make_accessor(U32[1], &UniformDefinitions::U32);
+    }
+    if (True(types & IR::Type::U32x2)) {
+        load_const_func_u32x2 = make_accessor(U32[2], &UniformDefinitions::U32x2);
+    }
+    if (True(types & IR::Type::U32x4)) {
+        load_const_func_u32x4 = make_accessor(U32[4], &UniformDefinitions::U32x4);
+    }
+}
+
 void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
     if (info.storage_buffers_descriptors.empty()) {
         return;
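The lambda above builds one SPIR-V helper function per used element type, and GetCbuf calls it whenever the constant-buffer index is not known at compile time. Expressed as GLSL, the generated accessor behaves roughly like the sketch below; the block names, array bound, and two-buffer case count are assumptions kept small for illustration, while the real function switches over all Info::MAX_CBUFS bindings:

#version 450
layout(local_size_x = 1) in;

// Illustrative uniform blocks standing in for the shader's constant buffers.
layout(std140, binding = 0) uniform cbuf0_block { uvec4 cbuf0[4096]; };
layout(std140, binding = 1) uniform cbuf1_block { uvec4 cbuf1[4096]; };

// Rough equivalent of the emitted accessor: select the buffer with a switch on
// the runtime binding index, then load the requested element from it.
uvec4 load_const_u32x4(int binding_index, uint offset) {
    switch (binding_index) {
    case 1:
        return cbuf1[offset];
    case 0:
    default: // the generated OpSwitch also uses buffer 0 as its default target
        return cbuf0[offset];
    }
}

void main() {
    uvec4 value = load_const_u32x4(1, 8u);
}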
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index f87138f7e..906a1dc2c 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -294,6 +294,13 @@ public:
 
     std::vector<Id> interfaces;
 
+    Id load_const_func_u8{};
+    Id load_const_func_u16{};
+    Id load_const_func_u32{};
+    Id load_const_func_f32{};
+    Id load_const_func_u32x2{};
+    Id load_const_func_u32x4{};
+
 private:
     void DefineCommonTypes(const Info& info);
     void DefineCommonConstants();
@@ -302,6 +309,7 @@ private:
     void DefineSharedMemory(const IR::Program& program);
     void DefineSharedMemoryFunctions(const IR::Program& program);
     void DefineConstantBuffers(const Info& info, u32& binding);
+    void DefineConstantBufferIndirectFunctions(const Info& info);
     void DefineStorageBuffers(const Info& info, u32& binding);
     void DefineTextureBuffers(const Info& info, u32& binding);
     void DefineImageBuffers(const Info& info, u32& binding);
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 97e2bf6af..631446cf7 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -118,6 +118,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::SharedAtomicXor32:
     case Opcode::SharedAtomicExchange32:
     case Opcode::SharedAtomicExchange64:
+    case Opcode::SharedAtomicExchange32x2:
     case Opcode::GlobalAtomicIAdd32:
     case Opcode::GlobalAtomicSMin32:
     case Opcode::GlobalAtomicUMin32:
@@ -138,6 +139,15 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::GlobalAtomicOr64:
     case Opcode::GlobalAtomicXor64:
     case Opcode::GlobalAtomicExchange64:
+    case Opcode::GlobalAtomicIAdd32x2:
+    case Opcode::GlobalAtomicSMin32x2:
+    case Opcode::GlobalAtomicUMin32x2:
+    case Opcode::GlobalAtomicSMax32x2:
+    case Opcode::GlobalAtomicUMax32x2:
+    case Opcode::GlobalAtomicAnd32x2:
+    case Opcode::GlobalAtomicOr32x2:
+    case Opcode::GlobalAtomicXor32x2:
+    case Opcode::GlobalAtomicExchange32x2:
     case Opcode::GlobalAtomicAddF32:
     case Opcode::GlobalAtomicAddF16x2:
     case Opcode::GlobalAtomicAddF32x2:
@@ -165,6 +175,15 @@ bool Inst::MayHaveSideEffects() const noexcept {
165 case Opcode::StorageAtomicOr64: 175 case Opcode::StorageAtomicOr64:
166 case Opcode::StorageAtomicXor64: 176 case Opcode::StorageAtomicXor64:
167 case Opcode::StorageAtomicExchange64: 177 case Opcode::StorageAtomicExchange64:
178 case Opcode::StorageAtomicIAdd32x2:
179 case Opcode::StorageAtomicSMin32x2:
180 case Opcode::StorageAtomicUMin32x2:
181 case Opcode::StorageAtomicSMax32x2:
182 case Opcode::StorageAtomicUMax32x2:
183 case Opcode::StorageAtomicAnd32x2:
184 case Opcode::StorageAtomicOr32x2:
185 case Opcode::StorageAtomicXor32x2:
186 case Opcode::StorageAtomicExchange32x2:
168 case Opcode::StorageAtomicAddF32: 187 case Opcode::StorageAtomicAddF32:
169 case Opcode::StorageAtomicAddF16x2: 188 case Opcode::StorageAtomicAddF16x2:
170 case Opcode::StorageAtomicAddF32x2: 189 case Opcode::StorageAtomicAddF32x2:
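The new 32x2 atomics join MayHaveSideEffects so that dead-code elimination never drops them, even when the returned old value is unused. A standalone sketch of that guard with a simplified opcode set (the enum and pass below are illustrative, not the project's IR types):

#include <vector>

enum class Opcode { IAdd32, LoadStorage32, StorageAtomicIAdd32x2 };

struct Inst {
    Opcode op;
    int uses; // number of other instructions reading this result
};

// Atomics write memory, so they must survive even with zero uses.
bool MayHaveSideEffects(const Inst& inst) {
    return inst.op == Opcode::StorageAtomicIAdd32x2;
}

// Dead-code elimination: unused values may go, side effects must stay.
void RemoveDeadCode(std::vector<Inst>& block) {
    std::erase_if(block, [](const Inst& inst) {
        return inst.uses == 0 && !MayHaveSideEffects(inst);
    });
}
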
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index b94ce7406..efb6bfac3 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -341,6 +341,7 @@ OPCODE(SharedAtomicOr32, U32, U32,
341OPCODE(SharedAtomicXor32, U32, U32, U32, ) 341OPCODE(SharedAtomicXor32, U32, U32, U32, )
342OPCODE(SharedAtomicExchange32, U32, U32, U32, ) 342OPCODE(SharedAtomicExchange32, U32, U32, U32, )
343OPCODE(SharedAtomicExchange64, U64, U32, U64, ) 343OPCODE(SharedAtomicExchange64, U64, U32, U64, )
344OPCODE(SharedAtomicExchange32x2, U32x2, U32, U32x2, )
344 345
345OPCODE(GlobalAtomicIAdd32, U32, U64, U32, ) 346OPCODE(GlobalAtomicIAdd32, U32, U64, U32, )
346OPCODE(GlobalAtomicSMin32, U32, U64, U32, ) 347OPCODE(GlobalAtomicSMin32, U32, U64, U32, )
@@ -362,6 +363,15 @@ OPCODE(GlobalAtomicAnd64, U64, U64,
362OPCODE(GlobalAtomicOr64, U64, U64, U64, ) 363OPCODE(GlobalAtomicOr64, U64, U64, U64, )
363OPCODE(GlobalAtomicXor64, U64, U64, U64, ) 364OPCODE(GlobalAtomicXor64, U64, U64, U64, )
364OPCODE(GlobalAtomicExchange64, U64, U64, U64, ) 365OPCODE(GlobalAtomicExchange64, U64, U64, U64, )
366OPCODE(GlobalAtomicIAdd32x2, U32x2, U32x2, U32x2, )
367OPCODE(GlobalAtomicSMin32x2, U32x2, U32x2, U32x2, )
368OPCODE(GlobalAtomicUMin32x2, U32x2, U32x2, U32x2, )
369OPCODE(GlobalAtomicSMax32x2, U32x2, U32x2, U32x2, )
370OPCODE(GlobalAtomicUMax32x2, U32x2, U32x2, U32x2, )
371OPCODE(GlobalAtomicAnd32x2, U32x2, U32x2, U32x2, )
372OPCODE(GlobalAtomicOr32x2, U32x2, U32x2, U32x2, )
373OPCODE(GlobalAtomicXor32x2, U32x2, U32x2, U32x2, )
374OPCODE(GlobalAtomicExchange32x2, U32x2, U32x2, U32x2, )
365OPCODE(GlobalAtomicAddF32, F32, U64, F32, ) 375OPCODE(GlobalAtomicAddF32, F32, U64, F32, )
366OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, ) 376OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, )
367OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, ) 377OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, )
@@ -390,6 +400,15 @@ OPCODE(StorageAtomicAnd64, U64, U32,
390OPCODE(StorageAtomicOr64, U64, U32, U32, U64, ) 400OPCODE(StorageAtomicOr64, U64, U32, U32, U64, )
391OPCODE(StorageAtomicXor64, U64, U32, U32, U64, ) 401OPCODE(StorageAtomicXor64, U64, U32, U32, U64, )
392OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, ) 402OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, )
403OPCODE(StorageAtomicIAdd32x2, U32x2, U32, U32, U32x2, )
404OPCODE(StorageAtomicSMin32x2, U32x2, U32, U32, U32x2, )
405OPCODE(StorageAtomicUMin32x2, U32x2, U32, U32, U32x2, )
406OPCODE(StorageAtomicSMax32x2, U32x2, U32, U32, U32x2, )
407OPCODE(StorageAtomicUMax32x2, U32x2, U32, U32, U32x2, )
408OPCODE(StorageAtomicAnd32x2, U32x2, U32, U32, U32x2, )
409OPCODE(StorageAtomicOr32x2, U32x2, U32, U32, U32x2, )
410OPCODE(StorageAtomicXor32x2, U32x2, U32, U32, U32x2, )
411OPCODE(StorageAtomicExchange32x2, U32x2, U32, U32, U32x2, )
393OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, ) 412OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, )
394OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, ) 413OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, )
395OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, ) 414OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, )
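opcodes.inc is an X-macro table: each OPCODE(name, result type, argument types...) row is expanded several times under different OPCODE definitions to produce the opcode enum and its type metadata. A minimal sketch of how such a table is typically consumed; the two rows and column layout mirror the entries above, but the exact macro arguments in the project may differ:

#include <array>

// Each table row: OPCODE(name, result type, argument types...)
#define OPCODE_LIST \
    OPCODE(SharedAtomicExchange32x2, U32x2, U32, U32x2) \
    OPCODE(StorageAtomicIAdd32x2, U32x2, U32, U32, U32x2)

enum class Type { U32, U32x2 };

// First expansion: the opcode enum.
#define OPCODE(name, ...) name,
enum class Opcode { OPCODE_LIST };
#undef OPCODE

// Second expansion: result type per opcode, indexed by the enum value.
#define OPCODE(name, result, ...) Type::result,
constexpr std::array result_types{OPCODE_LIST};
#undef OPCODE
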
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
index 2300088e3..8007a4d46 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
@@ -11,10 +11,20 @@ namespace Shader::Maxwell {
11using namespace LDC; 11using namespace LDC;
12namespace { 12namespace {
13std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index, 13std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index,
14 const IR::U32& reg, const IR::U32& imm) { 14 const IR::U32& reg, const IR::U32& imm_offset) {
15 switch (mode) { 15 switch (mode) {
16 case Mode::Default: 16 case Mode::Default:
17 return {imm_index, ir.IAdd(reg, imm)}; 17 return {imm_index, ir.IAdd(reg, imm_offset)};
18 case Mode::IS: {
19 // Segmented addressing mode
20 // Ra+imm_offset points into a flat mapping of const buffer
21 // address space
22 const IR::U32 address{ir.IAdd(reg, imm_offset)};
23 const IR::U32 index{ir.BitFieldExtract(address, ir.Imm32(16), ir.Imm32(16))};
24 const IR::U32 offset{ir.BitFieldExtract(address, ir.Imm32(0), ir.Imm32(16))};
25
26 return {ir.IAdd(index, imm_index), offset};
27 }
18 default: 28 default:
19 break; 29 break;
20 } 30 }
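In IS mode the sum Ra+imm_offset is treated as a flat address into constant-buffer space: the upper 16 bits select the buffer slot (added to the immediate index) and the lower 16 bits are the byte offset within that buffer. A standalone sketch of the same decomposition in plain integer math, outside the IR emitter:

#include <cstdint>
#include <utility>

// Split a flat const-buffer address into (buffer index, byte offset).
std::pair<std::uint32_t, std::uint32_t> SplitFlatCbufAddress(std::uint32_t reg,
                                                             std::uint32_t imm_offset,
                                                             std::uint32_t imm_index) {
    const std::uint32_t address = reg + imm_offset;
    const std::uint32_t index = (address >> 16) & 0xFFFF; // bits [16, 32)
    const std::uint32_t offset = address & 0xFFFF;        // bits [0, 16)
    return {imm_index + index, offset};
}
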
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
index e0fe47912..f3c7ceb57 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
@@ -13,59 +13,535 @@ namespace {
13// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table) 13// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
14IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c, 14IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
15 u64 ttbl) { 15 u64 ttbl) {
16 IR::U32 r{ir.Imm32(0)}; 16 switch (ttbl) {
17 const IR::U32 not_a{ir.BitwiseNot(a)}; 17 // generated code, do not edit manually
18 const IR::U32 not_b{ir.BitwiseNot(b)}; 18 case 0:
19 const IR::U32 not_c{ir.BitwiseNot(c)}; 19 return ir.Imm32(0);
20 if (ttbl & 0x01) { 20 case 1:
21 // r |= ~a & ~b & ~c; 21 return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseOr(b, c)));
22 const auto lhs{ir.BitwiseAnd(not_a, not_b)}; 22 case 2:
23 const auto rhs{ir.BitwiseAnd(lhs, not_c)}; 23 return ir.BitwiseAnd(c, ir.BitwiseNot(ir.BitwiseOr(a, b)));
24 r = ir.BitwiseOr(r, rhs); 24 case 3:
25 return ir.BitwiseNot(ir.BitwiseOr(a, b));
26 case 4:
27 return ir.BitwiseAnd(b, ir.BitwiseNot(ir.BitwiseOr(a, c)));
28 case 5:
29 return ir.BitwiseNot(ir.BitwiseOr(a, c));
30 case 6:
31 return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
32 case 7:
33 return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseAnd(b, c)));
34 case 8:
35 return ir.BitwiseAnd(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
36 case 9:
37 return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseXor(b, c)));
38 case 10:
39 return ir.BitwiseAnd(c, ir.BitwiseNot(a));
40 case 11:
41 return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(c, ir.BitwiseNot(b)));
42 case 12:
43 return ir.BitwiseAnd(b, ir.BitwiseNot(a));
44 case 13:
45 return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(b, ir.BitwiseNot(c)));
46 case 14:
47 return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
48 case 15:
49 return ir.BitwiseNot(a);
50 case 16:
51 return ir.BitwiseAnd(a, ir.BitwiseNot(ir.BitwiseOr(b, c)));
52 case 17:
53 return ir.BitwiseNot(ir.BitwiseOr(b, c));
54 case 18:
55 return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseXor(a, c));
56 case 19:
57 return ir.BitwiseNot(ir.BitwiseOr(b, ir.BitwiseAnd(a, c)));
58 case 20:
59 return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseXor(a, b));
60 case 21:
61 return ir.BitwiseNot(ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
62 case 22:
63 return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
64 case 23:
65 return ir.BitwiseXor(ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)),
66 ir.BitwiseNot(a));
67 case 24:
68 return ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c));
69 case 25:
70 return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, c)));
71 case 26:
72 return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
73 case 27:
74 return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseOr(b, c));
75 case 28:
76 return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
77 case 29:
78 return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseOr(b, c));
79 case 30:
80 return ir.BitwiseXor(a, ir.BitwiseOr(b, c));
81 case 31:
82 return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseOr(b, c)));
83 case 32:
84 return ir.BitwiseAnd(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
85 case 33:
86 return ir.BitwiseNot(ir.BitwiseOr(b, ir.BitwiseXor(a, c)));
87 case 34:
88 return ir.BitwiseAnd(c, ir.BitwiseNot(b));
89 case 35:
90 return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
91 case 36:
92 return ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(b, c));
93 case 37:
94 return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, c)));
95 case 38:
96 return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
97 case 39:
98 return ir.BitwiseXor(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(c)));
99 case 40:
100 return ir.BitwiseAnd(c, ir.BitwiseXor(a, b));
101 case 41:
102 return ir.BitwiseXor(ir.BitwiseOr(a, b),
103 ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(c)));
104 case 42:
105 return ir.BitwiseAnd(c, ir.BitwiseNot(ir.BitwiseAnd(a, b)));
106 case 43:
107 return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(c)),
108 ir.BitwiseOr(b, ir.BitwiseXor(a, c)));
109 case 44:
110 return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, b));
111 case 45:
112 return ir.BitwiseXor(a, ir.BitwiseOr(b, ir.BitwiseNot(c)));
113 case 46:
114 return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseOr(b, c));
115 case 47:
116 return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(b)), ir.BitwiseNot(a));
117 case 48:
118 return ir.BitwiseAnd(a, ir.BitwiseNot(b));
119 case 49:
120 return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(a, ir.BitwiseNot(c)));
121 case 50:
122 return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
123 case 51:
124 return ir.BitwiseNot(b);
125 case 52:
126 return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
127 case 53:
128 return ir.BitwiseXor(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
129 case 54:
130 return ir.BitwiseXor(b, ir.BitwiseOr(a, c));
131 case 55:
132 return ir.BitwiseNot(ir.BitwiseAnd(b, ir.BitwiseOr(a, c)));
133 case 56:
134 return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, b));
135 case 57:
136 return ir.BitwiseXor(b, ir.BitwiseOr(a, ir.BitwiseNot(c)));
137 case 58:
138 return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseOr(a, c));
139 case 59:
140 return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(a)), ir.BitwiseNot(b));
141 case 60:
142 return ir.BitwiseXor(a, b);
143 case 61:
144 return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, c)), ir.BitwiseXor(a, b));
145 case 62:
146 return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(a)), ir.BitwiseXor(a, b));
147 case 63:
148 return ir.BitwiseNot(ir.BitwiseAnd(a, b));
149 case 64:
150 return ir.BitwiseAnd(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
151 case 65:
152 return ir.BitwiseNot(ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
153 case 66:
154 return ir.BitwiseAnd(ir.BitwiseXor(a, c), ir.BitwiseXor(b, c));
155 case 67:
156 return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, b)));
157 case 68:
158 return ir.BitwiseAnd(b, ir.BitwiseNot(c));
159 case 69:
160 return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
161 case 70:
162 return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
163 case 71:
164 return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(b)));
165 case 72:
166 return ir.BitwiseAnd(b, ir.BitwiseXor(a, c));
167 case 73:
168 return ir.BitwiseXor(ir.BitwiseOr(a, c),
169 ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(b)));
170 case 74:
171 return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, c));
172 case 75:
173 return ir.BitwiseXor(a, ir.BitwiseOr(c, ir.BitwiseNot(b)));
174 case 76:
175 return ir.BitwiseAnd(b, ir.BitwiseNot(ir.BitwiseAnd(a, c)));
176 case 77:
177 return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(b)),
178 ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
179 case 78:
180 return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseOr(b, c));
181 case 79:
182 return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(c)), ir.BitwiseNot(a));
183 case 80:
184 return ir.BitwiseAnd(a, ir.BitwiseNot(c));
185 case 81:
186 return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(a, ir.BitwiseNot(b)));
187 case 82:
188 return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
189 case 83:
190 return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
191 case 84:
192 return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
193 case 85:
194 return ir.BitwiseNot(c);
195 case 86:
196 return ir.BitwiseXor(c, ir.BitwiseOr(a, b));
197 case 87:
198 return ir.BitwiseNot(ir.BitwiseAnd(c, ir.BitwiseOr(a, b)));
199 case 88:
200 return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, c));
201 case 89:
202 return ir.BitwiseXor(c, ir.BitwiseOr(a, ir.BitwiseNot(b)));
203 case 90:
204 return ir.BitwiseXor(a, c);
205 case 91:
206 return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), ir.BitwiseXor(a, c));
207 case 92:
208 return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseOr(a, b));
209 case 93:
210 return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseNot(c));
211 case 94:
212 return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseXor(a, c));
213 case 95:
214 return ir.BitwiseNot(ir.BitwiseAnd(a, c));
215 case 96:
216 return ir.BitwiseAnd(a, ir.BitwiseXor(b, c));
217 case 97:
218 return ir.BitwiseXor(ir.BitwiseOr(b, c),
219 ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(a)));
220 case 98:
221 return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(b, c));
222 case 99:
223 return ir.BitwiseXor(b, ir.BitwiseOr(c, ir.BitwiseNot(a)));
224 case 100:
225 return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, c));
226 case 101:
227 return ir.BitwiseXor(c, ir.BitwiseOr(b, ir.BitwiseNot(a)));
228 case 102:
229 return ir.BitwiseXor(b, c);
230 case 103:
231 return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), ir.BitwiseXor(b, c));
232 case 104:
233 return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(c, ir.BitwiseAnd(a, b)));
234 case 105:
235 return ir.BitwiseXor(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
236 case 106:
237 return ir.BitwiseXor(c, ir.BitwiseAnd(a, b));
238 case 107:
239 return ir.BitwiseXor(ir.BitwiseAnd(c, ir.BitwiseOr(a, b)),
240 ir.BitwiseXor(a, ir.BitwiseNot(b)));
241 case 108:
242 return ir.BitwiseXor(b, ir.BitwiseAnd(a, c));
243 case 109:
244 return ir.BitwiseXor(ir.BitwiseAnd(b, ir.BitwiseOr(a, c)),
245 ir.BitwiseXor(a, ir.BitwiseNot(c)));
246 case 110:
247 return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
248 case 111:
249 return ir.BitwiseOr(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
250 case 112:
251 return ir.BitwiseAnd(a, ir.BitwiseNot(ir.BitwiseAnd(b, c)));
252 case 113:
253 return ir.BitwiseXor(ir.BitwiseOr(b, ir.BitwiseNot(a)),
254 ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
255 case 114:
256 return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseOr(a, c));
257 case 115:
258 return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(c)), ir.BitwiseNot(b));
259 case 116:
260 return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseOr(a, b));
261 case 117:
262 return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseNot(c));
263 case 118:
264 return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, c));
265 case 119:
266 return ir.BitwiseNot(ir.BitwiseAnd(b, c));
267 case 120:
268 return ir.BitwiseXor(a, ir.BitwiseAnd(b, c));
269 case 121:
270 return ir.BitwiseXor(ir.BitwiseAnd(a, ir.BitwiseOr(b, c)),
271 ir.BitwiseXor(b, ir.BitwiseNot(c)));
272 case 122:
273 return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
274 case 123:
275 return ir.BitwiseOr(ir.BitwiseNot(b), ir.BitwiseXor(a, c));
276 case 124:
277 return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
278 case 125:
279 return ir.BitwiseOr(ir.BitwiseNot(c), ir.BitwiseXor(a, b));
280 case 126:
281 return ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c));
282 case 127:
283 return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseAnd(b, c)));
284 case 128:
285 return ir.BitwiseAnd(a, ir.BitwiseAnd(b, c));
286 case 129:
287 return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
288 case 130:
289 return ir.BitwiseAnd(c, ir.BitwiseXor(a, ir.BitwiseNot(b)));
290 case 131:
291 return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(a)), ir.BitwiseXor(a, ir.BitwiseNot(b)));
292 case 132:
293 return ir.BitwiseAnd(b, ir.BitwiseXor(a, ir.BitwiseNot(c)));
294 case 133:
295 return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(a, ir.BitwiseNot(c)));
296 case 134:
297 return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
298 case 135:
299 return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
300 case 136:
301 return ir.BitwiseAnd(b, c);
302 case 137:
303 return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, ir.BitwiseNot(c)));
304 case 138:
305 return ir.BitwiseAnd(c, ir.BitwiseOr(b, ir.BitwiseNot(a)));
306 case 139:
307 return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(ir.BitwiseOr(a, b)));
308 case 140:
309 return ir.BitwiseAnd(b, ir.BitwiseOr(c, ir.BitwiseNot(a)));
310 case 141:
311 return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(ir.BitwiseOr(a, c)));
312 case 142:
313 return ir.BitwiseXor(a, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
314 case 143:
315 return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
316 case 144:
317 return ir.BitwiseAnd(a, ir.BitwiseXor(b, ir.BitwiseNot(c)));
318 case 145:
319 return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, ir.BitwiseNot(c)));
320 case 146:
321 return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
322 case 147:
323 return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
324 case 148:
325 return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
326 case 149:
327 return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
328 case 150:
329 return ir.BitwiseXor(a, ir.BitwiseXor(b, c));
330 case 151:
331 return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)),
332 ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
333 case 152:
334 return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, ir.BitwiseNot(c)));
335 case 153:
336 return ir.BitwiseXor(b, ir.BitwiseNot(c));
337 case 154:
338 return ir.BitwiseXor(c, ir.BitwiseAnd(a, ir.BitwiseNot(b)));
339 case 155:
340 return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, c)));
341 case 156:
342 return ir.BitwiseXor(b, ir.BitwiseAnd(a, ir.BitwiseNot(c)));
343 case 157:
344 return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(b, c)));
345 case 158:
346 return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseOr(b, c)));
347 case 159:
348 return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseXor(b, c)));
349 case 160:
350 return ir.BitwiseAnd(a, c);
351 case 161:
352 return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, ir.BitwiseNot(c)));
353 case 162:
354 return ir.BitwiseAnd(c, ir.BitwiseOr(a, ir.BitwiseNot(b)));
355 case 163:
356 return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(ir.BitwiseOr(a, b)));
357 case 164:
358 return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
359 case 165:
360 return ir.BitwiseXor(a, ir.BitwiseNot(c));
361 case 166:
362 return ir.BitwiseXor(c, ir.BitwiseAnd(b, ir.BitwiseNot(a)));
363 case 167:
364 return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, c)));
365 case 168:
366 return ir.BitwiseAnd(c, ir.BitwiseOr(a, b));
367 case 169:
368 return ir.BitwiseXor(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
369 case 170:
370 return c;
371 case 171:
372 return ir.BitwiseOr(c, ir.BitwiseNot(ir.BitwiseOr(a, b)));
373 case 172:
374 return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
375 case 173:
376 return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseNot(c)));
377 case 174:
378 return ir.BitwiseOr(c, ir.BitwiseAnd(b, ir.BitwiseNot(a)));
379 case 175:
380 return ir.BitwiseOr(c, ir.BitwiseNot(a));
381 case 176:
382 return ir.BitwiseAnd(a, ir.BitwiseOr(c, ir.BitwiseNot(b)));
383 case 177:
384 return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(ir.BitwiseOr(b, c)));
385 case 178:
386 return ir.BitwiseXor(b, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
387 case 179:
388 return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
389 case 180:
390 return ir.BitwiseXor(a, ir.BitwiseAnd(b, ir.BitwiseNot(c)));
391 case 181:
392 return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, c)));
393 case 182:
394 return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(b, ir.BitwiseOr(a, c)));
395 case 183:
396 return ir.BitwiseNot(ir.BitwiseAnd(b, ir.BitwiseXor(a, c)));
397 case 184:
398 return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(b)));
399 case 185:
400 return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(b, ir.BitwiseNot(c)));
401 case 186:
402 return ir.BitwiseOr(c, ir.BitwiseAnd(a, ir.BitwiseNot(b)));
403 case 187:
404 return ir.BitwiseOr(c, ir.BitwiseNot(b));
405 case 188:
406 return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, b));
407 case 189:
408 return ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
409 case 190:
410 return ir.BitwiseOr(c, ir.BitwiseXor(a, b));
411 case 191:
412 return ir.BitwiseOr(c, ir.BitwiseNot(ir.BitwiseAnd(a, b)));
413 case 192:
414 return ir.BitwiseAnd(a, b);
415 case 193:
416 return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, ir.BitwiseNot(b)));
417 case 194:
418 return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
419 case 195:
420 return ir.BitwiseXor(a, ir.BitwiseNot(b));
421 case 196:
422 return ir.BitwiseAnd(b, ir.BitwiseOr(a, ir.BitwiseNot(c)));
423 case 197:
424 return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(ir.BitwiseOr(a, c)));
425 case 198:
426 return ir.BitwiseXor(b, ir.BitwiseAnd(c, ir.BitwiseNot(a)));
427 case 199:
428 return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, b)));
429 case 200:
430 return ir.BitwiseAnd(b, ir.BitwiseOr(a, c));
431 case 201:
432 return ir.BitwiseXor(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
433 case 202:
434 return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
435 case 203:
436 return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
437 case 204:
438 return b;
439 case 205:
440 return ir.BitwiseOr(b, ir.BitwiseNot(ir.BitwiseOr(a, c)));
441 case 206:
442 return ir.BitwiseOr(b, ir.BitwiseAnd(c, ir.BitwiseNot(a)));
443 case 207:
444 return ir.BitwiseOr(b, ir.BitwiseNot(a));
445 case 208:
446 return ir.BitwiseAnd(a, ir.BitwiseOr(b, ir.BitwiseNot(c)));
447 case 209:
448 return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(ir.BitwiseOr(b, c)));
449 case 210:
450 return ir.BitwiseXor(a, ir.BitwiseAnd(c, ir.BitwiseNot(b)));
451 case 211:
452 return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, b)));
453 case 212:
454 return ir.BitwiseXor(c, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
455 case 213:
456 return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
457 case 214:
458 return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(c, ir.BitwiseOr(a, b)));
459 case 215:
460 return ir.BitwiseNot(ir.BitwiseAnd(c, ir.BitwiseXor(a, b)));
461 case 216:
462 return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(c)));
463 case 217:
464 return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, ir.BitwiseNot(c)));
465 case 218:
466 return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, c));
467 case 219:
468 return ir.BitwiseOr(ir.BitwiseXor(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
469 case 220:
470 return ir.BitwiseOr(b, ir.BitwiseAnd(a, ir.BitwiseNot(c)));
471 case 221:
472 return ir.BitwiseOr(b, ir.BitwiseNot(c));
473 case 222:
474 return ir.BitwiseOr(b, ir.BitwiseXor(a, c));
475 case 223:
476 return ir.BitwiseOr(b, ir.BitwiseNot(ir.BitwiseAnd(a, c)));
477 case 224:
478 return ir.BitwiseAnd(a, ir.BitwiseOr(b, c));
479 case 225:
480 return ir.BitwiseXor(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
481 case 226:
482 return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseOr(b, c));
483 case 227:
484 return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
485 case 228:
486 return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseOr(b, c));
487 case 229:
488 return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
489 case 230:
490 return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, c));
491 case 231:
492 return ir.BitwiseOr(ir.BitwiseXor(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, c));
493 case 232:
494 return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
495 case 233:
496 return ir.BitwiseOr(ir.BitwiseAnd(a, b),
497 ir.BitwiseXor(ir.BitwiseNot(c), ir.BitwiseOr(a, b)));
498 case 234:
499 return ir.BitwiseOr(c, ir.BitwiseAnd(a, b));
500 case 235:
501 return ir.BitwiseOr(c, ir.BitwiseXor(a, ir.BitwiseNot(b)));
502 case 236:
503 return ir.BitwiseOr(b, ir.BitwiseAnd(a, c));
504 case 237:
505 return ir.BitwiseOr(b, ir.BitwiseXor(a, ir.BitwiseNot(c)));
506 case 238:
507 return ir.BitwiseOr(b, c);
508 case 239:
509 return ir.BitwiseOr(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
510 case 240:
511 return a;
512 case 241:
513 return ir.BitwiseOr(a, ir.BitwiseNot(ir.BitwiseOr(b, c)));
514 case 242:
515 return ir.BitwiseOr(a, ir.BitwiseAnd(c, ir.BitwiseNot(b)));
516 case 243:
517 return ir.BitwiseOr(a, ir.BitwiseNot(b));
518 case 244:
519 return ir.BitwiseOr(a, ir.BitwiseAnd(b, ir.BitwiseNot(c)));
520 case 245:
521 return ir.BitwiseOr(a, ir.BitwiseNot(c));
522 case 246:
523 return ir.BitwiseOr(a, ir.BitwiseXor(b, c));
524 case 247:
525 return ir.BitwiseOr(a, ir.BitwiseNot(ir.BitwiseAnd(b, c)));
526 case 248:
527 return ir.BitwiseOr(a, ir.BitwiseAnd(b, c));
528 case 249:
529 return ir.BitwiseOr(a, ir.BitwiseXor(b, ir.BitwiseNot(c)));
530 case 250:
531 return ir.BitwiseOr(a, c);
532 case 251:
533 return ir.BitwiseOr(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
534 case 252:
535 return ir.BitwiseOr(a, b);
536 case 253:
537 return ir.BitwiseOr(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
538 case 254:
539 return ir.BitwiseOr(a, ir.BitwiseOr(b, c));
540 case 255:
541 return ir.Imm32(0xFFFFFFFF);
542 // end of generated code
25 } 543 }
26 if (ttbl & 0x02) { 544 throw NotImplementedException("LOP3 with out of range ttbl");
27 // r |= ~a & ~b & c;
28 const auto lhs{ir.BitwiseAnd(not_a, not_b)};
29 const auto rhs{ir.BitwiseAnd(lhs, c)};
30 r = ir.BitwiseOr(r, rhs);
31 }
32 if (ttbl & 0x04) {
33 // r |= ~a & b & ~c;
34 const auto lhs{ir.BitwiseAnd(not_a, b)};
35 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
36 r = ir.BitwiseOr(r, rhs);
37 }
38 if (ttbl & 0x08) {
39 // r |= ~a & b & c;
40 const auto lhs{ir.BitwiseAnd(not_a, b)};
41 const auto rhs{ir.BitwiseAnd(lhs, c)};
42 r = ir.BitwiseOr(r, rhs);
43 }
44 if (ttbl & 0x10) {
45 // r |= a & ~b & ~c;
46 const auto lhs{ir.BitwiseAnd(a, not_b)};
47 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
48 r = ir.BitwiseOr(r, rhs);
49 }
50 if (ttbl & 0x20) {
51 // r |= a & ~b & c;
52 const auto lhs{ir.BitwiseAnd(a, not_b)};
53 const auto rhs{ir.BitwiseAnd(lhs, c)};
54 r = ir.BitwiseOr(r, rhs);
55 }
56 if (ttbl & 0x40) {
57 // r |= a & b & ~c;
58 const auto lhs{ir.BitwiseAnd(a, b)};
59 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
60 r = ir.BitwiseOr(r, rhs);
61 }
62 if (ttbl & 0x80) {
63 // r |= a & b & c;
64 const auto lhs{ir.BitwiseAnd(a, b)};
65 const auto rhs{ir.BitwiseAnd(lhs, c)};
66 r = ir.BitwiseOr(r, rhs);
67 }
68 return r;
69} 545}
70 546
71IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) { 547IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {
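Every generated case above must agree with the plain LUT definition: for each bit position, the three input bits form a 3-bit index into the 8-bit truth table. A reference evaluator over ordinary 32-bit integers (standing in for the IR values) that expresses that definition directly:

#include <cstdint>

// For each bit, look up ttbl at index (a << 2) | (b << 1) | c.
std::uint32_t ApplyLut3Reference(std::uint32_t a, std::uint32_t b, std::uint32_t c,
                                 std::uint32_t ttbl) {
    std::uint32_t result = 0;
    for (int bit = 0; bit < 32; ++bit) {
        const std::uint32_t index = (((a >> bit) & 1u) << 2) | (((b >> bit) & 1u) << 1) |
                                    ((c >> bit) & 1u);
        result |= ((ttbl >> index) & 1u) << bit;
    }
    return result;
}
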
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py
new file mode 100644
index 000000000..8f547c266
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py
@@ -0,0 +1,92 @@
1# Copyright © 2022 degasus <markus@selfnet.de>
2# This work is free. You can redistribute it and/or modify it under the
3# terms of the Do What The Fuck You Want To Public License, Version 2,
4# as published by Sam Hocevar. See http://www.wtfpl.net/ for more details.
5
6from itertools import product
7
8# The primitive instructions
9OPS = {
10 'ir.BitwiseAnd({}, {})' : (2, 1, lambda a,b: a&b),
11 'ir.BitwiseOr({}, {})' : (2, 1, lambda a,b: a|b),
12 'ir.BitwiseXor({}, {})' : (2, 1, lambda a,b: a^b),
13 'ir.BitwiseNot({})' : (1, 0.1, lambda a: (~a) & 255), # Only tiny cost, as this can often be inlined into other instructions
14}
15
16# Our database of combination of instructions
17optimized_calls = {}
18def cmp(lhs, rhs):
19 if lhs is None: # new entry
20 return True
21 if lhs[3] > rhs[3]: # costs
22 return True
23 if lhs[3] < rhs[3]: # costs
24 return False
25 if len(lhs[0]) > len(rhs[0]): # string len
26 return True
27 if len(lhs[0]) < len(rhs[0]): # string len
28 return False
29 if lhs[0] > rhs[0]: # string sorting
30 return True
31 if lhs[0] < rhs[0]: # string sorting
32 return False
33 assert lhs == rhs, "redundant instruction, bug in brute force"
34 return False
35def register(imm, instruction, count, latency):
36 # Use the sum of instruction count and latency as costs to evaluate which combination is best
37 costs = count + latency
38
39 old = optimized_calls.get(imm, None)
40 new = (instruction, count, latency, costs)
41
42 # Update if new or better
43 if cmp(old, new):
44 optimized_calls[imm] = new
45 return True
46
47 return False
48
49# Constants: 0, 1 (for free)
50register(0, 'ir.Imm32(0)', 0, 0)
51register(255, 'ir.Imm32(0xFFFFFFFF)', 0, 0)
52
53# Inputs: a, b, c (for free)
54ta = 0xF0
55tb = 0xCC
56tc = 0xAA
57inputs = {
58 ta : 'a',
59 tb : 'b',
60 tc : 'c',
61}
62for imm, instruction in inputs.items():
63 register(imm, instruction, 0, 0)
64 register((~imm) & 255, 'ir.BitwiseNot({})'.format(instruction), 0.099, 0.099) # slightly cheaper NEG on inputs
65
66# Try to combine two values from the db with an instruction.
67# If it is better than the old method, update it.
68while True:
69 registered = 0
70 calls_copy = optimized_calls.copy()
71 for OP, (argc, cost, f) in OPS.items():
72 for args in product(calls_copy.items(), repeat=argc):
73 # unpack (transpose) the arrays
74 imm = [arg[0] for arg in args]
75 value = [arg[1][0] for arg in args]
76 count = [arg[1][1] for arg in args]
77 latency = [arg[1][2] for arg in args]
78
79 registered += register(
80 f(*imm),
81 OP.format(*value),
82 sum(count) + cost,
83 max(latency) + cost)
84 if registered == 0:
85 # No update at all? So terminate
86 break
87
88# Hacky output. Please improve me to output valid C++ instead.
89s = """ case {imm}:
90 return {op};"""
91for imm in range(256):
92 print(s.format(imm=imm, op=optimized_calls[imm][0]))
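The generator seeds its search with the canonical inputs a = 0xF0, b = 0xCC, c = 0xAA; applying truth table imm to those constants reproduces imm itself, which is the property each emitted case can be checked against. A small self-contained check of that identity (the helper lambda is illustrative, not part of the repository):

#include <cassert>
#include <cstdint>

int main() {
    // Reference LUT evaluation over the low 8 bits only.
    const auto lut = [](std::uint32_t a, std::uint32_t b, std::uint32_t c, std::uint32_t ttbl) {
        std::uint32_t r = 0;
        for (int bit = 0; bit < 8; ++bit) {
            const std::uint32_t idx =
                (((a >> bit) & 1u) << 2) | (((b >> bit) & 1u) << 1) | ((c >> bit) & 1u);
            r |= ((ttbl >> idx) & 1u) << bit;
        }
        return r;
    };
    for (std::uint32_t imm = 0; imm < 256; ++imm) {
        // The truth table applied to 0xF0/0xCC/0xAA spells out the table itself.
        assert(lut(0xF0, 0xCC, 0xAA, imm) == imm);
    }
}
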
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index 248ad3ced..b22725584 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -212,11 +212,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
212 } 212 }
213 Optimization::SsaRewritePass(program); 213 Optimization::SsaRewritePass(program);
214 214
215 Optimization::ConstantPropagationPass(program);
216
215 Optimization::GlobalMemoryToStorageBufferPass(program); 217 Optimization::GlobalMemoryToStorageBufferPass(program);
216 Optimization::TexturePass(env, program); 218 Optimization::TexturePass(env, program);
217 219
218 Optimization::ConstantPropagationPass(program);
219
220 if (Settings::values.resolution_info.active) { 220 if (Settings::values.resolution_info.active) {
221 Optimization::RescalingPass(program); 221 Optimization::RescalingPass(program);
222 } 222 }
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index b6a20f904..0b2c60842 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -29,6 +29,46 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
29 }); 29 });
30} 30}
31 31
32void AddRegisterIndexedLdc(Info& info) {
33 info.uses_cbuf_indirect = true;
34
35 // The shader can use any possible constant buffer
36 info.constant_buffer_mask = (1 << Info::MAX_CBUFS) - 1;
37
38 auto& cbufs{info.constant_buffer_descriptors};
39 cbufs.clear();
40 for (u32 i = 0; i < Info::MAX_CBUFS; i++) {
41 cbufs.push_back(ConstantBufferDescriptor{.index = i, .count = 1});
42
43 // The shader can use any possible access size
44 info.constant_buffer_used_sizes[i] = 0x10'000;
45 }
46}
47
48u32 GetElementSize(IR::Type& used_type, Shader::IR::Opcode opcode) {
49 switch (opcode) {
50 case IR::Opcode::GetCbufU8:
51 case IR::Opcode::GetCbufS8:
52 used_type |= IR::Type::U8;
53 return 1;
54 case IR::Opcode::GetCbufU16:
55 case IR::Opcode::GetCbufS16:
56 used_type |= IR::Type::U16;
57 return 2;
58 case IR::Opcode::GetCbufU32:
59 used_type |= IR::Type::U32;
60 return 4;
61 case IR::Opcode::GetCbufF32:
62 used_type |= IR::Type::F32;
63 return 4;
64 case IR::Opcode::GetCbufU32x2:
65 used_type |= IR::Type::U32x2;
66 return 8;
67 default:
68 throw InvalidArgument("Invalid opcode {}", opcode);
69 }
70}
71
32void GetPatch(Info& info, IR::Patch patch) { 72void GetPatch(Info& info, IR::Patch patch) {
33 if (!IR::IsGeneric(patch)) { 73 if (!IR::IsGeneric(patch)) {
34 throw NotImplementedException("Reading non-generic patch {}", patch); 74 throw NotImplementedException("Reading non-generic patch {}", patch);
@@ -360,6 +400,15 @@ void VisitUsages(Info& info, IR::Inst& inst) {
360 case IR::Opcode::GlobalAtomicOr64: 400 case IR::Opcode::GlobalAtomicOr64:
361 case IR::Opcode::GlobalAtomicXor64: 401 case IR::Opcode::GlobalAtomicXor64:
362 case IR::Opcode::GlobalAtomicExchange64: 402 case IR::Opcode::GlobalAtomicExchange64:
403 case IR::Opcode::GlobalAtomicIAdd32x2:
404 case IR::Opcode::GlobalAtomicSMin32x2:
405 case IR::Opcode::GlobalAtomicUMin32x2:
406 case IR::Opcode::GlobalAtomicSMax32x2:
407 case IR::Opcode::GlobalAtomicUMax32x2:
408 case IR::Opcode::GlobalAtomicAnd32x2:
409 case IR::Opcode::GlobalAtomicOr32x2:
410 case IR::Opcode::GlobalAtomicXor32x2:
411 case IR::Opcode::GlobalAtomicExchange32x2:
363 case IR::Opcode::GlobalAtomicAddF32: 412 case IR::Opcode::GlobalAtomicAddF32:
364 case IR::Opcode::GlobalAtomicAddF16x2: 413 case IR::Opcode::GlobalAtomicAddF16x2:
365 case IR::Opcode::GlobalAtomicAddF32x2: 414 case IR::Opcode::GlobalAtomicAddF32x2:
@@ -454,42 +503,18 @@ void VisitUsages(Info& info, IR::Inst& inst) {
454 case IR::Opcode::GetCbufU32x2: { 503 case IR::Opcode::GetCbufU32x2: {
455 const IR::Value index{inst.Arg(0)}; 504 const IR::Value index{inst.Arg(0)};
456 const IR::Value offset{inst.Arg(1)}; 505 const IR::Value offset{inst.Arg(1)};
457 if (!index.IsImmediate()) { 506 if (index.IsImmediate()) {
458 throw NotImplementedException("Constant buffer with non-immediate index"); 507 AddConstantBufferDescriptor(info, index.U32(), 1);
459 } 508 u32 element_size = GetElementSize(info.used_constant_buffer_types, inst.GetOpcode());
460 AddConstantBufferDescriptor(info, index.U32(), 1); 509 u32& size{info.constant_buffer_used_sizes[index.U32()]};
461 u32 element_size{}; 510 if (offset.IsImmediate()) {
462 switch (inst.GetOpcode()) { 511 size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u);
463 case IR::Opcode::GetCbufU8: 512 } else {
464 case IR::Opcode::GetCbufS8: 513 size = 0x10'000;
465 info.used_constant_buffer_types |= IR::Type::U8; 514 }
466 element_size = 1;
467 break;
468 case IR::Opcode::GetCbufU16:
469 case IR::Opcode::GetCbufS16:
470 info.used_constant_buffer_types |= IR::Type::U16;
471 element_size = 2;
472 break;
473 case IR::Opcode::GetCbufU32:
474 info.used_constant_buffer_types |= IR::Type::U32;
475 element_size = 4;
476 break;
477 case IR::Opcode::GetCbufF32:
478 info.used_constant_buffer_types |= IR::Type::F32;
479 element_size = 4;
480 break;
481 case IR::Opcode::GetCbufU32x2:
482 info.used_constant_buffer_types |= IR::Type::U32x2;
483 element_size = 8;
484 break;
485 default:
486 break;
487 }
488 u32& size{info.constant_buffer_used_sizes[index.U32()]};
489 if (offset.IsImmediate()) {
490 size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u);
491 } else { 515 } else {
492 size = 0x10'000; 516 AddRegisterIndexedLdc(info);
517 GetElementSize(info.used_indirect_cbuf_types, inst.GetOpcode());
493 } 518 }
494 break; 519 break;
495 } 520 }
@@ -597,6 +622,15 @@ void VisitUsages(Info& info, IR::Inst& inst) {
597 break; 622 break;
598 case IR::Opcode::LoadStorage64: 623 case IR::Opcode::LoadStorage64:
599 case IR::Opcode::WriteStorage64: 624 case IR::Opcode::WriteStorage64:
625 case IR::Opcode::StorageAtomicIAdd32x2:
626 case IR::Opcode::StorageAtomicSMin32x2:
627 case IR::Opcode::StorageAtomicUMin32x2:
628 case IR::Opcode::StorageAtomicSMax32x2:
629 case IR::Opcode::StorageAtomicUMax32x2:
630 case IR::Opcode::StorageAtomicAnd32x2:
631 case IR::Opcode::StorageAtomicOr32x2:
632 case IR::Opcode::StorageAtomicXor32x2:
633 case IR::Opcode::StorageAtomicExchange32x2:
600 info.used_storage_buffer_types |= IR::Type::U32x2; 634 info.used_storage_buffer_types |= IR::Type::U32x2;
601 break; 635 break;
602 case IR::Opcode::LoadStorage128: 636 case IR::Opcode::LoadStorage128:
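For an immediate offset the pass grows the buffer's recorded size to cover offset plus element size, rounded up to 16 bytes; for a dynamic offset, or a register-indexed buffer, it falls back to the full 64 KiB. A standalone sketch of that bookkeeping (names are illustrative; the project uses Common::AlignUp and the Info struct instead):

#include <algorithm>
#include <cstdint>
#include <optional>

constexpr std::uint32_t MAX_CBUF_SIZE = 0x10'000; // 64 KiB, the hardware maximum

std::uint32_t AlignUp(std::uint32_t value, std::uint32_t align) {
    return (value + align - 1) / align * align;
}

// Track the highest byte touched in one constant buffer.
void RecordCbufAccess(std::uint32_t& used_size, std::optional<std::uint32_t> offset,
                      std::uint32_t element_size) {
    if (offset) {
        used_size = AlignUp(std::max(used_size, *offset + element_size), 16);
    } else {
        used_size = MAX_CBUF_SIZE; // dynamic offset: assume the whole buffer is read
    }
}
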
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 4197b0095..ddf497e32 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -92,6 +92,15 @@ bool IsGlobalMemory(const IR::Inst& inst) {
92 case IR::Opcode::GlobalAtomicOr64: 92 case IR::Opcode::GlobalAtomicOr64:
93 case IR::Opcode::GlobalAtomicXor64: 93 case IR::Opcode::GlobalAtomicXor64:
94 case IR::Opcode::GlobalAtomicExchange64: 94 case IR::Opcode::GlobalAtomicExchange64:
95 case IR::Opcode::GlobalAtomicIAdd32x2:
96 case IR::Opcode::GlobalAtomicSMin32x2:
97 case IR::Opcode::GlobalAtomicUMin32x2:
98 case IR::Opcode::GlobalAtomicSMax32x2:
99 case IR::Opcode::GlobalAtomicUMax32x2:
100 case IR::Opcode::GlobalAtomicAnd32x2:
101 case IR::Opcode::GlobalAtomicOr32x2:
102 case IR::Opcode::GlobalAtomicXor32x2:
103 case IR::Opcode::GlobalAtomicExchange32x2:
95 case IR::Opcode::GlobalAtomicAddF32: 104 case IR::Opcode::GlobalAtomicAddF32:
96 case IR::Opcode::GlobalAtomicAddF16x2: 105 case IR::Opcode::GlobalAtomicAddF16x2:
97 case IR::Opcode::GlobalAtomicAddF32x2: 106 case IR::Opcode::GlobalAtomicAddF32x2:
@@ -135,6 +144,15 @@ bool IsGlobalMemoryWrite(const IR::Inst& inst) {
135 case IR::Opcode::GlobalAtomicOr64: 144 case IR::Opcode::GlobalAtomicOr64:
136 case IR::Opcode::GlobalAtomicXor64: 145 case IR::Opcode::GlobalAtomicXor64:
137 case IR::Opcode::GlobalAtomicExchange64: 146 case IR::Opcode::GlobalAtomicExchange64:
147 case IR::Opcode::GlobalAtomicIAdd32x2:
148 case IR::Opcode::GlobalAtomicSMin32x2:
149 case IR::Opcode::GlobalAtomicUMin32x2:
150 case IR::Opcode::GlobalAtomicSMax32x2:
151 case IR::Opcode::GlobalAtomicUMax32x2:
152 case IR::Opcode::GlobalAtomicAnd32x2:
153 case IR::Opcode::GlobalAtomicOr32x2:
154 case IR::Opcode::GlobalAtomicXor32x2:
155 case IR::Opcode::GlobalAtomicExchange32x2:
138 case IR::Opcode::GlobalAtomicAddF32: 156 case IR::Opcode::GlobalAtomicAddF32:
139 case IR::Opcode::GlobalAtomicAddF16x2: 157 case IR::Opcode::GlobalAtomicAddF16x2:
140 case IR::Opcode::GlobalAtomicAddF32x2: 158 case IR::Opcode::GlobalAtomicAddF32x2:
@@ -199,6 +217,8 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
199 return IR::Opcode::StorageAtomicOr32; 217 return IR::Opcode::StorageAtomicOr32;
200 case IR::Opcode::GlobalAtomicXor32: 218 case IR::Opcode::GlobalAtomicXor32:
201 return IR::Opcode::StorageAtomicXor32; 219 return IR::Opcode::StorageAtomicXor32;
220 case IR::Opcode::GlobalAtomicExchange32:
221 return IR::Opcode::StorageAtomicExchange32;
202 case IR::Opcode::GlobalAtomicIAdd64: 222 case IR::Opcode::GlobalAtomicIAdd64:
203 return IR::Opcode::StorageAtomicIAdd64; 223 return IR::Opcode::StorageAtomicIAdd64;
204 case IR::Opcode::GlobalAtomicSMin64: 224 case IR::Opcode::GlobalAtomicSMin64:
@@ -215,10 +235,26 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
215 return IR::Opcode::StorageAtomicOr64; 235 return IR::Opcode::StorageAtomicOr64;
216 case IR::Opcode::GlobalAtomicXor64: 236 case IR::Opcode::GlobalAtomicXor64:
217 return IR::Opcode::StorageAtomicXor64; 237 return IR::Opcode::StorageAtomicXor64;
218 case IR::Opcode::GlobalAtomicExchange32:
219 return IR::Opcode::StorageAtomicExchange32;
220 case IR::Opcode::GlobalAtomicExchange64: 238 case IR::Opcode::GlobalAtomicExchange64:
221 return IR::Opcode::StorageAtomicExchange64; 239 return IR::Opcode::StorageAtomicExchange64;
240 case IR::Opcode::GlobalAtomicIAdd32x2:
241 return IR::Opcode::StorageAtomicIAdd32x2;
242 case IR::Opcode::GlobalAtomicSMin32x2:
243 return IR::Opcode::StorageAtomicSMin32x2;
244 case IR::Opcode::GlobalAtomicUMin32x2:
245 return IR::Opcode::StorageAtomicUMin32x2;
246 case IR::Opcode::GlobalAtomicSMax32x2:
247 return IR::Opcode::StorageAtomicSMax32x2;
248 case IR::Opcode::GlobalAtomicUMax32x2:
249 return IR::Opcode::StorageAtomicUMax32x2;
250 case IR::Opcode::GlobalAtomicAnd32x2:
251 return IR::Opcode::StorageAtomicAnd32x2;
252 case IR::Opcode::GlobalAtomicOr32x2:
253 return IR::Opcode::StorageAtomicOr32x2;
254 case IR::Opcode::GlobalAtomicXor32x2:
255 return IR::Opcode::StorageAtomicXor32x2;
256 case IR::Opcode::GlobalAtomicExchange32x2:
257 return IR::Opcode::StorageAtomicExchange32x2;
222 case IR::Opcode::GlobalAtomicAddF32: 258 case IR::Opcode::GlobalAtomicAddF32:
223 return IR::Opcode::StorageAtomicAddF32; 259 return IR::Opcode::StorageAtomicAddF32;
224 case IR::Opcode::GlobalAtomicAddF16x2: 260 case IR::Opcode::GlobalAtomicAddF16x2:
@@ -298,7 +334,8 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
298/// Tries to track the storage buffer address used by a global memory instruction 334/// Tries to track the storage buffer address used by a global memory instruction
299std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { 335std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
300 const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> { 336 const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> {
301 if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { 337 if (inst->GetOpcode() != IR::Opcode::GetCbufU32 &&
338 inst->GetOpcode() != IR::Opcode::GetCbufU32x2) {
302 return std::nullopt; 339 return std::nullopt;
303 } 340 }
304 const IR::Value index{inst->Arg(0)}; 341 const IR::Value index{inst->Arg(0)};
@@ -454,6 +491,15 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
454 case IR::Opcode::GlobalAtomicOr64: 491 case IR::Opcode::GlobalAtomicOr64:
455 case IR::Opcode::GlobalAtomicXor64: 492 case IR::Opcode::GlobalAtomicXor64:
456 case IR::Opcode::GlobalAtomicExchange64: 493 case IR::Opcode::GlobalAtomicExchange64:
494 case IR::Opcode::GlobalAtomicIAdd32x2:
495 case IR::Opcode::GlobalAtomicSMin32x2:
496 case IR::Opcode::GlobalAtomicUMin32x2:
497 case IR::Opcode::GlobalAtomicSMax32x2:
498 case IR::Opcode::GlobalAtomicUMax32x2:
499 case IR::Opcode::GlobalAtomicAnd32x2:
500 case IR::Opcode::GlobalAtomicOr32x2:
501 case IR::Opcode::GlobalAtomicXor32x2:
502 case IR::Opcode::GlobalAtomicExchange32x2:
457 case IR::Opcode::GlobalAtomicAddF32: 503 case IR::Opcode::GlobalAtomicAddF32:
458 case IR::Opcode::GlobalAtomicAddF16x2: 504 case IR::Opcode::GlobalAtomicAddF16x2:
459 case IR::Opcode::GlobalAtomicAddF32x2: 505 case IR::Opcode::GlobalAtomicAddF32x2:
diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
index e80d3d1d9..c2654cd9b 100644
--- a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
+++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
@@ -199,6 +199,26 @@ void Lower(IR::Block& block, IR::Inst& inst) {
199 return ShiftRightLogical64To32(block, inst); 199 return ShiftRightLogical64To32(block, inst);
200 case IR::Opcode::ShiftRightArithmetic64: 200 case IR::Opcode::ShiftRightArithmetic64:
201 return ShiftRightArithmetic64To32(block, inst); 201 return ShiftRightArithmetic64To32(block, inst);
202 case IR::Opcode::SharedAtomicExchange64:
203 return inst.ReplaceOpcode(IR::Opcode::SharedAtomicExchange32x2);
204 case IR::Opcode::GlobalAtomicIAdd64:
205 return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicIAdd32x2);
206 case IR::Opcode::GlobalAtomicSMin64:
207 return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicSMin32x2);
208 case IR::Opcode::GlobalAtomicUMin64:
209 return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicUMin32x2);
210 case IR::Opcode::GlobalAtomicSMax64:
211 return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicSMax32x2);
212 case IR::Opcode::GlobalAtomicUMax64:
213 return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicUMax32x2);
214 case IR::Opcode::GlobalAtomicAnd64:
215 return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicAnd32x2);
216 case IR::Opcode::GlobalAtomicOr64:
217 return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicOr32x2);
218 case IR::Opcode::GlobalAtomicXor64:
219 return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicXor32x2);
220 case IR::Opcode::GlobalAtomicExchange64:
221 return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicExchange32x2);
202 default: 222 default:
203 break; 223 break;
204 } 224 }
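After the opcode swap, the 64-bit operand and result travel as pairs of 32-bit words, so the backends only ever see U32x2 values. A sketch of the pack/unpack this lowering relies on, written with plain integers rather than the IR pack helpers:

#include <cstdint>
#include <utility>

// Split a 64-bit value into the (low, high) pair a 32x2 atomic consumes.
std::pair<std::uint32_t, std::uint32_t> UnpackUint64(std::uint64_t value) {
    return {static_cast<std::uint32_t>(value), static_cast<std::uint32_t>(value >> 32)};
}

// Reassemble the 64-bit result from the pair the 32x2 atomic returns.
std::uint64_t PackUint64(std::uint32_t low, std::uint32_t high) {
    return static_cast<std::uint64_t>(low) | (static_cast<std::uint64_t>(high) << 32);
}
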
diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp
index c28500dd1..496d4667e 100644
--- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp
+++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp
@@ -183,6 +183,31 @@ void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_s
183 } 183 }
184} 184}
185 185
186void ScaleIntegerOffsetComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled,
187 size_t index) {
188 const IR::Value composite{inst.Arg(index)};
189 if (composite.IsEmpty()) {
190 return;
191 }
192 const auto info{inst.Flags<IR::TextureInstInfo>()};
193 const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})};
194 const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})};
195 switch (info.type) {
196 case TextureType::ColorArray2D:
197 case TextureType::Color2D:
198 inst.SetArg(index, ir.CompositeConstruct(x, y));
199 break;
200 case TextureType::Color1D:
201 case TextureType::ColorArray1D:
202 case TextureType::Color3D:
203 case TextureType::ColorCube:
204 case TextureType::ColorArrayCube:
205 case TextureType::Buffer:
206 // Nothing to patch here
207 break;
208 }
209}
210
186void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) { 211void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) {
187 const auto info{inst.Flags<IR::TextureInstInfo>()}; 212 const auto info{inst.Flags<IR::TextureInstInfo>()};
188 const IR::Value coord{inst.Arg(1)}; 213 const IR::Value coord{inst.Arg(1)};
@@ -220,7 +245,7 @@ void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) {
220 const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; 245 const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
221 SubScaleCoord(ir, inst, is_scaled); 246 SubScaleCoord(ir, inst, is_scaled);
222 // Scale ImageFetch offset 247 // Scale ImageFetch offset
223 ScaleIntegerComposite(ir, inst, is_scaled, 2); 248 ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
224} 249}
225 250
226void SubScaleImageRead(IR::Block& block, IR::Inst& inst) { 251void SubScaleImageRead(IR::Block& block, IR::Inst& inst) {
@@ -242,7 +267,7 @@ void PatchImageFetch(IR::Block& block, IR::Inst& inst) {
242 const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; 267 const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
243 ScaleIntegerComposite(ir, inst, is_scaled, 1); 268 ScaleIntegerComposite(ir, inst, is_scaled, 1);
244 // Scale ImageFetch offset 269 // Scale ImageFetch offset
245 ScaleIntegerComposite(ir, inst, is_scaled, 2); 270 ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
246} 271}
247 272
248void PatchImageRead(IR::Block& block, IR::Inst& inst) { 273void PatchImageRead(IR::Block& block, IR::Inst& inst) {
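ScaleIntegerOffsetComposite scales only the x/y components of a 2D texel offset; every other texture type keeps its offset untouched. A standalone sketch of that shape, with the scale reduced to a single integer multiply (an assumption; the pass's Scale helper applies the configured resolution ratio):

#include <array>
#include <cstdint>

enum class TextureType { Color1D, Color2D, ColorArray2D, Color3D, Buffer };

// Scale a texel offset only for the 2D texture kinds; leave everything else as-is.
std::array<std::int32_t, 2> ScaleOffset2D(std::array<std::int32_t, 2> offset,
                                          TextureType type, std::int32_t scale,
                                          bool is_scaled) {
    const bool is_2d = type == TextureType::Color2D || type == TextureType::ColorArray2D;
    if (is_scaled && is_2d) {
        offset[0] *= scale;
        offset[1] *= scale;
    }
    return offset;
}
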
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 9f375c30e..9d36bd9eb 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -173,9 +173,11 @@ struct Info {
173 bool uses_atomic_image_u32{}; 173 bool uses_atomic_image_u32{};
174 bool uses_shadow_lod{}; 174 bool uses_shadow_lod{};
175 bool uses_rescaling_uniform{}; 175 bool uses_rescaling_uniform{};
176 bool uses_cbuf_indirect{};
176 177
177 IR::Type used_constant_buffer_types{}; 178 IR::Type used_constant_buffer_types{};
178 IR::Type used_storage_buffer_types{}; 179 IR::Type used_storage_buffer_types{};
180 IR::Type used_indirect_cbuf_types{};
179 181
180 u32 constant_buffer_mask{}; 182 u32 constant_buffer_mask{};
181 std::array<u32, MAX_CBUFS> constant_buffer_used_sizes{}; 183 std::array<u32, MAX_CBUFS> constant_buffer_used_sizes{};